1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 #include <sys/sysmacros.h> 72 73 #ifdef DEBUG 74 #include <sys/debug.h> 75 int mdi_debug = 1; 76 int mdi_debug_logonly = 0; 77 #define MDI_DEBUG(level, stmnt) \ 78 if (mdi_debug >= (level)) i_mdi_log stmnt 79 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 80 #else /* !DEBUG */ 81 #define MDI_DEBUG(level, stmnt) 82 #endif /* DEBUG */ 83 84 extern pri_t minclsyspri; 85 extern int modrootloaded; 86 87 /* 88 * Global mutex: 89 * Protects vHCI list and structure members. 
90 */ 91 kmutex_t mdi_mutex; 92 93 /* 94 * Registered vHCI class driver lists 95 */ 96 int mdi_vhci_count; 97 mdi_vhci_t *mdi_vhci_head; 98 mdi_vhci_t *mdi_vhci_tail; 99 100 /* 101 * Client Hash Table size 102 */ 103 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 104 105 /* 106 * taskq interface definitions 107 */ 108 #define MDI_TASKQ_N_THREADS 8 109 #define MDI_TASKQ_PRI minclsyspri 110 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 111 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 112 113 taskq_t *mdi_taskq; 114 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 115 116 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 117 118 /* 119 * The data should be "quiet" for this interval (in seconds) before the 120 * vhci cached data is flushed to the disk. 121 */ 122 static int mdi_vhcache_flush_delay = 10; 123 124 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 125 static int mdi_vhcache_flush_daemon_idle_time = 60; 126 127 /* 128 * MDI falls back to discovery of all paths when a bus_config_one fails. 129 * The following parameters can be used to tune this operation. 130 * 131 * mdi_path_discovery_boot 132 * Number of times path discovery will be attempted during early boot. 133 * Probably there is no reason to ever set this value to greater than one. 134 * 135 * mdi_path_discovery_postboot 136 * Number of times path discovery will be attempted after early boot. 137 * Set it to a minimum of two to allow for discovery of iscsi paths which 138 * may happen very late during booting. 139 * 140 * mdi_path_discovery_interval 141 * Minimum number of seconds MDI will wait between successive discovery 142 * of all paths. Set it to -1 to disable discovery of all paths. 
143 */ 144 static int mdi_path_discovery_boot = 1; 145 static int mdi_path_discovery_postboot = 2; 146 static int mdi_path_discovery_interval = 10; 147 148 /* 149 * number of seconds the asynchronous configuration thread will sleep idle 150 * before exiting. 151 */ 152 static int mdi_async_config_idle_time = 600; 153 154 static int mdi_bus_config_cache_hash_size = 256; 155 156 /* turns off multithreaded configuration for certain operations */ 157 static int mdi_mtc_off = 0; 158 159 /* 160 * MDI component property name/value string definitions 161 */ 162 const char *mdi_component_prop = "mpxio-component"; 163 const char *mdi_component_prop_vhci = "vhci"; 164 const char *mdi_component_prop_phci = "phci"; 165 const char *mdi_component_prop_client = "client"; 166 167 /* 168 * MDI client global unique identifier property name 169 */ 170 const char *mdi_client_guid_prop = "client-guid"; 171 172 /* 173 * MDI client load balancing property name/value string definitions 174 */ 175 const char *mdi_load_balance = "load-balance"; 176 const char *mdi_load_balance_none = "none"; 177 const char *mdi_load_balance_rr = "round-robin"; 178 const char *mdi_load_balance_lba = "logical-block"; 179 180 /* 181 * Obsolete vHCI class definition; to be removed after Leadville update 182 */ 183 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 184 185 static char vhci_greeting[] = 186 "\tThere already exists one vHCI driver for class %s\n" 187 "\tOnly one vHCI driver for each class is allowed\n"; 188 189 /* 190 * Static function prototypes 191 */ 192 static int i_mdi_phci_offline(dev_info_t *, uint_t); 193 static int i_mdi_client_offline(dev_info_t *, uint_t); 194 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 195 static void i_mdi_phci_post_detach(dev_info_t *, 196 ddi_detach_cmd_t, int); 197 static int i_mdi_client_pre_detach(dev_info_t *, 198 ddi_detach_cmd_t); 199 static void i_mdi_client_post_detach(dev_info_t *, 200 ddi_detach_cmd_t, int); 201 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 202 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 203 static int i_mdi_lba_lb(mdi_client_t *ct, 204 mdi_pathinfo_t **ret_pip, struct buf *buf); 205 static void i_mdi_pm_hold_client(mdi_client_t *, int); 206 static void i_mdi_pm_rele_client(mdi_client_t *, int); 207 static void i_mdi_pm_reset_client(mdi_client_t *); 208 static int i_mdi_power_all_phci(mdi_client_t *); 209 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 210 211 212 /* 213 * Internal mdi_pathinfo node functions 214 */ 215 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 216 217 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 218 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 219 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 220 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 221 static void i_mdi_phci_unlock(mdi_phci_t *); 222 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 223 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 224 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 225 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 226 mdi_client_t *); 227 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 228 static void i_mdi_client_remove_path(mdi_client_t *, 229 mdi_pathinfo_t *); 230 231 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 232 mdi_pathinfo_state_t, int); 233 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 234 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 235 char **, int); 236 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 237 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 238 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 239 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 240 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 241 static void i_mdi_client_delist_table(mdi_vhci_t *, 
mdi_client_t *); 242 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 243 static void i_mdi_client_update_state(mdi_client_t *); 244 static int i_mdi_client_compute_state(mdi_client_t *, 245 mdi_phci_t *); 246 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 247 static void i_mdi_client_unlock(mdi_client_t *); 248 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 249 static mdi_client_t *i_devi_get_client(dev_info_t *); 250 /* 251 * NOTE: this will be removed once the NWS files are changed to use the new 252 * mdi_{enable,disable}_path interfaces 253 */ 254 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 255 int, int); 256 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 257 mdi_vhci_t *vh, int flags, int op); 258 /* 259 * Failover related function prototypes 260 */ 261 static int i_mdi_failover(void *); 262 263 /* 264 * misc internal functions 265 */ 266 static int i_mdi_get_hash_key(char *); 267 static int i_map_nvlist_error_to_mdi(int); 268 static void i_mdi_report_path_state(mdi_client_t *, 269 mdi_pathinfo_t *); 270 271 static void setup_vhci_cache(mdi_vhci_t *); 272 static int destroy_vhci_cache(mdi_vhci_t *); 273 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 274 static boolean_t stop_vhcache_flush_thread(void *, int); 275 static void free_string_array(char **, int); 276 static void free_vhcache_phci(mdi_vhcache_phci_t *); 277 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 278 static void free_vhcache_client(mdi_vhcache_client_t *); 279 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 280 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 281 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 282 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 283 static void vhcache_pi_add(mdi_vhci_config_t *, 284 struct mdi_pathinfo *); 285 static void vhcache_pi_remove(mdi_vhci_config_t *, 286 struct 
mdi_pathinfo *); 287 static void free_phclient_path_list(mdi_phys_path_t *); 288 static void sort_vhcache_paths(mdi_vhcache_client_t *); 289 static int flush_vhcache(mdi_vhci_config_t *, int); 290 static void vhcache_dirty(mdi_vhci_config_t *); 291 static void free_async_client_config(mdi_async_client_config_t *); 292 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 293 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 294 static nvlist_t *read_on_disk_vhci_cache(char *); 295 extern int fread_nvlist(char *, nvlist_t **); 296 extern int fwrite_nvlist(char *, nvlist_t *); 297 298 /* called once when first vhci registers with mdi */ 299 static void 300 i_mdi_init() 301 { 302 static int initialized = 0; 303 304 if (initialized) 305 return; 306 initialized = 1; 307 308 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 309 /* 310 * Create our taskq resources 311 */ 312 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 313 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 314 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 315 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 316 } 317 318 /* 319 * mdi_get_component_type(): 320 * Return mpxio component type 321 * Return Values: 322 * MDI_COMPONENT_NONE 323 * MDI_COMPONENT_VHCI 324 * MDI_COMPONENT_PHCI 325 * MDI_COMPONENT_CLIENT 326 * XXX This doesn't work under multi-level MPxIO and should be 327 * removed when clients migrate mdi_component_is_*() interfaces. 328 */ 329 int 330 mdi_get_component_type(dev_info_t *dip) 331 { 332 return (DEVI(dip)->devi_mdi_component); 333 } 334 335 /* 336 * mdi_vhci_register(): 337 * Register a vHCI module with the mpxio framework 338 * mdi_vhci_register() is called by vHCI drivers to register the 339 * 'class_driver' vHCI driver and its MDI entrypoints with the 340 * mpxio framework. The vHCI driver must call this interface as 341 * part of its attach(9e) handler. 
342 * Competing threads may try to attach mdi_vhci_register() as 343 * the vHCI drivers are loaded and attached as a result of pHCI 344 * driver instance registration (mdi_phci_register()) with the 345 * framework. 346 * Return Values: 347 * MDI_SUCCESS 348 * MDI_FAILURE 349 */ 350 /*ARGSUSED*/ 351 int 352 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 353 int flags) 354 { 355 mdi_vhci_t *vh = NULL; 356 357 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 358 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 359 360 i_mdi_init(); 361 362 mutex_enter(&mdi_mutex); 363 /* 364 * Scan for already registered vhci 365 */ 366 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 367 if (strcmp(vh->vh_class, class) == 0) { 368 /* 369 * vHCI has already been created. Check for valid 370 * vHCI ops registration. We only support one vHCI 371 * module per class 372 */ 373 if (vh->vh_ops != NULL) { 374 mutex_exit(&mdi_mutex); 375 cmn_err(CE_NOTE, vhci_greeting, class); 376 return (MDI_FAILURE); 377 } 378 break; 379 } 380 } 381 382 /* 383 * if not yet created, create the vHCI component 384 */ 385 if (vh == NULL) { 386 struct client_hash *hash = NULL; 387 char *load_balance; 388 389 /* 390 * Allocate and initialize the mdi extensions 391 */ 392 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 393 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 394 KM_SLEEP); 395 vh->vh_client_table = hash; 396 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 397 (void) strcpy(vh->vh_class, class); 398 vh->vh_lb = LOAD_BALANCE_RR; 399 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 400 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 401 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 402 vh->vh_lb = LOAD_BALANCE_NONE; 403 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 404 == 0) { 405 vh->vh_lb = LOAD_BALANCE_LBA; 406 } 407 ddi_prop_free(load_balance); 408 } 409 410 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 411 
mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 412 413 /* 414 * Store the vHCI ops vectors 415 */ 416 vh->vh_dip = vdip; 417 vh->vh_ops = vops; 418 419 setup_vhci_cache(vh); 420 421 if (mdi_vhci_head == NULL) { 422 mdi_vhci_head = vh; 423 } 424 if (mdi_vhci_tail) { 425 mdi_vhci_tail->vh_next = vh; 426 } 427 mdi_vhci_tail = vh; 428 mdi_vhci_count++; 429 } 430 431 /* 432 * Claim the devfs node as a vhci component 433 */ 434 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 435 436 /* 437 * Initialize our back reference from dev_info node 438 */ 439 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 440 mutex_exit(&mdi_mutex); 441 return (MDI_SUCCESS); 442 } 443 444 /* 445 * mdi_vhci_unregister(): 446 * Unregister a vHCI module from mpxio framework 447 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 448 * of a vhci to unregister it from the framework. 449 * Return Values: 450 * MDI_SUCCESS 451 * MDI_FAILURE 452 */ 453 /*ARGSUSED*/ 454 int 455 mdi_vhci_unregister(dev_info_t *vdip, int flags) 456 { 457 mdi_vhci_t *found, *vh, *prev = NULL; 458 459 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 460 461 /* 462 * Check for invalid VHCI 463 */ 464 if ((vh = i_devi_get_vhci(vdip)) == NULL) 465 return (MDI_FAILURE); 466 467 /* 468 * Scan the list of registered vHCIs for a match 469 */ 470 mutex_enter(&mdi_mutex); 471 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 472 if (found == vh) 473 break; 474 prev = found; 475 } 476 477 if (found == NULL) { 478 mutex_exit(&mdi_mutex); 479 return (MDI_FAILURE); 480 } 481 482 /* 483 * Check the vHCI, pHCI and client count. All the pHCIs and clients 484 * should have been unregistered, before a vHCI can be 485 * unregistered. 
486 */ 487 MDI_VHCI_PHCI_LOCK(vh); 488 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 489 MDI_VHCI_PHCI_UNLOCK(vh); 490 mutex_exit(&mdi_mutex); 491 return (MDI_FAILURE); 492 } 493 MDI_VHCI_PHCI_UNLOCK(vh); 494 495 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 496 mutex_exit(&mdi_mutex); 497 return (MDI_FAILURE); 498 } 499 500 /* 501 * Remove the vHCI from the global list 502 */ 503 if (vh == mdi_vhci_head) { 504 mdi_vhci_head = vh->vh_next; 505 } else { 506 prev->vh_next = vh->vh_next; 507 } 508 if (vh == mdi_vhci_tail) { 509 mdi_vhci_tail = prev; 510 } 511 mdi_vhci_count--; 512 mutex_exit(&mdi_mutex); 513 514 vh->vh_ops = NULL; 515 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 516 DEVI(vdip)->devi_mdi_xhci = NULL; 517 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 518 kmem_free(vh->vh_client_table, 519 mdi_client_table_size * sizeof (struct client_hash)); 520 mutex_destroy(&vh->vh_phci_mutex); 521 mutex_destroy(&vh->vh_client_mutex); 522 523 kmem_free(vh, sizeof (mdi_vhci_t)); 524 return (MDI_SUCCESS); 525 } 526 527 /* 528 * i_mdi_vhci_class2vhci(): 529 * Look for a matching vHCI module given a vHCI class name 530 * Return Values: 531 * Handle to a vHCI component 532 * NULL 533 */ 534 static mdi_vhci_t * 535 i_mdi_vhci_class2vhci(char *class) 536 { 537 mdi_vhci_t *vh = NULL; 538 539 ASSERT(!MUTEX_HELD(&mdi_mutex)); 540 541 mutex_enter(&mdi_mutex); 542 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 543 if (strcmp(vh->vh_class, class) == 0) { 544 break; 545 } 546 } 547 mutex_exit(&mdi_mutex); 548 return (vh); 549 } 550 551 /* 552 * i_devi_get_vhci(): 553 * Utility function to get the handle to a vHCI component 554 * Return Values: 555 * Handle to a vHCI component 556 * NULL 557 */ 558 mdi_vhci_t * 559 i_devi_get_vhci(dev_info_t *vdip) 560 { 561 mdi_vhci_t *vh = NULL; 562 if (MDI_VHCI(vdip)) { 563 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 564 } 565 return (vh); 566 } 567 568 /* 569 * mdi_phci_register(): 570 * Register a pHCI 
module with mpxio framework 571 * mdi_phci_register() is called by pHCI drivers to register with 572 * the mpxio framework and a specific 'class_driver' vHCI. The 573 * pHCI driver must call this interface as part of its attach(9e) 574 * handler. 575 * Return Values: 576 * MDI_SUCCESS 577 * MDI_FAILURE 578 */ 579 /*ARGSUSED*/ 580 int 581 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 582 { 583 mdi_phci_t *ph; 584 mdi_vhci_t *vh; 585 char *data; 586 char *pathname; 587 588 /* 589 * Some subsystems, like fcp, perform pHCI registration from a 590 * different thread than the one doing the pHCI attach(9E) - the 591 * driver attach code is waiting for this other thread to complete. 592 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 593 * (indicating that some thread has done an ndi_devi_enter of parent) 594 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 595 */ 596 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 597 598 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 599 (void) ddi_pathname(pdip, pathname); 600 601 /* 602 * Check for mpxio-disable property. Enable mpxio if the property is 603 * missing or not set to "yes". 604 * If the property is set to "yes" then emit a brief message. 
605 */ 606 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 607 &data) == DDI_SUCCESS)) { 608 if (strcmp(data, "yes") == 0) { 609 MDI_DEBUG(1, (CE_CONT, pdip, 610 "?%s (%s%d) multipath capabilities " 611 "disabled via %s.conf.\n", pathname, 612 ddi_driver_name(pdip), ddi_get_instance(pdip), 613 ddi_driver_name(pdip))); 614 ddi_prop_free(data); 615 kmem_free(pathname, MAXPATHLEN); 616 return (MDI_FAILURE); 617 } 618 ddi_prop_free(data); 619 } 620 621 kmem_free(pathname, MAXPATHLEN); 622 623 /* 624 * Search for a matching vHCI 625 */ 626 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 627 if (vh == NULL) { 628 return (MDI_FAILURE); 629 } 630 631 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 632 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 633 ph->ph_dip = pdip; 634 ph->ph_vhci = vh; 635 ph->ph_next = NULL; 636 ph->ph_unstable = 0; 637 ph->ph_vprivate = 0; 638 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 639 640 MDI_PHCI_LOCK(ph); 641 MDI_PHCI_SET_POWER_UP(ph); 642 MDI_PHCI_UNLOCK(ph); 643 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 644 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 645 646 vhcache_phci_add(vh->vh_config, ph); 647 648 MDI_VHCI_PHCI_LOCK(vh); 649 if (vh->vh_phci_head == NULL) { 650 vh->vh_phci_head = ph; 651 } 652 if (vh->vh_phci_tail) { 653 vh->vh_phci_tail->ph_next = ph; 654 } 655 vh->vh_phci_tail = ph; 656 vh->vh_phci_count++; 657 MDI_VHCI_PHCI_UNLOCK(vh); 658 659 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 660 return (MDI_SUCCESS); 661 } 662 663 /* 664 * mdi_phci_unregister(): 665 * Unregister a pHCI module from mpxio framework 666 * mdi_phci_unregister() is called by the pHCI drivers from their 667 * detach(9E) handler to unregister their instances from the 668 * framework. 
669 * Return Values: 670 * MDI_SUCCESS 671 * MDI_FAILURE 672 */ 673 /*ARGSUSED*/ 674 int 675 mdi_phci_unregister(dev_info_t *pdip, int flags) 676 { 677 mdi_vhci_t *vh; 678 mdi_phci_t *ph; 679 mdi_phci_t *tmp; 680 mdi_phci_t *prev = NULL; 681 682 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 683 684 ph = i_devi_get_phci(pdip); 685 if (ph == NULL) { 686 MDI_DEBUG(1, (CE_WARN, pdip, 687 "!pHCI unregister: Not a valid pHCI")); 688 return (MDI_FAILURE); 689 } 690 691 vh = ph->ph_vhci; 692 ASSERT(vh != NULL); 693 if (vh == NULL) { 694 MDI_DEBUG(1, (CE_WARN, pdip, 695 "!pHCI unregister: Not a valid vHCI")); 696 return (MDI_FAILURE); 697 } 698 699 MDI_VHCI_PHCI_LOCK(vh); 700 tmp = vh->vh_phci_head; 701 while (tmp) { 702 if (tmp == ph) { 703 break; 704 } 705 prev = tmp; 706 tmp = tmp->ph_next; 707 } 708 709 if (ph == vh->vh_phci_head) { 710 vh->vh_phci_head = ph->ph_next; 711 } else { 712 prev->ph_next = ph->ph_next; 713 } 714 715 if (ph == vh->vh_phci_tail) { 716 vh->vh_phci_tail = prev; 717 } 718 719 vh->vh_phci_count--; 720 MDI_VHCI_PHCI_UNLOCK(vh); 721 722 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 723 ESC_DDI_INITIATOR_UNREGISTER); 724 vhcache_phci_remove(vh->vh_config, ph); 725 cv_destroy(&ph->ph_unstable_cv); 726 mutex_destroy(&ph->ph_mutex); 727 kmem_free(ph, sizeof (mdi_phci_t)); 728 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 729 DEVI(pdip)->devi_mdi_xhci = NULL; 730 return (MDI_SUCCESS); 731 } 732 733 /* 734 * i_devi_get_phci(): 735 * Utility function to return the phci extensions. 736 */ 737 static mdi_phci_t * 738 i_devi_get_phci(dev_info_t *pdip) 739 { 740 mdi_phci_t *ph = NULL; 741 if (MDI_PHCI(pdip)) { 742 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 743 } 744 return (ph); 745 } 746 747 /* 748 * Single thread mdi entry into devinfo node for modifying its children. 749 * If necessary we perform an ndi_devi_enter of the vHCI before doing 750 * an ndi_devi_enter of 'dip'. 
We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a thread that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and then enter down the path
	 * to the pHCI. If pHCI is detaching then we piggyback this call's
	 * enter of the vHCI on the framework's vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should check DEVI_IS_DETACHING under an enter of the parent to
	 * avoid race with detach - but we can't do that because the framework
	 * has already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else {
			delay(1);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);

	/*
	 * Pack both values into one int: vHCI half in the upper 16 bits,
	 * pHCI half in the lower 16 bits.  vcircular is -1 when the vHCI
	 * was not entered here, and left-shifting a negative int is
	 * undefined, so the packing is done on unsigned values.
	 */
	*circular = (int)(((uint_t)vcircular << 16) |
	    ((uint_t)pcircular & 0xFFFF));
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
801 */ 802 void 803 mdi_devi_exit(dev_info_t *phci_dip, int circular) 804 { 805 dev_info_t *vdip; 806 int vcircular, pcircular; 807 808 /* Verify calling context */ 809 ASSERT(MDI_PHCI(phci_dip)); 810 vdip = mdi_devi_get_vdip(phci_dip); 811 ASSERT(vdip); /* A pHCI always has a vHCI */ 812 813 /* extract two circular recursion values from single int */ 814 pcircular = (short)(circular & 0xFFFF); 815 vcircular = (short)((circular >> 16) & 0xFFFF); 816 817 ndi_devi_exit(phci_dip, pcircular); 818 if (vcircular != -1) 819 ndi_devi_exit(vdip, vcircular); 820 } 821 822 /* 823 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 824 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 825 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 826 * with vHCI power management code during path online/offline. Each 827 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 828 * occur within the scope of an active mdi_devi_enter that establishes the 829 * circular value. 
830 */ 831 void 832 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 833 { 834 int pcircular; 835 836 /* Verify calling context */ 837 ASSERT(MDI_PHCI(phci_dip)); 838 839 pcircular = (short)(circular & 0xFFFF); 840 ndi_devi_exit(phci_dip, pcircular); 841 } 842 843 void 844 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 845 { 846 int pcircular; 847 848 /* Verify calling context */ 849 ASSERT(MDI_PHCI(phci_dip)); 850 851 ndi_devi_enter(phci_dip, &pcircular); 852 853 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 854 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 855 } 856 857 /* 858 * mdi_devi_get_vdip(): 859 * given a pHCI dip return vHCI dip 860 */ 861 dev_info_t * 862 mdi_devi_get_vdip(dev_info_t *pdip) 863 { 864 mdi_phci_t *ph; 865 866 ph = i_devi_get_phci(pdip); 867 if (ph && ph->ph_vhci) 868 return (ph->ph_vhci->vh_dip); 869 return (NULL); 870 } 871 872 /* 873 * mdi_devi_pdip_entered(): 874 * Return 1 if we are vHCI and have done an ndi_devi_enter 875 * of a pHCI 876 */ 877 int 878 mdi_devi_pdip_entered(dev_info_t *vdip) 879 { 880 mdi_vhci_t *vh; 881 mdi_phci_t *ph; 882 883 vh = i_devi_get_vhci(vdip); 884 if (vh == NULL) 885 return (0); 886 887 MDI_VHCI_PHCI_LOCK(vh); 888 ph = vh->vh_phci_head; 889 while (ph) { 890 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 891 MDI_VHCI_PHCI_UNLOCK(vh); 892 return (1); 893 } 894 ph = ph->ph_next; 895 } 896 MDI_VHCI_PHCI_UNLOCK(vh); 897 return (0); 898 } 899 900 /* 901 * mdi_phci_path2devinfo(): 902 * Utility function to search for a valid phci device given 903 * the devfs pathname. 
904 */ 905 dev_info_t * 906 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 907 { 908 char *temp_pathname; 909 mdi_vhci_t *vh; 910 mdi_phci_t *ph; 911 dev_info_t *pdip = NULL; 912 913 vh = i_devi_get_vhci(vdip); 914 ASSERT(vh != NULL); 915 916 if (vh == NULL) { 917 /* 918 * Invalid vHCI component, return failure 919 */ 920 return (NULL); 921 } 922 923 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 924 MDI_VHCI_PHCI_LOCK(vh); 925 ph = vh->vh_phci_head; 926 while (ph != NULL) { 927 pdip = ph->ph_dip; 928 ASSERT(pdip != NULL); 929 *temp_pathname = '\0'; 930 (void) ddi_pathname(pdip, temp_pathname); 931 if (strcmp(temp_pathname, pathname) == 0) { 932 break; 933 } 934 ph = ph->ph_next; 935 } 936 if (ph == NULL) { 937 pdip = NULL; 938 } 939 MDI_VHCI_PHCI_UNLOCK(vh); 940 kmem_free(temp_pathname, MAXPATHLEN); 941 return (pdip); 942 } 943 944 /* 945 * mdi_phci_get_path_count(): 946 * get number of path information nodes associated with a given 947 * pHCI device. 948 */ 949 int 950 mdi_phci_get_path_count(dev_info_t *pdip) 951 { 952 mdi_phci_t *ph; 953 int count = 0; 954 955 ph = i_devi_get_phci(pdip); 956 if (ph != NULL) { 957 count = ph->ph_path_count; 958 } 959 return (count); 960 } 961 962 /* 963 * i_mdi_phci_lock(): 964 * Lock a pHCI device 965 * Return Values: 966 * None 967 * Note: 968 * The default locking order is: 969 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 970 * But there are number of situations where locks need to be 971 * grabbed in reverse order. This routine implements try and lock 972 * mechanism depending on the requested parameter option. 973 */ 974 static void 975 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 976 { 977 if (pip) { 978 /* Reverse locking is requested. */ 979 while (MDI_PHCI_TRYLOCK(ph) == 0) { 980 /* 981 * tryenter failed. 
Try to grab again 982 * after a small delay 983 */ 984 MDI_PI_HOLD(pip); 985 MDI_PI_UNLOCK(pip); 986 delay(1); 987 MDI_PI_LOCK(pip); 988 MDI_PI_RELE(pip); 989 } 990 } else { 991 MDI_PHCI_LOCK(ph); 992 } 993 } 994 995 /* 996 * i_mdi_phci_unlock(): 997 * Unlock the pHCI component 998 */ 999 static void 1000 i_mdi_phci_unlock(mdi_phci_t *ph) 1001 { 1002 MDI_PHCI_UNLOCK(ph); 1003 } 1004 1005 /* 1006 * i_mdi_devinfo_create(): 1007 * create client device's devinfo node 1008 * Return Values: 1009 * dev_info 1010 * NULL 1011 * Notes: 1012 */ 1013 static dev_info_t * 1014 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1015 char **compatible, int ncompatible) 1016 { 1017 dev_info_t *cdip = NULL; 1018 1019 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1020 1021 /* Verify for duplicate entry */ 1022 cdip = i_mdi_devinfo_find(vh, name, guid); 1023 ASSERT(cdip == NULL); 1024 if (cdip) { 1025 cmn_err(CE_WARN, 1026 "i_mdi_devinfo_create: client dip %p already exists", 1027 (void *)cdip); 1028 } 1029 1030 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1031 if (cdip == NULL) 1032 goto fail; 1033 1034 /* 1035 * Create component type and Global unique identifier 1036 * properties 1037 */ 1038 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1039 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1040 goto fail; 1041 } 1042 1043 /* Decorate the node with compatible property */ 1044 if (compatible && 1045 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1046 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1047 goto fail; 1048 } 1049 1050 return (cdip); 1051 1052 fail: 1053 if (cdip) { 1054 (void) ndi_prop_remove_all(cdip); 1055 (void) ndi_devi_free(cdip); 1056 } 1057 return (NULL); 1058 } 1059 1060 /* 1061 * i_mdi_devinfo_find(): 1062 * Find a matching devinfo node for given client node name 1063 * and its guid. 
1064 * Return Values: 1065 * Handle to a dev_info node or NULL 1066 */ 1067 static dev_info_t * 1068 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1069 { 1070 char *data; 1071 dev_info_t *cdip = NULL; 1072 dev_info_t *ndip = NULL; 1073 int circular; 1074 1075 ndi_devi_enter(vh->vh_dip, &circular); 1076 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1077 while ((cdip = ndip) != NULL) { 1078 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1079 1080 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1081 continue; 1082 } 1083 1084 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1085 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1086 &data) != DDI_PROP_SUCCESS) { 1087 continue; 1088 } 1089 1090 if (strcmp(data, guid) != 0) { 1091 ddi_prop_free(data); 1092 continue; 1093 } 1094 ddi_prop_free(data); 1095 break; 1096 } 1097 ndi_devi_exit(vh->vh_dip, circular); 1098 return (cdip); 1099 } 1100 1101 /* 1102 * i_mdi_devinfo_remove(): 1103 * Remove a client device node 1104 */ 1105 static int 1106 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1107 { 1108 int rv = MDI_SUCCESS; 1109 1110 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1111 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1112 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1113 if (rv != NDI_SUCCESS) { 1114 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1115 " failed. 
cdip = %p\n", (void *)cdip)); 1116 } 1117 /* 1118 * Convert to MDI error code 1119 */ 1120 switch (rv) { 1121 case NDI_SUCCESS: 1122 rv = MDI_SUCCESS; 1123 break; 1124 case NDI_BUSY: 1125 rv = MDI_BUSY; 1126 break; 1127 default: 1128 rv = MDI_FAILURE; 1129 break; 1130 } 1131 } 1132 return (rv); 1133 } 1134 1135 /* 1136 * i_devi_get_client() 1137 * Utility function to get mpxio component extensions 1138 */ 1139 static mdi_client_t * 1140 i_devi_get_client(dev_info_t *cdip) 1141 { 1142 mdi_client_t *ct = NULL; 1143 1144 if (MDI_CLIENT(cdip)) { 1145 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1146 } 1147 return (ct); 1148 } 1149 1150 /* 1151 * i_mdi_is_child_present(): 1152 * Search for the presence of client device dev_info node 1153 */ 1154 static int 1155 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1156 { 1157 int rv = MDI_FAILURE; 1158 struct dev_info *dip; 1159 int circular; 1160 1161 ndi_devi_enter(vdip, &circular); 1162 dip = DEVI(vdip)->devi_child; 1163 while (dip) { 1164 if (dip == DEVI(cdip)) { 1165 rv = MDI_SUCCESS; 1166 break; 1167 } 1168 dip = dip->devi_sibling; 1169 } 1170 ndi_devi_exit(vdip, circular); 1171 return (rv); 1172 } 1173 1174 1175 /* 1176 * i_mdi_client_lock(): 1177 * Grab client component lock 1178 * Return Values: 1179 * None 1180 * Note: 1181 * The default locking order is: 1182 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1183 * But there are number of situations where locks need to be 1184 * grabbed in reverse order. This routine implements try and lock 1185 * mechanism depending on the requested parameter option. 1186 */ 1187 static void 1188 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1189 { 1190 if (pip) { 1191 /* 1192 * Reverse locking is requested. 1193 */ 1194 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1195 /* 1196 * tryenter failed. 
Try to grab again 1197 * after a small delay 1198 */ 1199 MDI_PI_HOLD(pip); 1200 MDI_PI_UNLOCK(pip); 1201 delay(1); 1202 MDI_PI_LOCK(pip); 1203 MDI_PI_RELE(pip); 1204 } 1205 } else { 1206 MDI_CLIENT_LOCK(ct); 1207 } 1208 } 1209 1210 /* 1211 * i_mdi_client_unlock(): 1212 * Unlock a client component 1213 */ 1214 static void 1215 i_mdi_client_unlock(mdi_client_t *ct) 1216 { 1217 MDI_CLIENT_UNLOCK(ct); 1218 } 1219 1220 /* 1221 * i_mdi_client_alloc(): 1222 * Allocate and initialize a client structure. Caller should 1223 * hold the vhci client lock. 1224 * Return Values: 1225 * Handle to a client component 1226 */ 1227 /*ARGSUSED*/ 1228 static mdi_client_t * 1229 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1230 { 1231 mdi_client_t *ct; 1232 1233 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1234 1235 /* 1236 * Allocate and initialize a component structure. 1237 */ 1238 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1239 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1240 ct->ct_hnext = NULL; 1241 ct->ct_hprev = NULL; 1242 ct->ct_dip = NULL; 1243 ct->ct_vhci = vh; 1244 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1245 (void) strcpy(ct->ct_drvname, name); 1246 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1247 (void) strcpy(ct->ct_guid, lguid); 1248 ct->ct_cprivate = NULL; 1249 ct->ct_vprivate = NULL; 1250 ct->ct_flags = 0; 1251 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1252 MDI_CLIENT_LOCK(ct); 1253 MDI_CLIENT_SET_OFFLINE(ct); 1254 MDI_CLIENT_SET_DETACH(ct); 1255 MDI_CLIENT_SET_POWER_UP(ct); 1256 MDI_CLIENT_UNLOCK(ct); 1257 ct->ct_failover_flags = 0; 1258 ct->ct_failover_status = 0; 1259 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1260 ct->ct_unstable = 0; 1261 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1262 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1263 ct->ct_lb = vh->vh_lb; 1264 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1265 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1266 ct->ct_path_count = 0; 1267 ct->ct_path_head = NULL; 1268 ct->ct_path_tail = NULL; 1269 ct->ct_path_last = NULL; 1270 1271 /* 1272 * Add this client component to our client hash queue 1273 */ 1274 i_mdi_client_enlist_table(vh, ct); 1275 return (ct); 1276 } 1277 1278 /* 1279 * i_mdi_client_enlist_table(): 1280 * Attach the client device to the client hash table. Caller 1281 * should hold the vhci client lock. 1282 */ 1283 static void 1284 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1285 { 1286 int index; 1287 struct client_hash *head; 1288 1289 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1290 1291 index = i_mdi_get_hash_key(ct->ct_guid); 1292 head = &vh->vh_client_table[index]; 1293 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1294 head->ct_hash_head = ct; 1295 head->ct_hash_count++; 1296 vh->vh_client_count++; 1297 } 1298 1299 /* 1300 * i_mdi_client_delist_table(): 1301 * Attach the client device to the client hash table. 1302 * Caller should hold the vhci client lock. 
1303 */ 1304 static void 1305 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1306 { 1307 int index; 1308 char *guid; 1309 struct client_hash *head; 1310 mdi_client_t *next; 1311 mdi_client_t *last; 1312 1313 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1314 1315 guid = ct->ct_guid; 1316 index = i_mdi_get_hash_key(guid); 1317 head = &vh->vh_client_table[index]; 1318 1319 last = NULL; 1320 next = (mdi_client_t *)head->ct_hash_head; 1321 while (next != NULL) { 1322 if (next == ct) { 1323 break; 1324 } 1325 last = next; 1326 next = next->ct_hnext; 1327 } 1328 1329 if (next) { 1330 head->ct_hash_count--; 1331 if (last == NULL) { 1332 head->ct_hash_head = ct->ct_hnext; 1333 } else { 1334 last->ct_hnext = ct->ct_hnext; 1335 } 1336 ct->ct_hnext = NULL; 1337 vh->vh_client_count--; 1338 } 1339 } 1340 1341 1342 /* 1343 * i_mdi_client_free(): 1344 * Free a client component 1345 */ 1346 static int 1347 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1348 { 1349 int rv = MDI_SUCCESS; 1350 int flags = ct->ct_flags; 1351 dev_info_t *cdip; 1352 dev_info_t *vdip; 1353 1354 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1355 1356 vdip = vh->vh_dip; 1357 cdip = ct->ct_dip; 1358 1359 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1360 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1361 DEVI(cdip)->devi_mdi_client = NULL; 1362 1363 /* 1364 * Clear out back ref. 
to dev_info_t node 1365 */ 1366 ct->ct_dip = NULL; 1367 1368 /* 1369 * Remove this client from our hash queue 1370 */ 1371 i_mdi_client_delist_table(vh, ct); 1372 1373 /* 1374 * Uninitialize and free the component 1375 */ 1376 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1377 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1378 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1379 cv_destroy(&ct->ct_failover_cv); 1380 cv_destroy(&ct->ct_unstable_cv); 1381 cv_destroy(&ct->ct_powerchange_cv); 1382 mutex_destroy(&ct->ct_mutex); 1383 kmem_free(ct, sizeof (*ct)); 1384 1385 if (cdip != NULL) { 1386 MDI_VHCI_CLIENT_UNLOCK(vh); 1387 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1388 MDI_VHCI_CLIENT_LOCK(vh); 1389 } 1390 return (rv); 1391 } 1392 1393 /* 1394 * i_mdi_client_find(): 1395 * Find the client structure corresponding to a given guid 1396 * Caller should hold the vhci client lock. 1397 */ 1398 static mdi_client_t * 1399 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1400 { 1401 int index; 1402 struct client_hash *head; 1403 mdi_client_t *ct; 1404 1405 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1406 1407 index = i_mdi_get_hash_key(guid); 1408 head = &vh->vh_client_table[index]; 1409 1410 ct = head->ct_hash_head; 1411 while (ct != NULL) { 1412 if (strcmp(ct->ct_guid, guid) == 0 && 1413 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1414 break; 1415 } 1416 ct = ct->ct_hnext; 1417 } 1418 return (ct); 1419 } 1420 1421 /* 1422 * i_mdi_client_update_state(): 1423 * Compute and update client device state 1424 * Notes: 1425 * A client device can be in any of three possible states: 1426 * 1427 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1428 * one online/standby paths. Can tolerate failures. 1429 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1430 * no alternate paths available as standby. A failure on the online 1431 * would result in loss of access to device data. 
1432 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1433 * no paths available to access the device. 1434 */ 1435 static void 1436 i_mdi_client_update_state(mdi_client_t *ct) 1437 { 1438 int state; 1439 1440 ASSERT(MDI_CLIENT_LOCKED(ct)); 1441 state = i_mdi_client_compute_state(ct, NULL); 1442 MDI_CLIENT_SET_STATE(ct, state); 1443 } 1444 1445 /* 1446 * i_mdi_client_compute_state(): 1447 * Compute client device state 1448 * 1449 * mdi_phci_t * Pointer to pHCI structure which should 1450 * while computing the new value. Used by 1451 * i_mdi_phci_offline() to find the new 1452 * client state after DR of a pHCI. 1453 */ 1454 static int 1455 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1456 { 1457 int state; 1458 int online_count = 0; 1459 int standby_count = 0; 1460 mdi_pathinfo_t *pip, *next; 1461 1462 ASSERT(MDI_CLIENT_LOCKED(ct)); 1463 pip = ct->ct_path_head; 1464 while (pip != NULL) { 1465 MDI_PI_LOCK(pip); 1466 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1467 if (MDI_PI(pip)->pi_phci == ph) { 1468 MDI_PI_UNLOCK(pip); 1469 pip = next; 1470 continue; 1471 } 1472 1473 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1474 == MDI_PATHINFO_STATE_ONLINE) 1475 online_count++; 1476 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1477 == MDI_PATHINFO_STATE_STANDBY) 1478 standby_count++; 1479 MDI_PI_UNLOCK(pip); 1480 pip = next; 1481 } 1482 1483 if (online_count == 0) { 1484 if (standby_count == 0) { 1485 state = MDI_CLIENT_STATE_FAILED; 1486 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1487 " ct = %p\n", (void *)ct)); 1488 } else if (standby_count == 1) { 1489 state = MDI_CLIENT_STATE_DEGRADED; 1490 } else { 1491 state = MDI_CLIENT_STATE_OPTIMAL; 1492 } 1493 } else if (online_count == 1) { 1494 if (standby_count == 0) { 1495 state = MDI_CLIENT_STATE_DEGRADED; 1496 } else { 1497 state = MDI_CLIENT_STATE_OPTIMAL; 1498 } 1499 } else { 1500 state = MDI_CLIENT_STATE_OPTIMAL; 1501 } 1502 return (state); 1503 } 1504 
1505 /* 1506 * i_mdi_client2devinfo(): 1507 * Utility function 1508 */ 1509 dev_info_t * 1510 i_mdi_client2devinfo(mdi_client_t *ct) 1511 { 1512 return (ct->ct_dip); 1513 } 1514 1515 /* 1516 * mdi_client_path2_devinfo(): 1517 * Given the parent devinfo and child devfs pathname, search for 1518 * a valid devfs node handle. 1519 */ 1520 dev_info_t * 1521 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1522 { 1523 dev_info_t *cdip = NULL; 1524 dev_info_t *ndip = NULL; 1525 char *temp_pathname; 1526 int circular; 1527 1528 /* 1529 * Allocate temp buffer 1530 */ 1531 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1532 1533 /* 1534 * Lock parent against changes 1535 */ 1536 ndi_devi_enter(vdip, &circular); 1537 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1538 while ((cdip = ndip) != NULL) { 1539 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1540 1541 *temp_pathname = '\0'; 1542 (void) ddi_pathname(cdip, temp_pathname); 1543 if (strcmp(temp_pathname, pathname) == 0) { 1544 break; 1545 } 1546 } 1547 /* 1548 * Release devinfo lock 1549 */ 1550 ndi_devi_exit(vdip, circular); 1551 1552 /* 1553 * Free the temp buffer 1554 */ 1555 kmem_free(temp_pathname, MAXPATHLEN); 1556 return (cdip); 1557 } 1558 1559 /* 1560 * mdi_client_get_path_count(): 1561 * Utility function to get number of path information nodes 1562 * associated with a given client device. 
1563 */ 1564 int 1565 mdi_client_get_path_count(dev_info_t *cdip) 1566 { 1567 mdi_client_t *ct; 1568 int count = 0; 1569 1570 ct = i_devi_get_client(cdip); 1571 if (ct != NULL) { 1572 count = ct->ct_path_count; 1573 } 1574 return (count); 1575 } 1576 1577 1578 /* 1579 * i_mdi_get_hash_key(): 1580 * Create a hash using strings as keys 1581 * 1582 */ 1583 static int 1584 i_mdi_get_hash_key(char *str) 1585 { 1586 uint32_t g, hash = 0; 1587 char *p; 1588 1589 for (p = str; *p != '\0'; p++) { 1590 g = *p; 1591 hash += g; 1592 } 1593 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1594 } 1595 1596 /* 1597 * mdi_get_lb_policy(): 1598 * Get current load balancing policy for a given client device 1599 */ 1600 client_lb_t 1601 mdi_get_lb_policy(dev_info_t *cdip) 1602 { 1603 client_lb_t lb = LOAD_BALANCE_NONE; 1604 mdi_client_t *ct; 1605 1606 ct = i_devi_get_client(cdip); 1607 if (ct != NULL) { 1608 lb = ct->ct_lb; 1609 } 1610 return (lb); 1611 } 1612 1613 /* 1614 * mdi_set_lb_region_size(): 1615 * Set current region size for the load-balance 1616 */ 1617 int 1618 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1619 { 1620 mdi_client_t *ct; 1621 int rv = MDI_FAILURE; 1622 1623 ct = i_devi_get_client(cdip); 1624 if (ct != NULL && ct->ct_lb_args != NULL) { 1625 ct->ct_lb_args->region_size = region_size; 1626 rv = MDI_SUCCESS; 1627 } 1628 return (rv); 1629 } 1630 1631 /* 1632 * mdi_Set_lb_policy(): 1633 * Set current load balancing policy for a given client device 1634 */ 1635 int 1636 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1637 { 1638 mdi_client_t *ct; 1639 int rv = MDI_FAILURE; 1640 1641 ct = i_devi_get_client(cdip); 1642 if (ct != NULL) { 1643 ct->ct_lb = lb; 1644 rv = MDI_SUCCESS; 1645 } 1646 return (rv); 1647 } 1648 1649 /* 1650 * mdi_failover(): 1651 * failover function called by the vHCI drivers to initiate 1652 * a failover operation. This is typically due to non-availability 1653 * of online paths to route I/O requests. 
Failover can be 1654 * triggered through user application also. 1655 * 1656 * The vHCI driver calls mdi_failover() to initiate a failover 1657 * operation. mdi_failover() calls back into the vHCI driver's 1658 * vo_failover() entry point to perform the actual failover 1659 * operation. The reason for requiring the vHCI driver to 1660 * initiate failover by calling mdi_failover(), instead of directly 1661 * executing vo_failover() itself, is to ensure that the mdi 1662 * framework can keep track of the client state properly. 1663 * Additionally, mdi_failover() provides as a convenience the 1664 * option of performing the failover operation synchronously or 1665 * asynchronously 1666 * 1667 * Upon successful completion of the failover operation, the 1668 * paths that were previously ONLINE will be in the STANDBY state, 1669 * and the newly activated paths will be in the ONLINE state. 1670 * 1671 * The flags modifier determines whether the activation is done 1672 * synchronously: MDI_FAILOVER_SYNC 1673 * Return Values: 1674 * MDI_SUCCESS 1675 * MDI_FAILURE 1676 * MDI_BUSY 1677 */ 1678 /*ARGSUSED*/ 1679 int 1680 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1681 { 1682 int rv; 1683 mdi_client_t *ct; 1684 1685 ct = i_devi_get_client(cdip); 1686 ASSERT(ct != NULL); 1687 if (ct == NULL) { 1688 /* cdip is not a valid client device. Nothing more to do. */ 1689 return (MDI_FAILURE); 1690 } 1691 1692 MDI_CLIENT_LOCK(ct); 1693 1694 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1695 /* A path to the client is being freed */ 1696 MDI_CLIENT_UNLOCK(ct); 1697 return (MDI_BUSY); 1698 } 1699 1700 1701 if (MDI_CLIENT_IS_FAILED(ct)) { 1702 /* 1703 * Client is in failed state. Nothing more to do. 
1704 */ 1705 MDI_CLIENT_UNLOCK(ct); 1706 return (MDI_FAILURE); 1707 } 1708 1709 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1710 /* 1711 * Failover is already in progress; return BUSY 1712 */ 1713 MDI_CLIENT_UNLOCK(ct); 1714 return (MDI_BUSY); 1715 } 1716 /* 1717 * Make sure that mdi_pathinfo node state changes are processed. 1718 * We do not allow failovers to progress while client path state 1719 * changes are in progress 1720 */ 1721 if (ct->ct_unstable) { 1722 if (flags == MDI_FAILOVER_ASYNC) { 1723 MDI_CLIENT_UNLOCK(ct); 1724 return (MDI_BUSY); 1725 } else { 1726 while (ct->ct_unstable) 1727 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1728 } 1729 } 1730 1731 /* 1732 * Client device is in stable state. Before proceeding, perform sanity 1733 * checks again. 1734 */ 1735 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1736 (!i_ddi_devi_attached(ct->ct_dip))) { 1737 /* 1738 * Client is in failed state. Nothing more to do. 1739 */ 1740 MDI_CLIENT_UNLOCK(ct); 1741 return (MDI_FAILURE); 1742 } 1743 1744 /* 1745 * Set the client state as failover in progress. 1746 */ 1747 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1748 ct->ct_failover_flags = flags; 1749 MDI_CLIENT_UNLOCK(ct); 1750 1751 if (flags == MDI_FAILOVER_ASYNC) { 1752 /* 1753 * Submit the initiate failover request via CPR safe 1754 * taskq threads. 1755 */ 1756 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1757 ct, KM_SLEEP); 1758 return (MDI_ACCEPT); 1759 } else { 1760 /* 1761 * Synchronous failover mode. Typically invoked from the user 1762 * land. 1763 */ 1764 rv = i_mdi_failover(ct); 1765 } 1766 return (rv); 1767 } 1768 1769 /* 1770 * i_mdi_failover(): 1771 * internal failover function. Invokes vHCI drivers failover 1772 * callback function and process the failover status 1773 * Return Values: 1774 * None 1775 * 1776 * Note: A client device in failover state can not be detached or freed. 
1777 */ 1778 static int 1779 i_mdi_failover(void *arg) 1780 { 1781 int rv = MDI_SUCCESS; 1782 mdi_client_t *ct = (mdi_client_t *)arg; 1783 mdi_vhci_t *vh = ct->ct_vhci; 1784 1785 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1786 1787 if (vh->vh_ops->vo_failover != NULL) { 1788 /* 1789 * Call vHCI drivers callback routine 1790 */ 1791 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1792 ct->ct_failover_flags); 1793 } 1794 1795 MDI_CLIENT_LOCK(ct); 1796 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1797 1798 /* 1799 * Save the failover return status 1800 */ 1801 ct->ct_failover_status = rv; 1802 1803 /* 1804 * As a result of failover, client status would have been changed. 1805 * Update the client state and wake up anyone waiting on this client 1806 * device. 1807 */ 1808 i_mdi_client_update_state(ct); 1809 1810 cv_broadcast(&ct->ct_failover_cv); 1811 MDI_CLIENT_UNLOCK(ct); 1812 return (rv); 1813 } 1814 1815 /* 1816 * Load balancing is logical block. 1817 * IOs within the range described by region_size 1818 * would go on the same path. This would improve the 1819 * performance by cache-hit on some of the RAID devices. 1820 * Search only for online paths(At some point we 1821 * may want to balance across target ports). 1822 * If no paths are found then default to round-robin. 
1823 */ 1824 static int 1825 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1826 { 1827 int path_index = -1; 1828 int online_path_count = 0; 1829 int online_nonpref_path_count = 0; 1830 int region_size = ct->ct_lb_args->region_size; 1831 mdi_pathinfo_t *pip; 1832 mdi_pathinfo_t *next; 1833 int preferred, path_cnt; 1834 1835 pip = ct->ct_path_head; 1836 while (pip) { 1837 MDI_PI_LOCK(pip); 1838 if (MDI_PI(pip)->pi_state == 1839 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1840 online_path_count++; 1841 } else if (MDI_PI(pip)->pi_state == 1842 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1843 online_nonpref_path_count++; 1844 } 1845 next = (mdi_pathinfo_t *) 1846 MDI_PI(pip)->pi_client_link; 1847 MDI_PI_UNLOCK(pip); 1848 pip = next; 1849 } 1850 /* if found any online/preferred then use this type */ 1851 if (online_path_count > 0) { 1852 path_cnt = online_path_count; 1853 preferred = 1; 1854 } else if (online_nonpref_path_count > 0) { 1855 path_cnt = online_nonpref_path_count; 1856 preferred = 0; 1857 } else { 1858 path_cnt = 0; 1859 } 1860 if (path_cnt) { 1861 path_index = (bp->b_blkno >> region_size) % path_cnt; 1862 pip = ct->ct_path_head; 1863 while (pip && path_index != -1) { 1864 MDI_PI_LOCK(pip); 1865 if (path_index == 0 && 1866 (MDI_PI(pip)->pi_state == 1867 MDI_PATHINFO_STATE_ONLINE) && 1868 MDI_PI(pip)->pi_preferred == preferred) { 1869 MDI_PI_HOLD(pip); 1870 MDI_PI_UNLOCK(pip); 1871 *ret_pip = pip; 1872 return (MDI_SUCCESS); 1873 } 1874 path_index --; 1875 next = (mdi_pathinfo_t *) 1876 MDI_PI(pip)->pi_client_link; 1877 MDI_PI_UNLOCK(pip); 1878 pip = next; 1879 } 1880 if (pip == NULL) { 1881 MDI_DEBUG(4, (CE_NOTE, NULL, 1882 "!lba %llx, no pip !!\n", 1883 bp->b_lblkno)); 1884 } else { 1885 MDI_DEBUG(4, (CE_NOTE, NULL, 1886 "!lba %llx, no pip for path_index, " 1887 "pip %p\n", bp->b_lblkno, (void *)pip)); 1888 } 1889 } 1890 return (MDI_FAILURE); 1891 } 1892 1893 /* 1894 * mdi_select_path(): 1895 * select a 
path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is
 *		configured through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  Any non-zero flags value disables the
 *		standard behavior: the client state checks are skipped and
 *		the round robin walk is always used.  If start_pip is
 *		specified (non NULL) it is used as start point to walk and
 *		find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
 *		Both may additionally be combined with
 *		MDI_SELECT_USER_DISABLE_PATH to also accept user-disabled
 *		ONLINE/STANDBY paths.  Any other combination selects nothing.
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt). Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path qualified but at least one path seen
 *				  during the walk was in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	/* Callers always see NULL in *ret_pip unless a path is selected */
	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	/* Held for the whole selection; every return path below drops it */
	MDI_CLIENT_LOCK(ct);

	/* The client state checks apply to the standard behavior only */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * The list is walked circularly, at most twice: once for
		 * preferred paths and, after wrapping back to "start", once
		 * more for non-preferred paths.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.  Remember it so that a fruitless
			 * search reports MDI_BUSY instead of MDI_NOPATH.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				/* first lap done: retry with non-preferred */
				preferred = 0;
			} else if (start == pip && !preferred) {
				/* second lap done: give up */
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* LBA selection not applicable or failed: use round robin */
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Standard behavior resumes after the last selected path;
		 * non-standard behavior resumes after the caller's start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * "cond" says whether this path qualifies: its state
			 * must be one accepted by the request and its
			 * preference must match the current pass.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					/* unrecognized flag combination */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				/*
				 * Only the standard behavior advances the
				 * round robin cursor.
				 */
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.  Remember it so that a fruitless
			 * search reports MDI_BUSY instead of MDI_NOPATH.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 */
			/* Entered with pip locked, also via "goto do_again" */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path qualified.  Report MDI_BUSY if a transient path was seen
	 * during the walk, MDI_NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
2332 */ 2333 mdi_pathinfo_t * 2334 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2335 { 2336 mdi_client_t *ct; 2337 2338 if (!MDI_CLIENT(ct_dip)) 2339 return (NULL); 2340 2341 /* 2342 * Walk through client link 2343 */ 2344 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2345 ASSERT(ct != NULL); 2346 2347 if (pip == NULL) 2348 return ((mdi_pathinfo_t *)ct->ct_path_head); 2349 2350 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2351 } 2352 2353 /* 2354 * For a phci, return the next available path to any client 2355 * Note: ditto mdi_get_next_phci_path() 2356 */ 2357 mdi_pathinfo_t * 2358 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2359 { 2360 mdi_phci_t *ph; 2361 2362 if (!MDI_PHCI(ph_dip)) 2363 return (NULL); 2364 2365 /* 2366 * Walk through pHCI link 2367 */ 2368 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2369 ASSERT(ph != NULL); 2370 2371 if (pip == NULL) 2372 return ((mdi_pathinfo_t *)ph->ph_path_head); 2373 2374 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2375 } 2376 2377 /* 2378 * mdi_hold_path(): 2379 * Hold the mdi_pathinfo node against unwanted unexpected free. 2380 * Return Values: 2381 * None 2382 */ 2383 void 2384 mdi_hold_path(mdi_pathinfo_t *pip) 2385 { 2386 if (pip) { 2387 MDI_PI_LOCK(pip); 2388 MDI_PI_HOLD(pip); 2389 MDI_PI_UNLOCK(pip); 2390 } 2391 } 2392 2393 2394 /* 2395 * mdi_rele_path(): 2396 * Release the mdi_pathinfo node which was selected 2397 * through mdi_select_path() mechanism or manually held by 2398 * calling mdi_hold_path(). 2399 * Return Values: 2400 * None 2401 */ 2402 void 2403 mdi_rele_path(mdi_pathinfo_t *pip) 2404 { 2405 if (pip) { 2406 MDI_PI_LOCK(pip); 2407 MDI_PI_RELE(pip); 2408 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2409 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2410 } 2411 MDI_PI_UNLOCK(pip); 2412 } 2413 } 2414 2415 /* 2416 * mdi_pi_lock(): 2417 * Lock the mdi_pathinfo node. 
2418 * Note: 2419 * The caller should release the lock by calling mdi_pi_unlock() 2420 */ 2421 void 2422 mdi_pi_lock(mdi_pathinfo_t *pip) 2423 { 2424 ASSERT(pip != NULL); 2425 if (pip) { 2426 MDI_PI_LOCK(pip); 2427 } 2428 } 2429 2430 2431 /* 2432 * mdi_pi_unlock(): 2433 * Unlock the mdi_pathinfo node. 2434 * Note: 2435 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2436 */ 2437 void 2438 mdi_pi_unlock(mdi_pathinfo_t *pip) 2439 { 2440 ASSERT(pip != NULL); 2441 if (pip) { 2442 MDI_PI_UNLOCK(pip); 2443 } 2444 } 2445 2446 /* 2447 * mdi_pi_find(): 2448 * Search the list of mdi_pathinfo nodes attached to the 2449 * pHCI/Client device node whose path address matches "paddr". 2450 * Returns a pointer to the mdi_pathinfo node if a matching node is 2451 * found. 2452 * Return Values: 2453 * mdi_pathinfo node handle 2454 * NULL 2455 * Notes: 2456 * Caller need not hold any locks to call this function. 2457 */ 2458 mdi_pathinfo_t * 2459 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2460 { 2461 mdi_phci_t *ph; 2462 mdi_vhci_t *vh; 2463 mdi_client_t *ct; 2464 mdi_pathinfo_t *pip = NULL; 2465 2466 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s", 2467 caddr ? caddr : "NULL", paddr ? paddr : "NULL")); 2468 if ((pdip == NULL) || (paddr == NULL)) { 2469 return (NULL); 2470 } 2471 ph = i_devi_get_phci(pdip); 2472 if (ph == NULL) { 2473 /* 2474 * Invalid pHCI device, Nothing more to do. 2475 */ 2476 MDI_DEBUG(2, (CE_WARN, pdip, 2477 "!mdi_pi_find: invalid phci")); 2478 return (NULL); 2479 } 2480 2481 vh = ph->ph_vhci; 2482 if (vh == NULL) { 2483 /* 2484 * Invalid vHCI device, Nothing more to do. 2485 */ 2486 MDI_DEBUG(2, (CE_WARN, pdip, 2487 "!mdi_pi_find: invalid vhci")); 2488 return (NULL); 2489 } 2490 2491 /* 2492 * Look for pathinfo node identified by paddr. 2493 */ 2494 if (caddr == NULL) { 2495 /* 2496 * Find a mdi_pathinfo node under pHCI list for a matching 2497 * unit address. 
2498 */ 2499 MDI_PHCI_LOCK(ph); 2500 if (MDI_PHCI_IS_OFFLINE(ph)) { 2501 MDI_DEBUG(2, (CE_WARN, pdip, 2502 "!mdi_pi_find: offline phci %p", (void *)ph)); 2503 MDI_PHCI_UNLOCK(ph); 2504 return (NULL); 2505 } 2506 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2507 2508 while (pip != NULL) { 2509 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2510 break; 2511 } 2512 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2513 } 2514 MDI_PHCI_UNLOCK(ph); 2515 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p", 2516 (void *)pip)); 2517 return (pip); 2518 } 2519 2520 /* 2521 * XXX - Is the rest of the code in this function really necessary? 2522 * The consumers of mdi_pi_find() can search for the desired pathinfo 2523 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2524 * whether the search is based on the pathinfo nodes attached to 2525 * the pHCI or the client node, the result will be the same. 2526 */ 2527 2528 /* 2529 * Find the client device corresponding to 'caddr' 2530 */ 2531 MDI_VHCI_CLIENT_LOCK(vh); 2532 2533 /* 2534 * XXX - Passing NULL to the following function works as long as the 2535 * the client addresses (caddr) are unique per vhci basis. 2536 */ 2537 ct = i_mdi_client_find(vh, NULL, caddr); 2538 if (ct == NULL) { 2539 /* 2540 * Client not found, Obviously mdi_pathinfo node has not been 2541 * created yet. 2542 */ 2543 MDI_VHCI_CLIENT_UNLOCK(vh); 2544 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not " 2545 "found for caddr %s", caddr ? caddr : "NULL")); 2546 return (NULL); 2547 } 2548 2549 /* 2550 * Hold the client lock and look for a mdi_pathinfo node with matching 2551 * pHCI and paddr 2552 */ 2553 MDI_CLIENT_LOCK(ct); 2554 2555 /* 2556 * Release the global mutex as it is no more needed. Note: We always 2557 * respect the locking order while acquiring. 
2558 */ 2559 MDI_VHCI_CLIENT_UNLOCK(vh); 2560 2561 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2562 while (pip != NULL) { 2563 /* 2564 * Compare the unit address 2565 */ 2566 if ((MDI_PI(pip)->pi_phci == ph) && 2567 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2568 break; 2569 } 2570 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2571 } 2572 MDI_CLIENT_UNLOCK(ct); 2573 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip)); 2574 return (pip); 2575 } 2576 2577 /* 2578 * mdi_pi_alloc(): 2579 * Allocate and initialize a new instance of a mdi_pathinfo node. 2580 * The mdi_pathinfo node returned by this function identifies a 2581 * unique device path is capable of having properties attached 2582 * and passed to mdi_pi_online() to fully attach and online the 2583 * path and client device node. 2584 * The mdi_pathinfo node returned by this function must be 2585 * destroyed using mdi_pi_free() if the path is no longer 2586 * operational or if the caller fails to attach a client device 2587 * node when calling mdi_pi_online(). The framework will not free 2588 * the resources allocated. 2589 * This function can be called from both interrupt and kernel 2590 * contexts. DDI_NOSLEEP flag should be used while calling 2591 * from interrupt contexts. 2592 * Return Values: 2593 * MDI_SUCCESS 2594 * MDI_FAILURE 2595 * MDI_NOMEM 2596 */ 2597 /*ARGSUSED*/ 2598 int 2599 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2600 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2601 { 2602 mdi_vhci_t *vh; 2603 mdi_phci_t *ph; 2604 mdi_client_t *ct; 2605 mdi_pathinfo_t *pip = NULL; 2606 dev_info_t *cdip; 2607 int rv = MDI_NOMEM; 2608 int path_allocated = 0; 2609 2610 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s", 2611 cname ? cname : "NULL", caddr ? caddr : "NULL", 2612 paddr ? 
paddr : "NULL")); 2613 2614 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2615 ret_pip == NULL) { 2616 /* Nothing more to do */ 2617 return (MDI_FAILURE); 2618 } 2619 2620 *ret_pip = NULL; 2621 2622 /* No allocations on detaching pHCI */ 2623 if (DEVI_IS_DETACHING(pdip)) { 2624 /* Invalid pHCI device, return failure */ 2625 MDI_DEBUG(1, (CE_WARN, pdip, 2626 "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip)); 2627 return (MDI_FAILURE); 2628 } 2629 2630 ph = i_devi_get_phci(pdip); 2631 ASSERT(ph != NULL); 2632 if (ph == NULL) { 2633 /* Invalid pHCI device, return failure */ 2634 MDI_DEBUG(1, (CE_WARN, pdip, 2635 "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip)); 2636 return (MDI_FAILURE); 2637 } 2638 2639 MDI_PHCI_LOCK(ph); 2640 vh = ph->ph_vhci; 2641 if (vh == NULL) { 2642 /* Invalid vHCI device, return failure */ 2643 MDI_DEBUG(1, (CE_WARN, pdip, 2644 "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip)); 2645 MDI_PHCI_UNLOCK(ph); 2646 return (MDI_FAILURE); 2647 } 2648 2649 if (MDI_PHCI_IS_READY(ph) == 0) { 2650 /* 2651 * Do not allow new node creation when pHCI is in 2652 * offline/suspended states 2653 */ 2654 MDI_DEBUG(1, (CE_WARN, pdip, 2655 "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph)); 2656 MDI_PHCI_UNLOCK(ph); 2657 return (MDI_BUSY); 2658 } 2659 MDI_PHCI_UNSTABLE(ph); 2660 MDI_PHCI_UNLOCK(ph); 2661 2662 /* look for a matching client, create one if not found */ 2663 MDI_VHCI_CLIENT_LOCK(vh); 2664 ct = i_mdi_client_find(vh, cname, caddr); 2665 if (ct == NULL) { 2666 ct = i_mdi_client_alloc(vh, cname, caddr); 2667 ASSERT(ct != NULL); 2668 } 2669 2670 if (ct->ct_dip == NULL) { 2671 /* 2672 * Allocate a devinfo node 2673 */ 2674 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2675 compatible, ncompatible); 2676 if (ct->ct_dip == NULL) { 2677 (void) i_mdi_client_free(vh, ct); 2678 goto fail; 2679 } 2680 } 2681 cdip = ct->ct_dip; 2682 2683 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2684 DEVI(cdip)->devi_mdi_client = 
(caddr_t)ct; 2685 2686 MDI_CLIENT_LOCK(ct); 2687 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2688 while (pip != NULL) { 2689 /* 2690 * Compare the unit address 2691 */ 2692 if ((MDI_PI(pip)->pi_phci == ph) && 2693 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2694 break; 2695 } 2696 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2697 } 2698 MDI_CLIENT_UNLOCK(ct); 2699 2700 if (pip == NULL) { 2701 /* 2702 * This is a new path for this client device. Allocate and 2703 * initialize a new pathinfo node 2704 */ 2705 pip = i_mdi_pi_alloc(ph, paddr, ct); 2706 ASSERT(pip != NULL); 2707 path_allocated = 1; 2708 } 2709 rv = MDI_SUCCESS; 2710 2711 fail: 2712 /* 2713 * Release the global mutex. 2714 */ 2715 MDI_VHCI_CLIENT_UNLOCK(vh); 2716 2717 /* 2718 * Mark the pHCI as stable 2719 */ 2720 MDI_PHCI_LOCK(ph); 2721 MDI_PHCI_STABLE(ph); 2722 MDI_PHCI_UNLOCK(ph); 2723 *ret_pip = pip; 2724 2725 MDI_DEBUG(2, (CE_NOTE, pdip, 2726 "!mdi_pi_alloc_compatible: alloc %p", (void *)pip)); 2727 2728 if (path_allocated) 2729 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2730 2731 return (rv); 2732 } 2733 2734 /*ARGSUSED*/ 2735 int 2736 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2737 int flags, mdi_pathinfo_t **ret_pip) 2738 { 2739 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2740 flags, ret_pip)); 2741 } 2742 2743 /* 2744 * i_mdi_pi_alloc(): 2745 * Allocate a mdi_pathinfo node and add to the pHCI path list 2746 * Return Values: 2747 * mdi_pathinfo 2748 */ 2749 /*ARGSUSED*/ 2750 static mdi_pathinfo_t * 2751 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2752 { 2753 mdi_pathinfo_t *pip; 2754 int ct_circular; 2755 int ph_circular; 2756 int se_flag; 2757 int kmem_flag; 2758 2759 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2760 2761 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2762 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2763 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2764 
MDI_PATHINFO_STATE_TRANSIENT; 2765 2766 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2767 MDI_PI_SET_USER_DISABLE(pip); 2768 2769 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2770 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2771 2772 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2773 MDI_PI_SET_DRV_DISABLE(pip); 2774 2775 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2776 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2777 MDI_PI(pip)->pi_client = ct; 2778 MDI_PI(pip)->pi_phci = ph; 2779 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2780 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2781 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2782 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2783 MDI_PI(pip)->pi_pprivate = NULL; 2784 MDI_PI(pip)->pi_cprivate = NULL; 2785 MDI_PI(pip)->pi_vprivate = NULL; 2786 MDI_PI(pip)->pi_client_link = NULL; 2787 MDI_PI(pip)->pi_phci_link = NULL; 2788 MDI_PI(pip)->pi_ref_cnt = 0; 2789 MDI_PI(pip)->pi_kstats = NULL; 2790 MDI_PI(pip)->pi_preferred = 1; 2791 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2792 2793 /* 2794 * Lock both dev_info nodes against changes in parallel. 2795 * 2796 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 2797 * This atypical operation is done to synchronize pathinfo nodes 2798 * during devinfo snapshot (see di_register_pip) by 'pretending' that 2799 * the pathinfo nodes are children of the Client. 2800 */ 2801 ndi_devi_enter(ct->ct_dip, &ct_circular); 2802 ndi_devi_enter(ph->ph_dip, &ph_circular); 2803 2804 i_mdi_phci_add_path(ph, pip); 2805 i_mdi_client_add_path(ct, pip); 2806 2807 ndi_devi_exit(ph->ph_dip, ph_circular); 2808 ndi_devi_exit(ct->ct_dip, ct_circular); 2809 2810 /* determine interrupt context */ 2811 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2812 kmem_flag = (se_flag == SE_SLEEP) ? 
KM_SLEEP : KM_NOSLEEP; 2813 2814 i_ddi_di_cache_invalidate(kmem_flag); 2815 2816 return (pip); 2817 } 2818 2819 /* 2820 * i_mdi_phci_add_path(): 2821 * Add a mdi_pathinfo node to pHCI list. 2822 * Notes: 2823 * Caller should per-pHCI mutex 2824 */ 2825 static void 2826 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2827 { 2828 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2829 2830 MDI_PHCI_LOCK(ph); 2831 if (ph->ph_path_head == NULL) { 2832 ph->ph_path_head = pip; 2833 } else { 2834 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 2835 } 2836 ph->ph_path_tail = pip; 2837 ph->ph_path_count++; 2838 MDI_PHCI_UNLOCK(ph); 2839 } 2840 2841 /* 2842 * i_mdi_client_add_path(): 2843 * Add mdi_pathinfo node to client list 2844 */ 2845 static void 2846 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2847 { 2848 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2849 2850 MDI_CLIENT_LOCK(ct); 2851 if (ct->ct_path_head == NULL) { 2852 ct->ct_path_head = pip; 2853 } else { 2854 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 2855 } 2856 ct->ct_path_tail = pip; 2857 ct->ct_path_count++; 2858 MDI_CLIENT_UNLOCK(ct); 2859 } 2860 2861 /* 2862 * mdi_pi_free(): 2863 * Free the mdi_pathinfo node and also client device node if this 2864 * is the last path to the device 2865 * Return Values: 2866 * MDI_SUCCESS 2867 * MDI_FAILURE 2868 * MDI_BUSY 2869 */ 2870 /*ARGSUSED*/ 2871 int 2872 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 2873 { 2874 int rv = MDI_FAILURE; 2875 mdi_vhci_t *vh; 2876 mdi_phci_t *ph; 2877 mdi_client_t *ct; 2878 int (*f)(); 2879 int client_held = 0; 2880 2881 MDI_PI_LOCK(pip); 2882 ph = MDI_PI(pip)->pi_phci; 2883 ASSERT(ph != NULL); 2884 if (ph == NULL) { 2885 /* 2886 * Invalid pHCI device, return failure 2887 */ 2888 MDI_DEBUG(1, (CE_WARN, NULL, 2889 "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip)); 2890 MDI_PI_UNLOCK(pip); 2891 return (MDI_FAILURE); 2892 } 2893 2894 vh = ph->ph_vhci; 2895 ASSERT(vh != NULL); 2896 if (vh == NULL) { 2897 /* Invalid pHCI 
device, return failure */ 2898 MDI_DEBUG(1, (CE_WARN, NULL, 2899 "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip)); 2900 MDI_PI_UNLOCK(pip); 2901 return (MDI_FAILURE); 2902 } 2903 2904 ct = MDI_PI(pip)->pi_client; 2905 ASSERT(ct != NULL); 2906 if (ct == NULL) { 2907 /* 2908 * Invalid Client device, return failure 2909 */ 2910 MDI_DEBUG(1, (CE_WARN, NULL, 2911 "!mdi_pi_free: invalid client pip=%p", (void *)pip)); 2912 MDI_PI_UNLOCK(pip); 2913 return (MDI_FAILURE); 2914 } 2915 2916 /* 2917 * Check to see for busy condition. A mdi_pathinfo can only be freed 2918 * if the node state is either offline or init and the reference count 2919 * is zero. 2920 */ 2921 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 2922 MDI_PI_IS_INITING(pip))) { 2923 /* 2924 * Node is busy 2925 */ 2926 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 2927 "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip)); 2928 MDI_PI_UNLOCK(pip); 2929 return (MDI_BUSY); 2930 } 2931 2932 while (MDI_PI(pip)->pi_ref_cnt != 0) { 2933 /* 2934 * Give a chance for pending I/Os to complete. 2935 */ 2936 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: " 2937 "%d cmds still pending on path: %p\n", 2938 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 2939 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 2940 &MDI_PI(pip)->pi_mutex, 2941 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 2942 /* 2943 * The timeout time reached without ref_cnt being zero 2944 * being signaled. 
2945 */ 2946 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 2947 "!mdi_pi_free: " 2948 "Timeout reached on path %p without the cond\n", 2949 (void *)pip)); 2950 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 2951 "!mdi_pi_free: " 2952 "%d cmds still pending on path: %p\n", 2953 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 2954 MDI_PI_UNLOCK(pip); 2955 return (MDI_BUSY); 2956 } 2957 } 2958 if (MDI_PI(pip)->pi_pm_held) { 2959 client_held = 1; 2960 } 2961 MDI_PI_UNLOCK(pip); 2962 2963 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 2964 2965 MDI_CLIENT_LOCK(ct); 2966 2967 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 2968 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 2969 2970 /* 2971 * Wait till failover is complete before removing this node. 2972 */ 2973 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 2974 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 2975 2976 MDI_CLIENT_UNLOCK(ct); 2977 MDI_VHCI_CLIENT_LOCK(vh); 2978 MDI_CLIENT_LOCK(ct); 2979 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 2980 2981 if (!MDI_PI_IS_INITING(pip)) { 2982 f = vh->vh_ops->vo_pi_uninit; 2983 if (f != NULL) { 2984 rv = (*f)(vh->vh_dip, pip, 0); 2985 } 2986 } 2987 /* 2988 * If vo_pi_uninit() completed successfully. 2989 */ 2990 if (rv == MDI_SUCCESS) { 2991 if (client_held) { 2992 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 2993 "i_mdi_pm_rele_client\n")); 2994 i_mdi_pm_rele_client(ct, 1); 2995 } 2996 i_mdi_pi_free(ph, pip, ct); 2997 if (ct->ct_path_count == 0) { 2998 /* 2999 * Client lost its last path. 
3000 * Clean up the client device 3001 */ 3002 MDI_CLIENT_UNLOCK(ct); 3003 (void) i_mdi_client_free(ct->ct_vhci, ct); 3004 MDI_VHCI_CLIENT_UNLOCK(vh); 3005 return (rv); 3006 } 3007 } 3008 MDI_CLIENT_UNLOCK(ct); 3009 MDI_VHCI_CLIENT_UNLOCK(vh); 3010 3011 if (rv == MDI_FAILURE) 3012 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3013 3014 return (rv); 3015 } 3016 3017 /* 3018 * i_mdi_pi_free(): 3019 * Free the mdi_pathinfo node 3020 */ 3021 static void 3022 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3023 { 3024 int ct_circular; 3025 int ph_circular; 3026 int se_flag; 3027 int kmem_flag; 3028 3029 ASSERT(MDI_CLIENT_LOCKED(ct)); 3030 3031 /* 3032 * remove any per-path kstats 3033 */ 3034 i_mdi_pi_kstat_destroy(pip); 3035 3036 /* See comments in i_mdi_pi_alloc() */ 3037 ndi_devi_enter(ct->ct_dip, &ct_circular); 3038 ndi_devi_enter(ph->ph_dip, &ph_circular); 3039 3040 i_mdi_client_remove_path(ct, pip); 3041 i_mdi_phci_remove_path(ph, pip); 3042 3043 ndi_devi_exit(ph->ph_dip, ph_circular); 3044 ndi_devi_exit(ct->ct_dip, ct_circular); 3045 3046 /* determine interrupt context */ 3047 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3048 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3049 3050 i_ddi_di_cache_invalidate(kmem_flag); 3051 3052 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3053 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3054 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3055 if (MDI_PI(pip)->pi_addr) { 3056 kmem_free(MDI_PI(pip)->pi_addr, 3057 strlen(MDI_PI(pip)->pi_addr) + 1); 3058 MDI_PI(pip)->pi_addr = NULL; 3059 } 3060 3061 if (MDI_PI(pip)->pi_prop) { 3062 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3063 MDI_PI(pip)->pi_prop = NULL; 3064 } 3065 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3066 } 3067 3068 3069 /* 3070 * i_mdi_phci_remove_path(): 3071 * Remove a mdi_pathinfo node from pHCI list. 
3072 * Notes: 3073 * Caller should hold per-pHCI mutex 3074 */ 3075 static void 3076 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3077 { 3078 mdi_pathinfo_t *prev = NULL; 3079 mdi_pathinfo_t *path = NULL; 3080 3081 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3082 3083 MDI_PHCI_LOCK(ph); 3084 path = ph->ph_path_head; 3085 while (path != NULL) { 3086 if (path == pip) { 3087 break; 3088 } 3089 prev = path; 3090 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3091 } 3092 3093 if (path) { 3094 ph->ph_path_count--; 3095 if (prev) { 3096 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3097 } else { 3098 ph->ph_path_head = 3099 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3100 } 3101 if (ph->ph_path_tail == path) { 3102 ph->ph_path_tail = prev; 3103 } 3104 } 3105 3106 /* 3107 * Clear the pHCI link 3108 */ 3109 MDI_PI(pip)->pi_phci_link = NULL; 3110 MDI_PI(pip)->pi_phci = NULL; 3111 MDI_PHCI_UNLOCK(ph); 3112 } 3113 3114 /* 3115 * i_mdi_client_remove_path(): 3116 * Remove a mdi_pathinfo node from client path list. 
3117 */ 3118 static void 3119 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3120 { 3121 mdi_pathinfo_t *prev = NULL; 3122 mdi_pathinfo_t *path; 3123 3124 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3125 3126 ASSERT(MDI_CLIENT_LOCKED(ct)); 3127 path = ct->ct_path_head; 3128 while (path != NULL) { 3129 if (path == pip) { 3130 break; 3131 } 3132 prev = path; 3133 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3134 } 3135 3136 if (path) { 3137 ct->ct_path_count--; 3138 if (prev) { 3139 MDI_PI(prev)->pi_client_link = 3140 MDI_PI(path)->pi_client_link; 3141 } else { 3142 ct->ct_path_head = 3143 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3144 } 3145 if (ct->ct_path_tail == path) { 3146 ct->ct_path_tail = prev; 3147 } 3148 if (ct->ct_path_last == path) { 3149 ct->ct_path_last = ct->ct_path_head; 3150 } 3151 } 3152 MDI_PI(pip)->pi_client_link = NULL; 3153 MDI_PI(pip)->pi_client = NULL; 3154 } 3155 3156 /* 3157 * i_mdi_pi_state_change(): 3158 * online a mdi_pathinfo node 3159 * 3160 * Return Values: 3161 * MDI_SUCCESS 3162 * MDI_FAILURE 3163 */ 3164 /*ARGSUSED*/ 3165 static int 3166 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3167 { 3168 int rv = MDI_SUCCESS; 3169 mdi_vhci_t *vh; 3170 mdi_phci_t *ph; 3171 mdi_client_t *ct; 3172 int (*f)(); 3173 dev_info_t *cdip; 3174 3175 MDI_PI_LOCK(pip); 3176 3177 ph = MDI_PI(pip)->pi_phci; 3178 ASSERT(ph); 3179 if (ph == NULL) { 3180 /* 3181 * Invalid pHCI device, fail the request 3182 */ 3183 MDI_PI_UNLOCK(pip); 3184 MDI_DEBUG(1, (CE_WARN, NULL, 3185 "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip)); 3186 return (MDI_FAILURE); 3187 } 3188 3189 vh = ph->ph_vhci; 3190 ASSERT(vh); 3191 if (vh == NULL) { 3192 /* 3193 * Invalid vHCI device, fail the request 3194 */ 3195 MDI_PI_UNLOCK(pip); 3196 MDI_DEBUG(1, (CE_WARN, NULL, 3197 "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip)); 3198 return (MDI_FAILURE); 3199 } 3200 3201 ct = MDI_PI(pip)->pi_client; 3202 ASSERT(ct 
!= NULL); 3203 if (ct == NULL) { 3204 /* 3205 * Invalid client device, fail the request 3206 */ 3207 MDI_PI_UNLOCK(pip); 3208 MDI_DEBUG(1, (CE_WARN, NULL, 3209 "!mdi_pi_state_change: invalid client pip=%p", 3210 (void *)pip)); 3211 return (MDI_FAILURE); 3212 } 3213 3214 /* 3215 * If this path has not been initialized yet, Callback vHCI driver's 3216 * pathinfo node initialize entry point 3217 */ 3218 3219 if (MDI_PI_IS_INITING(pip)) { 3220 MDI_PI_UNLOCK(pip); 3221 f = vh->vh_ops->vo_pi_init; 3222 if (f != NULL) { 3223 rv = (*f)(vh->vh_dip, pip, 0); 3224 if (rv != MDI_SUCCESS) { 3225 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3226 "!vo_pi_init: failed vHCI=0x%p, pip=0x%p", 3227 (void *)vh, (void *)pip)); 3228 return (MDI_FAILURE); 3229 } 3230 } 3231 MDI_PI_LOCK(pip); 3232 MDI_PI_CLEAR_TRANSIENT(pip); 3233 } 3234 3235 /* 3236 * Do not allow state transition when pHCI is in offline/suspended 3237 * states 3238 */ 3239 i_mdi_phci_lock(ph, pip); 3240 if (MDI_PHCI_IS_READY(ph) == 0) { 3241 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3242 "!mdi_pi_state_change: pHCI not ready, pHCI=%p", 3243 (void *)ph)); 3244 MDI_PI_UNLOCK(pip); 3245 i_mdi_phci_unlock(ph); 3246 return (MDI_BUSY); 3247 } 3248 MDI_PHCI_UNSTABLE(ph); 3249 i_mdi_phci_unlock(ph); 3250 3251 /* 3252 * Check if mdi_pathinfo state is in transient state. 3253 * If yes, offlining is in progress and wait till transient state is 3254 * cleared. 3255 */ 3256 if (MDI_PI_IS_TRANSIENT(pip)) { 3257 while (MDI_PI_IS_TRANSIENT(pip)) { 3258 cv_wait(&MDI_PI(pip)->pi_state_cv, 3259 &MDI_PI(pip)->pi_mutex); 3260 } 3261 } 3262 3263 /* 3264 * Grab the client lock in reverse order sequence and release the 3265 * mdi_pathinfo mutex. 
3266 */ 3267 i_mdi_client_lock(ct, pip); 3268 MDI_PI_UNLOCK(pip); 3269 3270 /* 3271 * Wait till failover state is cleared 3272 */ 3273 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3274 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3275 3276 /* 3277 * Mark the mdi_pathinfo node state as transient 3278 */ 3279 MDI_PI_LOCK(pip); 3280 switch (state) { 3281 case MDI_PATHINFO_STATE_ONLINE: 3282 MDI_PI_SET_ONLINING(pip); 3283 break; 3284 3285 case MDI_PATHINFO_STATE_STANDBY: 3286 MDI_PI_SET_STANDBYING(pip); 3287 break; 3288 3289 case MDI_PATHINFO_STATE_FAULT: 3290 /* 3291 * Mark the pathinfo state as FAULTED 3292 */ 3293 MDI_PI_SET_FAULTING(pip); 3294 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3295 break; 3296 3297 case MDI_PATHINFO_STATE_OFFLINE: 3298 /* 3299 * ndi_devi_offline() cannot hold pip or ct locks. 3300 */ 3301 MDI_PI_UNLOCK(pip); 3302 /* 3303 * Don't offline the client dev_info node unless we have 3304 * no available paths left at all. 3305 */ 3306 cdip = ct->ct_dip; 3307 if ((flag & NDI_DEVI_REMOVE) && 3308 (ct->ct_path_count == 1)) { 3309 i_mdi_client_unlock(ct); 3310 rv = ndi_devi_offline(cdip, 0); 3311 if (rv != NDI_SUCCESS) { 3312 /* 3313 * Convert to MDI error code 3314 */ 3315 switch (rv) { 3316 case NDI_BUSY: 3317 rv = MDI_BUSY; 3318 break; 3319 default: 3320 rv = MDI_FAILURE; 3321 break; 3322 } 3323 goto state_change_exit; 3324 } else { 3325 i_mdi_client_lock(ct, NULL); 3326 } 3327 } 3328 /* 3329 * Mark the mdi_pathinfo node state as transient 3330 */ 3331 MDI_PI_LOCK(pip); 3332 MDI_PI_SET_OFFLINING(pip); 3333 break; 3334 } 3335 MDI_PI_UNLOCK(pip); 3336 MDI_CLIENT_UNSTABLE(ct); 3337 i_mdi_client_unlock(ct); 3338 3339 f = vh->vh_ops->vo_pi_state_change; 3340 if (f != NULL) 3341 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3342 3343 MDI_CLIENT_LOCK(ct); 3344 MDI_PI_LOCK(pip); 3345 if (rv == MDI_NOT_SUPPORTED) { 3346 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3347 } 3348 if (rv != MDI_SUCCESS) { 3349 MDI_DEBUG(2, (CE_WARN, ct->ct_dip, 3350 "!vo_pi_state_change: 
failed rv = %x", rv)); 3351 } 3352 if (MDI_PI_IS_TRANSIENT(pip)) { 3353 if (rv == MDI_SUCCESS) { 3354 MDI_PI_CLEAR_TRANSIENT(pip); 3355 } else { 3356 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3357 } 3358 } 3359 3360 /* 3361 * Wake anyone waiting for this mdi_pathinfo node 3362 */ 3363 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3364 MDI_PI_UNLOCK(pip); 3365 3366 /* 3367 * Mark the client device as stable 3368 */ 3369 MDI_CLIENT_STABLE(ct); 3370 if (rv == MDI_SUCCESS) { 3371 if (ct->ct_unstable == 0) { 3372 cdip = ct->ct_dip; 3373 3374 /* 3375 * Onlining the mdi_pathinfo node will impact the 3376 * client state Update the client and dev_info node 3377 * state accordingly 3378 */ 3379 rv = NDI_SUCCESS; 3380 i_mdi_client_update_state(ct); 3381 switch (MDI_CLIENT_STATE(ct)) { 3382 case MDI_CLIENT_STATE_OPTIMAL: 3383 case MDI_CLIENT_STATE_DEGRADED: 3384 if (cdip && !i_ddi_devi_attached(cdip) && 3385 ((state == MDI_PATHINFO_STATE_ONLINE) || 3386 (state == MDI_PATHINFO_STATE_STANDBY))) { 3387 3388 /* 3389 * Must do ndi_devi_online() through 3390 * hotplug thread for deferred 3391 * attach mechanism to work 3392 */ 3393 MDI_CLIENT_UNLOCK(ct); 3394 rv = ndi_devi_online(cdip, 0); 3395 MDI_CLIENT_LOCK(ct); 3396 if ((rv != NDI_SUCCESS) && 3397 (MDI_CLIENT_STATE(ct) == 3398 MDI_CLIENT_STATE_DEGRADED)) { 3399 /* 3400 * ndi_devi_online failed. 3401 * Reset client flags to 3402 * offline. 3403 */ 3404 MDI_DEBUG(1, (CE_WARN, cdip, 3405 "!ndi_devi_online: failed " 3406 " Error: %x", rv)); 3407 MDI_CLIENT_SET_OFFLINE(ct); 3408 } 3409 if (rv != NDI_SUCCESS) { 3410 /* Reset the path state */ 3411 MDI_PI_LOCK(pip); 3412 MDI_PI(pip)->pi_state = 3413 MDI_PI_OLD_STATE(pip); 3414 MDI_PI_UNLOCK(pip); 3415 } 3416 } 3417 break; 3418 3419 case MDI_CLIENT_STATE_FAILED: 3420 /* 3421 * This is the last path case for 3422 * non-user initiated events. 
3423 */ 3424 if (((flag & NDI_DEVI_REMOVE) == 0) && 3425 cdip && (i_ddi_node_state(cdip) >= 3426 DS_INITIALIZED)) { 3427 MDI_CLIENT_UNLOCK(ct); 3428 rv = ndi_devi_offline(cdip, 0); 3429 MDI_CLIENT_LOCK(ct); 3430 3431 if (rv != NDI_SUCCESS) { 3432 /* 3433 * ndi_devi_offline failed. 3434 * Reset client flags to 3435 * online as the path could not 3436 * be offlined. 3437 */ 3438 MDI_DEBUG(1, (CE_WARN, cdip, 3439 "!ndi_devi_offline: failed " 3440 " Error: %x", rv)); 3441 MDI_CLIENT_SET_ONLINE(ct); 3442 } 3443 } 3444 break; 3445 } 3446 /* 3447 * Convert to MDI error code 3448 */ 3449 switch (rv) { 3450 case NDI_SUCCESS: 3451 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3452 i_mdi_report_path_state(ct, pip); 3453 rv = MDI_SUCCESS; 3454 break; 3455 case NDI_BUSY: 3456 rv = MDI_BUSY; 3457 break; 3458 default: 3459 rv = MDI_FAILURE; 3460 break; 3461 } 3462 } 3463 } 3464 MDI_CLIENT_UNLOCK(ct); 3465 3466 state_change_exit: 3467 /* 3468 * Mark the pHCI as stable again. 3469 */ 3470 MDI_PHCI_LOCK(ph); 3471 MDI_PHCI_STABLE(ph); 3472 MDI_PHCI_UNLOCK(ph); 3473 return (rv); 3474 } 3475 3476 /* 3477 * mdi_pi_online(): 3478 * Place the path_info node in the online state. The path is 3479 * now available to be selected by mdi_select_path() for 3480 * transporting I/O requests to client devices. 
3481 * Return Values: 3482 * MDI_SUCCESS 3483 * MDI_FAILURE 3484 */ 3485 int 3486 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3487 { 3488 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3489 int client_held = 0; 3490 int rv; 3491 3492 ASSERT(ct != NULL); 3493 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3494 if (rv != MDI_SUCCESS) 3495 return (rv); 3496 3497 MDI_PI_LOCK(pip); 3498 if (MDI_PI(pip)->pi_pm_held == 0) { 3499 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3500 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3501 i_mdi_pm_hold_pip(pip); 3502 client_held = 1; 3503 } 3504 MDI_PI_UNLOCK(pip); 3505 3506 if (client_held) { 3507 MDI_CLIENT_LOCK(ct); 3508 if (ct->ct_power_cnt == 0) { 3509 rv = i_mdi_power_all_phci(ct); 3510 } 3511 3512 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3513 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3514 i_mdi_pm_hold_client(ct, 1); 3515 MDI_CLIENT_UNLOCK(ct); 3516 } 3517 3518 return (rv); 3519 } 3520 3521 /* 3522 * mdi_pi_standby(): 3523 * Place the mdi_pathinfo node in standby state 3524 * 3525 * Return Values: 3526 * MDI_SUCCESS 3527 * MDI_FAILURE 3528 */ 3529 int 3530 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3531 { 3532 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3533 } 3534 3535 /* 3536 * mdi_pi_fault(): 3537 * Place the mdi_pathinfo node in fault'ed state 3538 * Return Values: 3539 * MDI_SUCCESS 3540 * MDI_FAILURE 3541 */ 3542 int 3543 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3544 { 3545 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3546 } 3547 3548 /* 3549 * mdi_pi_offline(): 3550 * Offline a mdi_pathinfo node. 
3551 * Return Values: 3552 * MDI_SUCCESS 3553 * MDI_FAILURE 3554 */ 3555 int 3556 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3557 { 3558 int ret, client_held = 0; 3559 mdi_client_t *ct; 3560 3561 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3562 3563 if (ret == MDI_SUCCESS) { 3564 MDI_PI_LOCK(pip); 3565 if (MDI_PI(pip)->pi_pm_held) { 3566 client_held = 1; 3567 } 3568 MDI_PI_UNLOCK(pip); 3569 3570 if (client_held) { 3571 ct = MDI_PI(pip)->pi_client; 3572 MDI_CLIENT_LOCK(ct); 3573 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3574 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3575 i_mdi_pm_rele_client(ct, 1); 3576 MDI_CLIENT_UNLOCK(ct); 3577 } 3578 } 3579 3580 return (ret); 3581 } 3582 3583 /* 3584 * i_mdi_pi_offline(): 3585 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3586 */ 3587 static int 3588 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3589 { 3590 dev_info_t *vdip = NULL; 3591 mdi_vhci_t *vh = NULL; 3592 mdi_client_t *ct = NULL; 3593 int (*f)(); 3594 int rv; 3595 3596 MDI_PI_LOCK(pip); 3597 ct = MDI_PI(pip)->pi_client; 3598 ASSERT(ct != NULL); 3599 3600 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3601 /* 3602 * Give a chance for pending I/Os to complete. 3603 */ 3604 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3605 "%d cmds still pending on path: %p\n", 3606 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3607 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3608 &MDI_PI(pip)->pi_mutex, 3609 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3610 /* 3611 * The timeout time reached without ref_cnt being zero 3612 * being signaled. 
3613 */ 3614 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3615 "Timeout reached on path %p without the cond\n", 3616 (void *)pip)); 3617 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3618 "%d cmds still pending on path: %p\n", 3619 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3620 } 3621 } 3622 vh = ct->ct_vhci; 3623 vdip = vh->vh_dip; 3624 3625 /* 3626 * Notify vHCI that has registered this event 3627 */ 3628 ASSERT(vh->vh_ops); 3629 f = vh->vh_ops->vo_pi_state_change; 3630 3631 if (f != NULL) { 3632 MDI_PI_UNLOCK(pip); 3633 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3634 flags)) != MDI_SUCCESS) { 3635 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3636 "!vo_path_offline failed " 3637 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3638 } 3639 MDI_PI_LOCK(pip); 3640 } 3641 3642 /* 3643 * Set the mdi_pathinfo node state and clear the transient condition 3644 */ 3645 MDI_PI_SET_OFFLINE(pip); 3646 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3647 MDI_PI_UNLOCK(pip); 3648 3649 MDI_CLIENT_LOCK(ct); 3650 if (rv == MDI_SUCCESS) { 3651 if (ct->ct_unstable == 0) { 3652 dev_info_t *cdip = ct->ct_dip; 3653 3654 /* 3655 * Onlining the mdi_pathinfo node will impact the 3656 * client state Update the client and dev_info node 3657 * state accordingly 3658 */ 3659 i_mdi_client_update_state(ct); 3660 rv = NDI_SUCCESS; 3661 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3662 if (cdip && 3663 (i_ddi_node_state(cdip) >= 3664 DS_INITIALIZED)) { 3665 MDI_CLIENT_UNLOCK(ct); 3666 rv = ndi_devi_offline(cdip, 0); 3667 MDI_CLIENT_LOCK(ct); 3668 if (rv != NDI_SUCCESS) { 3669 /* 3670 * ndi_devi_offline failed. 3671 * Reset client flags to 3672 * online. 
3673 */ 3674 MDI_DEBUG(4, (CE_WARN, cdip, 3675 "!ndi_devi_offline: failed " 3676 " Error: %x", rv)); 3677 MDI_CLIENT_SET_ONLINE(ct); 3678 } 3679 } 3680 } 3681 /* 3682 * Convert to MDI error code 3683 */ 3684 switch (rv) { 3685 case NDI_SUCCESS: 3686 rv = MDI_SUCCESS; 3687 break; 3688 case NDI_BUSY: 3689 rv = MDI_BUSY; 3690 break; 3691 default: 3692 rv = MDI_FAILURE; 3693 break; 3694 } 3695 } 3696 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3697 i_mdi_report_path_state(ct, pip); 3698 } 3699 3700 MDI_CLIENT_UNLOCK(ct); 3701 3702 /* 3703 * Change in the mdi_pathinfo node state will impact the client state 3704 */ 3705 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3706 (void *)ct, (void *)pip)); 3707 return (rv); 3708 } 3709 3710 3711 /* 3712 * mdi_pi_get_addr(): 3713 * Get the unit address associated with a mdi_pathinfo node 3714 * 3715 * Return Values: 3716 * char * 3717 */ 3718 char * 3719 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3720 { 3721 if (pip == NULL) 3722 return (NULL); 3723 3724 return (MDI_PI(pip)->pi_addr); 3725 } 3726 3727 /* 3728 * mdi_pi_get_client(): 3729 * Get the client devinfo associated with a mdi_pathinfo node 3730 * 3731 * Return Values: 3732 * Handle to client device dev_info node 3733 */ 3734 dev_info_t * 3735 mdi_pi_get_client(mdi_pathinfo_t *pip) 3736 { 3737 dev_info_t *dip = NULL; 3738 if (pip) { 3739 dip = MDI_PI(pip)->pi_client->ct_dip; 3740 } 3741 return (dip); 3742 } 3743 3744 /* 3745 * mdi_pi_get_phci(): 3746 * Get the pHCI devinfo associated with the mdi_pathinfo node 3747 * Return Values: 3748 * Handle to dev_info node 3749 */ 3750 dev_info_t * 3751 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3752 { 3753 dev_info_t *dip = NULL; 3754 if (pip) { 3755 dip = MDI_PI(pip)->pi_phci->ph_dip; 3756 } 3757 return (dip); 3758 } 3759 3760 /* 3761 * mdi_pi_get_client_private(): 3762 * Get the client private information associated with the 3763 * mdi_pathinfo node 3764 */ 3765 void * 3766 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3767 { 
3768 void *cprivate = NULL; 3769 if (pip) { 3770 cprivate = MDI_PI(pip)->pi_cprivate; 3771 } 3772 return (cprivate); 3773 } 3774 3775 /* 3776 * mdi_pi_set_client_private(): 3777 * Set the client private information in the mdi_pathinfo node 3778 */ 3779 void 3780 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3781 { 3782 if (pip) { 3783 MDI_PI(pip)->pi_cprivate = priv; 3784 } 3785 } 3786 3787 /* 3788 * mdi_pi_get_phci_private(): 3789 * Get the pHCI private information associated with the 3790 * mdi_pathinfo node 3791 */ 3792 caddr_t 3793 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3794 { 3795 caddr_t pprivate = NULL; 3796 if (pip) { 3797 pprivate = MDI_PI(pip)->pi_pprivate; 3798 } 3799 return (pprivate); 3800 } 3801 3802 /* 3803 * mdi_pi_set_phci_private(): 3804 * Set the pHCI private information in the mdi_pathinfo node 3805 */ 3806 void 3807 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3808 { 3809 if (pip) { 3810 MDI_PI(pip)->pi_pprivate = priv; 3811 } 3812 } 3813 3814 /* 3815 * mdi_pi_get_state(): 3816 * Get the mdi_pathinfo node state. Transient states are internal 3817 * and not provided to the users 3818 */ 3819 mdi_pathinfo_state_t 3820 mdi_pi_get_state(mdi_pathinfo_t *pip) 3821 { 3822 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3823 3824 if (pip) { 3825 if (MDI_PI_IS_TRANSIENT(pip)) { 3826 /* 3827 * mdi_pathinfo is in state transition. Return the 3828 * last good state. 3829 */ 3830 state = MDI_PI_OLD_STATE(pip); 3831 } else { 3832 state = MDI_PI_STATE(pip); 3833 } 3834 } 3835 return (state); 3836 } 3837 3838 /* 3839 * Note that the following function needs to be the new interface for 3840 * mdi_pi_get_state when mpxio gets integrated to ON. 3841 */ 3842 int 3843 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3844 uint32_t *ext_state) 3845 { 3846 *state = MDI_PATHINFO_STATE_INIT; 3847 3848 if (pip) { 3849 if (MDI_PI_IS_TRANSIENT(pip)) { 3850 /* 3851 * mdi_pathinfo is in state transition. 
Return the 3852 * last good state. 3853 */ 3854 *state = MDI_PI_OLD_STATE(pip); 3855 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3856 } else { 3857 *state = MDI_PI_STATE(pip); 3858 *ext_state = MDI_PI_EXT_STATE(pip); 3859 } 3860 } 3861 return (MDI_SUCCESS); 3862 } 3863 3864 /* 3865 * mdi_pi_get_preferred: 3866 * Get the preferred path flag 3867 */ 3868 int 3869 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3870 { 3871 if (pip) { 3872 return (MDI_PI(pip)->pi_preferred); 3873 } 3874 return (0); 3875 } 3876 3877 /* 3878 * mdi_pi_set_preferred: 3879 * Set the preferred path flag 3880 */ 3881 void 3882 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3883 { 3884 if (pip) { 3885 MDI_PI(pip)->pi_preferred = preferred; 3886 } 3887 } 3888 3889 /* 3890 * mdi_pi_set_state(): 3891 * Set the mdi_pathinfo node state 3892 */ 3893 void 3894 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3895 { 3896 uint32_t ext_state; 3897 3898 if (pip) { 3899 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3900 MDI_PI(pip)->pi_state = state; 3901 MDI_PI(pip)->pi_state |= ext_state; 3902 } 3903 } 3904 3905 /* 3906 * Property functions: 3907 */ 3908 int 3909 i_map_nvlist_error_to_mdi(int val) 3910 { 3911 int rv; 3912 3913 switch (val) { 3914 case 0: 3915 rv = DDI_PROP_SUCCESS; 3916 break; 3917 case EINVAL: 3918 case ENOTSUP: 3919 rv = DDI_PROP_INVAL_ARG; 3920 break; 3921 case ENOMEM: 3922 rv = DDI_PROP_NO_MEMORY; 3923 break; 3924 default: 3925 rv = DDI_PROP_NOT_FOUND; 3926 break; 3927 } 3928 return (rv); 3929 } 3930 3931 /* 3932 * mdi_pi_get_next_prop(): 3933 * Property walk function. The caller should hold mdi_pi_lock() 3934 * and release by calling mdi_pi_unlock() at the end of walk to 3935 * get a consistent value. 
3936 */ 3937 nvpair_t * 3938 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3939 { 3940 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3941 return (NULL); 3942 } 3943 ASSERT(MDI_PI_LOCKED(pip)); 3944 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3945 } 3946 3947 /* 3948 * mdi_prop_remove(): 3949 * Remove the named property from the named list. 3950 */ 3951 int 3952 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3953 { 3954 if (pip == NULL) { 3955 return (DDI_PROP_NOT_FOUND); 3956 } 3957 ASSERT(!MDI_PI_LOCKED(pip)); 3958 MDI_PI_LOCK(pip); 3959 if (MDI_PI(pip)->pi_prop == NULL) { 3960 MDI_PI_UNLOCK(pip); 3961 return (DDI_PROP_NOT_FOUND); 3962 } 3963 if (name) { 3964 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3965 } else { 3966 char nvp_name[MAXNAMELEN]; 3967 nvpair_t *nvp; 3968 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3969 while (nvp) { 3970 nvpair_t *next; 3971 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3972 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3973 nvpair_name(nvp)); 3974 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3975 nvp_name); 3976 nvp = next; 3977 } 3978 } 3979 MDI_PI_UNLOCK(pip); 3980 return (DDI_PROP_SUCCESS); 3981 } 3982 3983 /* 3984 * mdi_prop_size(): 3985 * Get buffer size needed to pack the property data. 3986 * Caller should hold the mdi_pathinfo_t lock to get a consistent 3987 * buffer size. 3988 */ 3989 int 3990 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3991 { 3992 int rv; 3993 size_t bufsize; 3994 3995 *buflenp = 0; 3996 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3997 return (DDI_PROP_NOT_FOUND); 3998 } 3999 ASSERT(MDI_PI_LOCKED(pip)); 4000 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4001 &bufsize, NV_ENCODE_NATIVE); 4002 *buflenp = bufsize; 4003 return (i_map_nvlist_error_to_mdi(rv)); 4004 } 4005 4006 /* 4007 * mdi_prop_pack(): 4008 * pack the property list. 
The caller should hold the 4009 * mdi_pathinfo_t node to get a consistent data 4010 */ 4011 int 4012 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4013 { 4014 int rv; 4015 size_t bufsize; 4016 4017 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4018 return (DDI_PROP_NOT_FOUND); 4019 } 4020 4021 ASSERT(MDI_PI_LOCKED(pip)); 4022 4023 bufsize = buflen; 4024 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4025 NV_ENCODE_NATIVE, KM_SLEEP); 4026 4027 return (i_map_nvlist_error_to_mdi(rv)); 4028 } 4029 4030 /* 4031 * mdi_prop_update_byte(): 4032 * Create/Update a byte property 4033 */ 4034 int 4035 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4036 { 4037 int rv; 4038 4039 if (pip == NULL) { 4040 return (DDI_PROP_INVAL_ARG); 4041 } 4042 ASSERT(!MDI_PI_LOCKED(pip)); 4043 MDI_PI_LOCK(pip); 4044 if (MDI_PI(pip)->pi_prop == NULL) { 4045 MDI_PI_UNLOCK(pip); 4046 return (DDI_PROP_NOT_FOUND); 4047 } 4048 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4049 MDI_PI_UNLOCK(pip); 4050 return (i_map_nvlist_error_to_mdi(rv)); 4051 } 4052 4053 /* 4054 * mdi_prop_update_byte_array(): 4055 * Create/Update a byte array property 4056 */ 4057 int 4058 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4059 uint_t nelements) 4060 { 4061 int rv; 4062 4063 if (pip == NULL) { 4064 return (DDI_PROP_INVAL_ARG); 4065 } 4066 ASSERT(!MDI_PI_LOCKED(pip)); 4067 MDI_PI_LOCK(pip); 4068 if (MDI_PI(pip)->pi_prop == NULL) { 4069 MDI_PI_UNLOCK(pip); 4070 return (DDI_PROP_NOT_FOUND); 4071 } 4072 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4073 MDI_PI_UNLOCK(pip); 4074 return (i_map_nvlist_error_to_mdi(rv)); 4075 } 4076 4077 /* 4078 * mdi_prop_update_int(): 4079 * Create/Update a 32 bit integer property 4080 */ 4081 int 4082 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4083 { 4084 int rv; 4085 4086 if (pip == NULL) { 4087 return (DDI_PROP_INVAL_ARG); 4088 } 4089 
ASSERT(!MDI_PI_LOCKED(pip)); 4090 MDI_PI_LOCK(pip); 4091 if (MDI_PI(pip)->pi_prop == NULL) { 4092 MDI_PI_UNLOCK(pip); 4093 return (DDI_PROP_NOT_FOUND); 4094 } 4095 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4096 MDI_PI_UNLOCK(pip); 4097 return (i_map_nvlist_error_to_mdi(rv)); 4098 } 4099 4100 /* 4101 * mdi_prop_update_int64(): 4102 * Create/Update a 64 bit integer property 4103 */ 4104 int 4105 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4106 { 4107 int rv; 4108 4109 if (pip == NULL) { 4110 return (DDI_PROP_INVAL_ARG); 4111 } 4112 ASSERT(!MDI_PI_LOCKED(pip)); 4113 MDI_PI_LOCK(pip); 4114 if (MDI_PI(pip)->pi_prop == NULL) { 4115 MDI_PI_UNLOCK(pip); 4116 return (DDI_PROP_NOT_FOUND); 4117 } 4118 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4119 MDI_PI_UNLOCK(pip); 4120 return (i_map_nvlist_error_to_mdi(rv)); 4121 } 4122 4123 /* 4124 * mdi_prop_update_int_array(): 4125 * Create/Update a int array property 4126 */ 4127 int 4128 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4129 uint_t nelements) 4130 { 4131 int rv; 4132 4133 if (pip == NULL) { 4134 return (DDI_PROP_INVAL_ARG); 4135 } 4136 ASSERT(!MDI_PI_LOCKED(pip)); 4137 MDI_PI_LOCK(pip); 4138 if (MDI_PI(pip)->pi_prop == NULL) { 4139 MDI_PI_UNLOCK(pip); 4140 return (DDI_PROP_NOT_FOUND); 4141 } 4142 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4143 nelements); 4144 MDI_PI_UNLOCK(pip); 4145 return (i_map_nvlist_error_to_mdi(rv)); 4146 } 4147 4148 /* 4149 * mdi_prop_update_string(): 4150 * Create/Update a string property 4151 */ 4152 int 4153 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4154 { 4155 int rv; 4156 4157 if (pip == NULL) { 4158 return (DDI_PROP_INVAL_ARG); 4159 } 4160 ASSERT(!MDI_PI_LOCKED(pip)); 4161 MDI_PI_LOCK(pip); 4162 if (MDI_PI(pip)->pi_prop == NULL) { 4163 MDI_PI_UNLOCK(pip); 4164 return (DDI_PROP_NOT_FOUND); 4165 } 4166 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4167 MDI_PI_UNLOCK(pip); 4168 return (i_map_nvlist_error_to_mdi(rv)); 4169 } 4170 4171 /* 4172 * mdi_prop_update_string_array(): 4173 * Create/Update a string array property 4174 */ 4175 int 4176 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4177 uint_t nelements) 4178 { 4179 int rv; 4180 4181 if (pip == NULL) { 4182 return (DDI_PROP_INVAL_ARG); 4183 } 4184 ASSERT(!MDI_PI_LOCKED(pip)); 4185 MDI_PI_LOCK(pip); 4186 if (MDI_PI(pip)->pi_prop == NULL) { 4187 MDI_PI_UNLOCK(pip); 4188 return (DDI_PROP_NOT_FOUND); 4189 } 4190 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4191 nelements); 4192 MDI_PI_UNLOCK(pip); 4193 return (i_map_nvlist_error_to_mdi(rv)); 4194 } 4195 4196 /* 4197 * mdi_prop_lookup_byte(): 4198 * Look for byte property identified by name. The data returned 4199 * is the actual property and valid as long as mdi_pathinfo_t node 4200 * is alive. 4201 */ 4202 int 4203 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4204 { 4205 int rv; 4206 4207 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4208 return (DDI_PROP_NOT_FOUND); 4209 } 4210 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4211 return (i_map_nvlist_error_to_mdi(rv)); 4212 } 4213 4214 4215 /* 4216 * mdi_prop_lookup_byte_array(): 4217 * Look for byte array property identified by name. The data 4218 * returned is the actual property and valid as long as 4219 * mdi_pathinfo_t node is alive. 4220 */ 4221 int 4222 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4223 uint_t *nelements) 4224 { 4225 int rv; 4226 4227 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4228 return (DDI_PROP_NOT_FOUND); 4229 } 4230 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4231 nelements); 4232 return (i_map_nvlist_error_to_mdi(rv)); 4233 } 4234 4235 /* 4236 * mdi_prop_lookup_int(): 4237 * Look for int property identified by name. 
The data returned 4238 * is the actual property and valid as long as mdi_pathinfo_t 4239 * node is alive. 4240 */ 4241 int 4242 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4243 { 4244 int rv; 4245 4246 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4247 return (DDI_PROP_NOT_FOUND); 4248 } 4249 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4250 return (i_map_nvlist_error_to_mdi(rv)); 4251 } 4252 4253 /* 4254 * mdi_prop_lookup_int64(): 4255 * Look for int64 property identified by name. The data returned 4256 * is the actual property and valid as long as mdi_pathinfo_t node 4257 * is alive. 4258 */ 4259 int 4260 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4261 { 4262 int rv; 4263 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4264 return (DDI_PROP_NOT_FOUND); 4265 } 4266 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4267 return (i_map_nvlist_error_to_mdi(rv)); 4268 } 4269 4270 /* 4271 * mdi_prop_lookup_int_array(): 4272 * Look for int array property identified by name. The data 4273 * returned is the actual property and valid as long as 4274 * mdi_pathinfo_t node is alive. 4275 */ 4276 int 4277 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4278 uint_t *nelements) 4279 { 4280 int rv; 4281 4282 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4283 return (DDI_PROP_NOT_FOUND); 4284 } 4285 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4286 (int32_t **)data, nelements); 4287 return (i_map_nvlist_error_to_mdi(rv)); 4288 } 4289 4290 /* 4291 * mdi_prop_lookup_string(): 4292 * Look for string property identified by name. The data 4293 * returned is the actual property and valid as long as 4294 * mdi_pathinfo_t node is alive. 
4295 */ 4296 int 4297 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4298 { 4299 int rv; 4300 4301 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4302 return (DDI_PROP_NOT_FOUND); 4303 } 4304 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4305 return (i_map_nvlist_error_to_mdi(rv)); 4306 } 4307 4308 /* 4309 * mdi_prop_lookup_string_array(): 4310 * Look for string array property identified by name. The data 4311 * returned is the actual property and valid as long as 4312 * mdi_pathinfo_t node is alive. 4313 */ 4314 int 4315 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4316 uint_t *nelements) 4317 { 4318 int rv; 4319 4320 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4321 return (DDI_PROP_NOT_FOUND); 4322 } 4323 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4324 nelements); 4325 return (i_map_nvlist_error_to_mdi(rv)); 4326 } 4327 4328 /* 4329 * mdi_prop_free(): 4330 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4331 * functions return the pointer to actual property data and not a 4332 * copy of it. So the data returned is valid as long as 4333 * mdi_pathinfo_t node is valid. 
4334 */ 4335 /*ARGSUSED*/ 4336 int 4337 mdi_prop_free(void *data) 4338 { 4339 return (DDI_PROP_SUCCESS); 4340 } 4341 4342 /*ARGSUSED*/ 4343 static void 4344 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4345 { 4346 char *phci_path, *ct_path; 4347 char *ct_status; 4348 char *status; 4349 dev_info_t *dip = ct->ct_dip; 4350 char lb_buf[64]; 4351 4352 ASSERT(MDI_CLIENT_LOCKED(ct)); 4353 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4354 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4355 return; 4356 } 4357 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4358 ct_status = "optimal"; 4359 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4360 ct_status = "degraded"; 4361 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4362 ct_status = "failed"; 4363 } else { 4364 ct_status = "unknown"; 4365 } 4366 4367 if (MDI_PI_IS_OFFLINE(pip)) { 4368 status = "offline"; 4369 } else if (MDI_PI_IS_ONLINE(pip)) { 4370 status = "online"; 4371 } else if (MDI_PI_IS_STANDBY(pip)) { 4372 status = "standby"; 4373 } else if (MDI_PI_IS_FAULT(pip)) { 4374 status = "faulted"; 4375 } else { 4376 status = "unknown"; 4377 } 4378 4379 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4380 (void) snprintf(lb_buf, sizeof (lb_buf), 4381 "%s, region-size: %d", mdi_load_balance_lba, 4382 ct->ct_lb_args->region_size); 4383 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4384 (void) snprintf(lb_buf, sizeof (lb_buf), 4385 "%s", mdi_load_balance_none); 4386 } else { 4387 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4388 mdi_load_balance_rr); 4389 } 4390 4391 if (dip) { 4392 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4393 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4394 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4395 "path %s (%s%d) to target address: %s is %s" 4396 " Load balancing: %s\n", 4397 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4398 ddi_get_instance(dip), ct_status, 4399 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4400 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4401 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4402 MDI_PI(pip)->pi_addr, status, lb_buf); 4403 kmem_free(phci_path, MAXPATHLEN); 4404 kmem_free(ct_path, MAXPATHLEN); 4405 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4406 } 4407 } 4408 4409 #ifdef DEBUG 4410 /* 4411 * i_mdi_log(): 4412 * Utility function for error message management 4413 * 4414 */ 4415 /*PRINTFLIKE3*/ 4416 static void 4417 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4418 { 4419 char name[MAXNAMELEN]; 4420 char buf[MAXNAMELEN]; 4421 char *bp; 4422 va_list ap; 4423 int log_only = 0; 4424 int boot_only = 0; 4425 int console_only = 0; 4426 4427 if (dip) { 4428 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4429 ddi_node_name(dip), ddi_get_instance(dip)); 4430 } else { 4431 name[0] = 0; 4432 } 4433 4434 va_start(ap, fmt); 4435 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4436 va_end(ap); 4437 4438 switch (buf[0]) { 4439 case '!': 4440 bp = &buf[1]; 4441 log_only = 1; 4442 break; 4443 case '?': 4444 bp = &buf[1]; 4445 boot_only = 1; 4446 break; 4447 case '^': 4448 bp = &buf[1]; 4449 console_only = 1; 4450 break; 4451 default: 4452 bp = buf; 4453 break; 4454 } 4455 if (mdi_debug_logonly) { 4456 log_only = 1; 4457 boot_only = 0; 4458 console_only = 0; 4459 } 4460 4461 switch (level) { 4462 case CE_NOTE: 4463 level = CE_CONT; 4464 /* FALLTHROUGH */ 4465 case CE_CONT: 4466 case CE_WARN: 4467 case CE_PANIC: 4468 if (boot_only) { 4469 cmn_err(level, "?mdi: %s%s", name, bp); 4470 } else if (console_only) { 4471 cmn_err(level, "^mdi: %s%s", name, bp); 4472 } else if (log_only) { 4473 cmn_err(level, "!mdi: %s%s", name, bp); 4474 } else { 4475 cmn_err(level, "mdi: %s%s", name, bp); 4476 } 4477 break; 4478 default: 4479 cmn_err(level, "mdi: %s%s", name, bp); 4480 break; 4481 } 4482 } 4483 #endif /* DEBUG */ 4484 4485 void 4486 i_mdi_client_online(dev_info_t *ct_dip) 4487 { 4488 mdi_client_t *ct; 4489 4490 /* 4491 * Client online notification. 
Mark client state as online 4492 * restore our binding with dev_info node 4493 */ 4494 ct = i_devi_get_client(ct_dip); 4495 ASSERT(ct != NULL); 4496 MDI_CLIENT_LOCK(ct); 4497 MDI_CLIENT_SET_ONLINE(ct); 4498 /* catch for any memory leaks */ 4499 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4500 ct->ct_dip = ct_dip; 4501 4502 if (ct->ct_power_cnt == 0) 4503 (void) i_mdi_power_all_phci(ct); 4504 4505 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4506 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4507 i_mdi_pm_hold_client(ct, 1); 4508 4509 MDI_CLIENT_UNLOCK(ct); 4510 } 4511 4512 void 4513 i_mdi_phci_online(dev_info_t *ph_dip) 4514 { 4515 mdi_phci_t *ph; 4516 4517 /* pHCI online notification. Mark state accordingly */ 4518 ph = i_devi_get_phci(ph_dip); 4519 ASSERT(ph != NULL); 4520 MDI_PHCI_LOCK(ph); 4521 MDI_PHCI_SET_ONLINE(ph); 4522 MDI_PHCI_UNLOCK(ph); 4523 } 4524 4525 /* 4526 * mdi_devi_online(): 4527 * Online notification from NDI framework on pHCI/client 4528 * device online. 4529 * Return Values: 4530 * NDI_SUCCESS 4531 * MDI_FAILURE 4532 */ 4533 /*ARGSUSED*/ 4534 int 4535 mdi_devi_online(dev_info_t *dip, uint_t flags) 4536 { 4537 if (MDI_PHCI(dip)) { 4538 i_mdi_phci_online(dip); 4539 } 4540 4541 if (MDI_CLIENT(dip)) { 4542 i_mdi_client_online(dip); 4543 } 4544 return (NDI_SUCCESS); 4545 } 4546 4547 /* 4548 * mdi_devi_offline(): 4549 * Offline notification from NDI framework on pHCI/Client device 4550 * offline. 
 *
 *		The client role (if any) is offlined first; if that fails the
 *		pHCI role is not attempted.  If the pHCI offline then fails on
 *		a node that is also a client, the client is put back online.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*
 * i_mdi_phci_offline():
 *		Offline the pHCI identified by dip together with all of its
 *		mdi_pathinfo nodes.
 *
 *	Phases, as implemented below:
 *	1. Bail out early if there is no pHCI for dip (NDI_SUCCESS), it is
 *	   already offline (NDI_SUCCESS) or ph_unstable is set (NDI_BUSY).
 *	2. Walk the path list.  Return NDI_BUSY if any client has a failover
 *	   in progress or is unstable.  If taking this pHCI away would leave
 *	   a client FAILED (per i_mdi_client_compute_state()), offline that
 *	   client's dev_info node with the locks dropped.
 *	3. If a client offline in phase 2 failed, walk the paths visited
 *	   before the failing one, online/offline their client nodes
 *	   according to the current client state, and return NDI_BUSY.
 *	4. Otherwise mark the pHCI offline, flag every path as offlining,
 *	   delay(1), then call i_mdi_pi_offline() on each path.  If a path
 *	   is not offline afterwards, mark the pHCI online again and return
 *	   NDI_BUSY.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_BUSY
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
	    (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
		    (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* Remember the successor before any lock is dropped. */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Both the client and pHCI locks are dropped around
			 * ndi_devi_offline(); the pHCI lock is retaken on
			 * either outcome.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Revisit the paths that precede the failing one and bring
		 * each client dev_info node in line with the client's
		 * current state, then report busy.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* No dev_info node, or a state not handled above. */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not read again. */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*
 * mdi_phci_mark_retiring():
 *		Walk the paths of the pHCI identified by dip.  For every path
 *		whose client has a dev_info node at or beyond DS_INITIALIZED
 *		and for which i_mdi_client_compute_state(ct, ph) reports
 *		MDI_CLIENT_STATE_FAILED, call e_ddi_mark_retiring() on the
 *		client dev_info node, passing cons_array through.  The client
 *		and pHCI locks are dropped around that call.
 *
 *		Does nothing if dip is not a pHCI, has no mdi_phci_t, or the
 *		pHCI is already offline.
 */
void
mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* has no last path */
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		MDI_PI_UNLOCK(pip);

		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Last path. Mark client dip as retiring */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_mark_retiring(cdip, cons_array);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_retire_notify():
 *		Walk the paths of the pHCI identified by dip.  For every path
 *		whose client has a dev_info node at or beyond DS_INITIALIZED
 *		and for which i_mdi_client_compute_state(ct, ph) reports
 *		MDI_CLIENT_STATE_FAILED, call e_ddi_retire_notify() on the
 *		client dev_info node, passing constraint through.  The client
 *		and pHCI locks are dropped around that call.
 *
 *		*constraint is set to 0 and the walk abandoned when the pHCI
 *		is unstable, or when any client is unstable or has a failover
 *		in progress.  Does nothing if dip is not a pHCI, has no
 *		mdi_phci_t, or the pHCI is already offline.
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are retiring the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_retire_notify(cdip, constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * offline the path(s) hanging off the PHCI. If the
 * last path to any client, check that constraints
 * have been applied.
 */
void
mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		unstable = 0;
	int		constraint;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* no last path and no pips */
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* no last path and no pips */
		return;
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		unstable = 1;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * if failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			unstable = 1;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
4973 */ 4974 cdip = ct->ct_dip; 4975 if (!phci_only && cdip && 4976 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4977 (i_mdi_client_compute_state(ct, ph) == 4978 MDI_CLIENT_STATE_FAILED)) { 4979 i_mdi_client_unlock(ct); 4980 MDI_PHCI_UNLOCK(ph); 4981 /* 4982 * We don't retire clients we just retire the 4983 * path to a client. If it is the last path 4984 * to a client, constraints are checked and 4985 * if we pass the last path is offlined. MPXIO will 4986 * then fail all I/Os to the client. Since we don't 4987 * want to retire the client on a path error 4988 * set constraint = 0 so that the client dip 4989 * is not retired. 4990 */ 4991 constraint = 0; 4992 (void) e_ddi_retire_finalize(cdip, &constraint); 4993 MDI_PHCI_LOCK(ph); 4994 pip = next; 4995 } else { 4996 i_mdi_client_unlock(ct); 4997 pip = next; 4998 } 4999 } 5000 5001 /* 5002 * Cannot offline pip(s) 5003 */ 5004 if (unstable) { 5005 cmn_err(CE_WARN, "PHCI in transient state, cannot " 5006 "retire, dip = %p", (void *)dip); 5007 MDI_PHCI_UNLOCK(ph); 5008 return; 5009 } 5010 5011 /* 5012 * Mark the pHCI as offline 5013 */ 5014 MDI_PHCI_SET_OFFLINE(ph); 5015 5016 /* 5017 * Mark the child mdi_pathinfo nodes as transient 5018 */ 5019 pip = ph->ph_path_head; 5020 while (pip != NULL) { 5021 MDI_PI_LOCK(pip); 5022 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5023 MDI_PI_SET_OFFLINING(pip); 5024 MDI_PI_UNLOCK(pip); 5025 pip = next; 5026 } 5027 MDI_PHCI_UNLOCK(ph); 5028 /* 5029 * Give a chance for any pending commands to execute 5030 */ 5031 delay(1); 5032 MDI_PHCI_LOCK(ph); 5033 pip = ph->ph_path_head; 5034 while (pip != NULL) { 5035 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5036 (void) i_mdi_pi_offline(pip, 0); 5037 MDI_PI_LOCK(pip); 5038 ct = MDI_PI(pip)->pi_client; 5039 if (!MDI_PI_IS_OFFLINE(pip)) { 5040 cmn_err(CE_WARN, "PHCI busy, cannot offline path: " 5041 "PHCI dip = %p", (void *)dip); 5042 MDI_PI_UNLOCK(pip); 5043 MDI_PHCI_SET_ONLINE(ph); 5044 MDI_PHCI_UNLOCK(ph); 5045 return; 5046 } 
5047 MDI_PI_UNLOCK(pip); 5048 pip = next; 5049 } 5050 MDI_PHCI_UNLOCK(ph); 5051 5052 return; 5053 } 5054 5055 void 5056 mdi_phci_unretire(dev_info_t *dip) 5057 { 5058 ASSERT(MDI_PHCI(dip)); 5059 5060 /* 5061 * Online the phci 5062 */ 5063 i_mdi_phci_online(dip); 5064 } 5065 5066 /*ARGSUSED*/ 5067 static int 5068 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5069 { 5070 int rv = NDI_SUCCESS; 5071 mdi_client_t *ct; 5072 5073 /* 5074 * Client component to go offline. Make sure that we are 5075 * not in failing over state and update client state 5076 * accordingly 5077 */ 5078 ct = i_devi_get_client(dip); 5079 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 5080 (void *)dip, (void *)ct)); 5081 if (ct != NULL) { 5082 MDI_CLIENT_LOCK(ct); 5083 if (ct->ct_unstable) { 5084 /* 5085 * One or more paths are in transient state, 5086 * Dont allow offline of a client device 5087 */ 5088 MDI_DEBUG(1, (CE_WARN, dip, 5089 "!One or more paths to this device is " 5090 "in transient state. This device can not " 5091 "be removed at this moment. " 5092 "Please try again later.")); 5093 MDI_CLIENT_UNLOCK(ct); 5094 return (NDI_BUSY); 5095 } 5096 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5097 /* 5098 * Failover is in progress, Dont allow DR of 5099 * a client device 5100 */ 5101 MDI_DEBUG(1, (CE_WARN, dip, 5102 "!Client device (%s%d) is Busy. %s", 5103 ddi_driver_name(dip), ddi_get_instance(dip), 5104 "This device can not be removed at " 5105 "this moment. 
Please try again later.")); 5106 MDI_CLIENT_UNLOCK(ct); 5107 return (NDI_BUSY); 5108 } 5109 MDI_CLIENT_SET_OFFLINE(ct); 5110 5111 /* 5112 * Unbind our relationship with the dev_info node 5113 */ 5114 if (flags & NDI_DEVI_REMOVE) { 5115 ct->ct_dip = NULL; 5116 } 5117 MDI_CLIENT_UNLOCK(ct); 5118 } 5119 return (rv); 5120 } 5121 5122 /* 5123 * mdi_pre_attach(): 5124 * Pre attach() notification handler 5125 */ 5126 /*ARGSUSED*/ 5127 int 5128 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5129 { 5130 /* don't support old DDI_PM_RESUME */ 5131 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5132 (cmd == DDI_PM_RESUME)) 5133 return (DDI_FAILURE); 5134 5135 return (DDI_SUCCESS); 5136 } 5137 5138 /* 5139 * mdi_post_attach(): 5140 * Post attach() notification handler 5141 */ 5142 /*ARGSUSED*/ 5143 void 5144 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5145 { 5146 mdi_phci_t *ph; 5147 mdi_client_t *ct; 5148 mdi_vhci_t *vh; 5149 5150 if (MDI_PHCI(dip)) { 5151 ph = i_devi_get_phci(dip); 5152 ASSERT(ph != NULL); 5153 5154 MDI_PHCI_LOCK(ph); 5155 switch (cmd) { 5156 case DDI_ATTACH: 5157 MDI_DEBUG(2, (CE_NOTE, dip, 5158 "!pHCI post_attach: called %p\n", (void *)ph)); 5159 if (error == DDI_SUCCESS) { 5160 MDI_PHCI_SET_ATTACH(ph); 5161 } else { 5162 MDI_DEBUG(1, (CE_NOTE, dip, 5163 "!pHCI post_attach: failed error=%d\n", 5164 error)); 5165 MDI_PHCI_SET_DETACH(ph); 5166 } 5167 break; 5168 5169 case DDI_RESUME: 5170 MDI_DEBUG(2, (CE_NOTE, dip, 5171 "!pHCI post_resume: called %p\n", (void *)ph)); 5172 if (error == DDI_SUCCESS) { 5173 MDI_PHCI_SET_RESUME(ph); 5174 } else { 5175 MDI_DEBUG(1, (CE_NOTE, dip, 5176 "!pHCI post_resume: failed error=%d\n", 5177 error)); 5178 MDI_PHCI_SET_SUSPEND(ph); 5179 } 5180 break; 5181 } 5182 MDI_PHCI_UNLOCK(ph); 5183 } 5184 5185 if (MDI_CLIENT(dip)) { 5186 ct = i_devi_get_client(dip); 5187 ASSERT(ct != NULL); 5188 5189 MDI_CLIENT_LOCK(ct); 5190 switch (cmd) { 5191 case DDI_ATTACH: 5192 MDI_DEBUG(2, (CE_NOTE, dip, 
5193 "!Client post_attach: called %p\n", (void *)ct)); 5194 if (error != DDI_SUCCESS) { 5195 MDI_DEBUG(1, (CE_NOTE, dip, 5196 "!Client post_attach: failed error=%d\n", 5197 error)); 5198 MDI_CLIENT_SET_DETACH(ct); 5199 MDI_DEBUG(4, (CE_WARN, dip, 5200 "mdi_post_attach i_mdi_pm_reset_client\n")); 5201 i_mdi_pm_reset_client(ct); 5202 break; 5203 } 5204 5205 /* 5206 * Client device has successfully attached, inform 5207 * the vhci. 5208 */ 5209 vh = ct->ct_vhci; 5210 if (vh->vh_ops->vo_client_attached) 5211 (*vh->vh_ops->vo_client_attached)(dip); 5212 5213 MDI_CLIENT_SET_ATTACH(ct); 5214 break; 5215 5216 case DDI_RESUME: 5217 MDI_DEBUG(2, (CE_NOTE, dip, 5218 "!Client post_attach: called %p\n", (void *)ct)); 5219 if (error == DDI_SUCCESS) { 5220 MDI_CLIENT_SET_RESUME(ct); 5221 } else { 5222 MDI_DEBUG(1, (CE_NOTE, dip, 5223 "!Client post_resume: failed error=%d\n", 5224 error)); 5225 MDI_CLIENT_SET_SUSPEND(ct); 5226 } 5227 break; 5228 } 5229 MDI_CLIENT_UNLOCK(ct); 5230 } 5231 } 5232 5233 /* 5234 * mdi_pre_detach(): 5235 * Pre detach notification handler 5236 */ 5237 /*ARGSUSED*/ 5238 int 5239 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5240 { 5241 int rv = DDI_SUCCESS; 5242 5243 if (MDI_CLIENT(dip)) { 5244 (void) i_mdi_client_pre_detach(dip, cmd); 5245 } 5246 5247 if (MDI_PHCI(dip)) { 5248 rv = i_mdi_phci_pre_detach(dip, cmd); 5249 } 5250 5251 return (rv); 5252 } 5253 5254 /*ARGSUSED*/ 5255 static int 5256 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5257 { 5258 int rv = DDI_SUCCESS; 5259 mdi_phci_t *ph; 5260 mdi_client_t *ct; 5261 mdi_pathinfo_t *pip; 5262 mdi_pathinfo_t *failed_pip = NULL; 5263 mdi_pathinfo_t *next; 5264 5265 ph = i_devi_get_phci(dip); 5266 if (ph == NULL) { 5267 return (rv); 5268 } 5269 5270 MDI_PHCI_LOCK(ph); 5271 switch (cmd) { 5272 case DDI_DETACH: 5273 MDI_DEBUG(2, (CE_NOTE, dip, 5274 "!pHCI pre_detach: called %p\n", (void *)ph)); 5275 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5276 /* 5277 * mdi_pathinfo nodes are still attached 
to 5278 * this pHCI. Fail the detach for this pHCI. 5279 */ 5280 MDI_DEBUG(2, (CE_WARN, dip, 5281 "!pHCI pre_detach: " 5282 "mdi_pathinfo nodes are still attached " 5283 "%p\n", (void *)ph)); 5284 rv = DDI_FAILURE; 5285 break; 5286 } 5287 MDI_PHCI_SET_DETACH(ph); 5288 break; 5289 5290 case DDI_SUSPEND: 5291 /* 5292 * pHCI is getting suspended. Since mpxio client 5293 * devices may not be suspended at this point, to avoid 5294 * a potential stack overflow, it is important to suspend 5295 * client devices before pHCI can be suspended. 5296 */ 5297 5298 MDI_DEBUG(2, (CE_NOTE, dip, 5299 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5300 /* 5301 * Suspend all the client devices accessible through this pHCI 5302 */ 5303 pip = ph->ph_path_head; 5304 while (pip != NULL && rv == DDI_SUCCESS) { 5305 dev_info_t *cdip; 5306 MDI_PI_LOCK(pip); 5307 next = 5308 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5309 ct = MDI_PI(pip)->pi_client; 5310 i_mdi_client_lock(ct, pip); 5311 cdip = ct->ct_dip; 5312 MDI_PI_UNLOCK(pip); 5313 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5314 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5315 i_mdi_client_unlock(ct); 5316 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5317 DDI_SUCCESS) { 5318 /* 5319 * Suspend of one of the client 5320 * device has failed. 5321 */ 5322 MDI_DEBUG(1, (CE_WARN, dip, 5323 "!Suspend of device (%s%d) failed.", 5324 ddi_driver_name(cdip), 5325 ddi_get_instance(cdip))); 5326 failed_pip = pip; 5327 break; 5328 } 5329 } else { 5330 i_mdi_client_unlock(ct); 5331 } 5332 pip = next; 5333 } 5334 5335 if (rv == DDI_SUCCESS) { 5336 /* 5337 * Suspend of client devices is complete. Proceed 5338 * with pHCI suspend. 5339 */ 5340 MDI_PHCI_SET_SUSPEND(ph); 5341 } else { 5342 /* 5343 * Revert back all the suspended client device states 5344 * to converse. 
5345 */ 5346 pip = ph->ph_path_head; 5347 while (pip != failed_pip) { 5348 dev_info_t *cdip; 5349 MDI_PI_LOCK(pip); 5350 next = 5351 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5352 ct = MDI_PI(pip)->pi_client; 5353 i_mdi_client_lock(ct, pip); 5354 cdip = ct->ct_dip; 5355 MDI_PI_UNLOCK(pip); 5356 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5357 i_mdi_client_unlock(ct); 5358 (void) devi_attach(cdip, DDI_RESUME); 5359 } else { 5360 i_mdi_client_unlock(ct); 5361 } 5362 pip = next; 5363 } 5364 } 5365 break; 5366 5367 default: 5368 rv = DDI_FAILURE; 5369 break; 5370 } 5371 MDI_PHCI_UNLOCK(ph); 5372 return (rv); 5373 } 5374 5375 /*ARGSUSED*/ 5376 static int 5377 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5378 { 5379 int rv = DDI_SUCCESS; 5380 mdi_client_t *ct; 5381 5382 ct = i_devi_get_client(dip); 5383 if (ct == NULL) { 5384 return (rv); 5385 } 5386 5387 MDI_CLIENT_LOCK(ct); 5388 switch (cmd) { 5389 case DDI_DETACH: 5390 MDI_DEBUG(2, (CE_NOTE, dip, 5391 "!Client pre_detach: called %p\n", (void *)ct)); 5392 MDI_CLIENT_SET_DETACH(ct); 5393 break; 5394 5395 case DDI_SUSPEND: 5396 MDI_DEBUG(2, (CE_NOTE, dip, 5397 "!Client pre_suspend: called %p\n", (void *)ct)); 5398 MDI_CLIENT_SET_SUSPEND(ct); 5399 break; 5400 5401 default: 5402 rv = DDI_FAILURE; 5403 break; 5404 } 5405 MDI_CLIENT_UNLOCK(ct); 5406 return (rv); 5407 } 5408 5409 /* 5410 * mdi_post_detach(): 5411 * Post detach notification handler 5412 */ 5413 /*ARGSUSED*/ 5414 void 5415 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5416 { 5417 /* 5418 * Detach/Suspend of mpxio component failed. Update our state 5419 * too 5420 */ 5421 if (MDI_PHCI(dip)) 5422 i_mdi_phci_post_detach(dip, cmd, error); 5423 5424 if (MDI_CLIENT(dip)) 5425 i_mdi_client_post_detach(dip, cmd, error); 5426 } 5427 5428 /*ARGSUSED*/ 5429 static void 5430 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5431 { 5432 mdi_phci_t *ph; 5433 5434 /* 5435 * Detach/Suspend of phci component failed. 
Update our state 5436 * too 5437 */ 5438 ph = i_devi_get_phci(dip); 5439 if (ph == NULL) { 5440 return; 5441 } 5442 5443 MDI_PHCI_LOCK(ph); 5444 /* 5445 * Detach of pHCI failed. Restore back converse 5446 * state 5447 */ 5448 switch (cmd) { 5449 case DDI_DETACH: 5450 MDI_DEBUG(2, (CE_NOTE, dip, 5451 "!pHCI post_detach: called %p\n", (void *)ph)); 5452 if (error != DDI_SUCCESS) 5453 MDI_PHCI_SET_ATTACH(ph); 5454 break; 5455 5456 case DDI_SUSPEND: 5457 MDI_DEBUG(2, (CE_NOTE, dip, 5458 "!pHCI post_suspend: called %p\n", (void *)ph)); 5459 if (error != DDI_SUCCESS) 5460 MDI_PHCI_SET_RESUME(ph); 5461 break; 5462 } 5463 MDI_PHCI_UNLOCK(ph); 5464 } 5465 5466 /*ARGSUSED*/ 5467 static void 5468 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5469 { 5470 mdi_client_t *ct; 5471 5472 ct = i_devi_get_client(dip); 5473 if (ct == NULL) { 5474 return; 5475 } 5476 MDI_CLIENT_LOCK(ct); 5477 /* 5478 * Detach of Client failed. Restore back converse 5479 * state 5480 */ 5481 switch (cmd) { 5482 case DDI_DETACH: 5483 MDI_DEBUG(2, (CE_NOTE, dip, 5484 "!Client post_detach: called %p\n", (void *)ct)); 5485 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5486 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5487 "i_mdi_pm_rele_client\n")); 5488 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5489 } else { 5490 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5491 "i_mdi_pm_reset_client\n")); 5492 i_mdi_pm_reset_client(ct); 5493 } 5494 if (error != DDI_SUCCESS) 5495 MDI_CLIENT_SET_ATTACH(ct); 5496 break; 5497 5498 case DDI_SUSPEND: 5499 MDI_DEBUG(2, (CE_NOTE, dip, 5500 "!Client post_suspend: called %p\n", (void *)ct)); 5501 if (error != DDI_SUCCESS) 5502 MDI_CLIENT_SET_RESUME(ct); 5503 break; 5504 } 5505 MDI_CLIENT_UNLOCK(ct); 5506 } 5507 5508 int 5509 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5510 { 5511 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5512 } 5513 5514 /* 5515 * create and install per-path (client - pHCI) statistics 5516 * I/O stats supported: nread, nwritten, reads, and writes 5517 * Error stats - hard errors, soft errors, & transport errors 5518 */ 5519 int 5520 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5521 { 5522 kstat_t *kiosp, *kerrsp; 5523 struct pi_errs *nsp; 5524 struct mdi_pi_kstats *mdi_statp; 5525 5526 if (MDI_PI(pip)->pi_kstats != NULL) 5527 return (MDI_SUCCESS); 5528 5529 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5530 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5531 return (MDI_FAILURE); 5532 } 5533 5534 (void) strcat(ksname, ",err"); 5535 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5536 KSTAT_TYPE_NAMED, 5537 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5538 if (kerrsp == NULL) { 5539 kstat_delete(kiosp); 5540 return (MDI_FAILURE); 5541 } 5542 5543 nsp = (struct pi_errs *)kerrsp->ks_data; 5544 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5545 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5546 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5547 KSTAT_DATA_UINT32); 5548 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5549 KSTAT_DATA_UINT32); 5550 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5551 KSTAT_DATA_UINT32); 5552 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5553 KSTAT_DATA_UINT32); 5554 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5555 KSTAT_DATA_UINT32); 5556 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5557 KSTAT_DATA_UINT32); 5558 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5559 KSTAT_DATA_UINT32); 5560 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5561 5562 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5563 mdi_statp->pi_kstat_ref = 1; 5564 mdi_statp->pi_kstat_iostats = kiosp; 5565 mdi_statp->pi_kstat_errstats = kerrsp; 5566 
kstat_install(kiosp); 5567 kstat_install(kerrsp); 5568 MDI_PI(pip)->pi_kstats = mdi_statp; 5569 return (MDI_SUCCESS); 5570 } 5571 5572 /* 5573 * destroy per-path properties 5574 */ 5575 static void 5576 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5577 { 5578 5579 struct mdi_pi_kstats *mdi_statp; 5580 5581 if (MDI_PI(pip)->pi_kstats == NULL) 5582 return; 5583 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5584 return; 5585 5586 MDI_PI(pip)->pi_kstats = NULL; 5587 5588 /* 5589 * the kstat may be shared between multiple pathinfo nodes 5590 * decrement this pathinfo's usage, removing the kstats 5591 * themselves when the last pathinfo reference is removed. 5592 */ 5593 ASSERT(mdi_statp->pi_kstat_ref > 0); 5594 if (--mdi_statp->pi_kstat_ref != 0) 5595 return; 5596 5597 kstat_delete(mdi_statp->pi_kstat_iostats); 5598 kstat_delete(mdi_statp->pi_kstat_errstats); 5599 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5600 } 5601 5602 /* 5603 * update I/O paths KSTATS 5604 */ 5605 void 5606 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5607 { 5608 kstat_t *iostatp; 5609 size_t xfer_cnt; 5610 5611 ASSERT(pip != NULL); 5612 5613 /* 5614 * I/O can be driven across a path prior to having path 5615 * statistics available, i.e. probe(9e). 5616 */ 5617 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5618 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5619 xfer_cnt = bp->b_bcount - bp->b_resid; 5620 if (bp->b_flags & B_READ) { 5621 KSTAT_IO_PTR(iostatp)->reads++; 5622 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5623 } else { 5624 KSTAT_IO_PTR(iostatp)->writes++; 5625 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5626 } 5627 } 5628 } 5629 5630 /* 5631 * Enable the path(specific client/target/initiator) 5632 * Enabling a path means that MPxIO may select the enabled path for routing 5633 * future I/O requests, subject to other path state constraints. 
5634 */ 5635 int 5636 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5637 { 5638 mdi_phci_t *ph; 5639 5640 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5641 if (ph == NULL) { 5642 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5643 " failed. pip: %p ph = NULL\n", (void *)pip)); 5644 return (MDI_FAILURE); 5645 } 5646 5647 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5648 MDI_ENABLE_OP); 5649 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5650 " Returning success pip = %p. ph = %p\n", 5651 (void *)pip, (void *)ph)); 5652 return (MDI_SUCCESS); 5653 5654 } 5655 5656 /* 5657 * Disable the path (specific client/target/initiator) 5658 * Disabling a path means that MPxIO will not select the disabled path for 5659 * routing any new I/O requests. 5660 */ 5661 int 5662 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5663 { 5664 mdi_phci_t *ph; 5665 5666 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5667 if (ph == NULL) { 5668 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5669 " failed. pip: %p ph = NULL\n", (void *)pip)); 5670 return (MDI_FAILURE); 5671 } 5672 5673 (void) i_mdi_enable_disable_path(pip, 5674 ph->ph_vhci, flags, MDI_DISABLE_OP); 5675 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5676 "Returning success pip = %p. ph = %p", 5677 (void *)pip, (void *)ph)); 5678 return (MDI_SUCCESS); 5679 } 5680 5681 /* 5682 * disable the path to a particular pHCI (pHCI specified in the phci_path 5683 * argument) for a particular client (specified in the client_path argument). 5684 * Disabling a path means that MPxIO will not select the disabled path for 5685 * routing any new I/O requests. 
5686 * NOTE: this will be removed once the NWS files are changed to use the new 5687 * mdi_{enable,disable}_path interfaces 5688 */ 5689 int 5690 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5691 { 5692 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5693 } 5694 5695 /* 5696 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5697 * argument) for a particular client (specified in the client_path argument). 5698 * Enabling a path means that MPxIO may select the enabled path for routing 5699 * future I/O requests, subject to other path state constraints. 5700 * NOTE: this will be removed once the NWS files are changed to use the new 5701 * mdi_{enable,disable}_path interfaces 5702 */ 5703 5704 int 5705 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5706 { 5707 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5708 } 5709 5710 /* 5711 * Common routine for doing enable/disable. 5712 */ 5713 static mdi_pathinfo_t * 5714 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5715 int op) 5716 { 5717 int sync_flag = 0; 5718 int rv; 5719 mdi_pathinfo_t *next; 5720 int (*f)() = NULL; 5721 5722 f = vh->vh_ops->vo_pi_state_change; 5723 5724 sync_flag = (flags << 8) & 0xf00; 5725 5726 /* 5727 * Do a callback into the mdi consumer to let it 5728 * know that path is about to get enabled/disabled. 
5729 */ 5730 if (f != NULL) { 5731 rv = (*f)(vh->vh_dip, pip, 0, 5732 MDI_PI_EXT_STATE(pip), 5733 MDI_EXT_STATE_CHANGE | sync_flag | 5734 op | MDI_BEFORE_STATE_CHANGE); 5735 if (rv != MDI_SUCCESS) { 5736 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5737 "!vo_pi_state_change: failed rv = %x", rv)); 5738 } 5739 } 5740 MDI_PI_LOCK(pip); 5741 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5742 5743 switch (flags) { 5744 case USER_DISABLE: 5745 if (op == MDI_DISABLE_OP) { 5746 MDI_PI_SET_USER_DISABLE(pip); 5747 } else { 5748 MDI_PI_SET_USER_ENABLE(pip); 5749 } 5750 break; 5751 case DRIVER_DISABLE: 5752 if (op == MDI_DISABLE_OP) { 5753 MDI_PI_SET_DRV_DISABLE(pip); 5754 } else { 5755 MDI_PI_SET_DRV_ENABLE(pip); 5756 } 5757 break; 5758 case DRIVER_DISABLE_TRANSIENT: 5759 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5760 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5761 } else { 5762 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5763 } 5764 break; 5765 } 5766 MDI_PI_UNLOCK(pip); 5767 /* 5768 * Do a callback into the mdi consumer to let it 5769 * know that path is now enabled/disabled. 5770 */ 5771 if (f != NULL) { 5772 rv = (*f)(vh->vh_dip, pip, 0, 5773 MDI_PI_EXT_STATE(pip), 5774 MDI_EXT_STATE_CHANGE | sync_flag | 5775 op | MDI_AFTER_STATE_CHANGE); 5776 if (rv != MDI_SUCCESS) { 5777 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5778 "!vo_pi_state_change: failed rv = %x", rv)); 5779 } 5780 } 5781 return (next); 5782 } 5783 5784 /* 5785 * Common routine for doing enable/disable. 
5786 * NOTE: this will be removed once the NWS files are changed to use the new 5787 * mdi_{enable,disable}_path has been putback 5788 */ 5789 int 5790 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5791 { 5792 5793 mdi_phci_t *ph; 5794 mdi_vhci_t *vh = NULL; 5795 mdi_client_t *ct; 5796 mdi_pathinfo_t *next, *pip; 5797 int found_it; 5798 5799 ph = i_devi_get_phci(pdip); 5800 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5801 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5802 (void *)cdip)); 5803 if (ph == NULL) { 5804 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5805 "Op %d failed. ph = NULL\n", op)); 5806 return (MDI_FAILURE); 5807 } 5808 5809 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5810 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5811 "Op Invalid operation = %d\n", op)); 5812 return (MDI_FAILURE); 5813 } 5814 5815 vh = ph->ph_vhci; 5816 5817 if (cdip == NULL) { 5818 /* 5819 * Need to mark the Phci as enabled/disabled. 5820 */ 5821 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5822 "Op %d for the phci\n", op)); 5823 MDI_PHCI_LOCK(ph); 5824 switch (flags) { 5825 case USER_DISABLE: 5826 if (op == MDI_DISABLE_OP) { 5827 MDI_PHCI_SET_USER_DISABLE(ph); 5828 } else { 5829 MDI_PHCI_SET_USER_ENABLE(ph); 5830 } 5831 break; 5832 case DRIVER_DISABLE: 5833 if (op == MDI_DISABLE_OP) { 5834 MDI_PHCI_SET_DRV_DISABLE(ph); 5835 } else { 5836 MDI_PHCI_SET_DRV_ENABLE(ph); 5837 } 5838 break; 5839 case DRIVER_DISABLE_TRANSIENT: 5840 if (op == MDI_DISABLE_OP) { 5841 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5842 } else { 5843 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5844 } 5845 break; 5846 default: 5847 MDI_PHCI_UNLOCK(ph); 5848 MDI_DEBUG(1, (CE_NOTE, NULL, 5849 "!i_mdi_pi_enable_disable:" 5850 " Invalid flag argument= %d\n", flags)); 5851 } 5852 5853 /* 5854 * Phci has been disabled. Now try to enable/disable 5855 * path info's to each client. 
5856 */ 5857 pip = ph->ph_path_head; 5858 while (pip != NULL) { 5859 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5860 } 5861 MDI_PHCI_UNLOCK(ph); 5862 } else { 5863 5864 /* 5865 * Disable a specific client. 5866 */ 5867 ct = i_devi_get_client(cdip); 5868 if (ct == NULL) { 5869 MDI_DEBUG(1, (CE_NOTE, NULL, 5870 "!i_mdi_pi_enable_disable:" 5871 " failed. ct = NULL operation = %d\n", op)); 5872 return (MDI_FAILURE); 5873 } 5874 5875 MDI_CLIENT_LOCK(ct); 5876 pip = ct->ct_path_head; 5877 found_it = 0; 5878 while (pip != NULL) { 5879 MDI_PI_LOCK(pip); 5880 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5881 if (MDI_PI(pip)->pi_phci == ph) { 5882 MDI_PI_UNLOCK(pip); 5883 found_it = 1; 5884 break; 5885 } 5886 MDI_PI_UNLOCK(pip); 5887 pip = next; 5888 } 5889 5890 5891 MDI_CLIENT_UNLOCK(ct); 5892 if (found_it == 0) { 5893 MDI_DEBUG(1, (CE_NOTE, NULL, 5894 "!i_mdi_pi_enable_disable:" 5895 " failed. Could not find corresponding pip\n")); 5896 return (MDI_FAILURE); 5897 } 5898 5899 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5900 } 5901 5902 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5903 "Op %d Returning success pdip = %p cdip = %p\n", 5904 op, (void *)pdip, (void *)cdip)); 5905 return (MDI_SUCCESS); 5906 } 5907 5908 /* 5909 * Ensure phci powered up 5910 */ 5911 static void 5912 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5913 { 5914 dev_info_t *ph_dip; 5915 5916 ASSERT(pip != NULL); 5917 ASSERT(MDI_PI_LOCKED(pip)); 5918 5919 if (MDI_PI(pip)->pi_pm_held) { 5920 return; 5921 } 5922 5923 ph_dip = mdi_pi_get_phci(pip); 5924 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 5925 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5926 if (ph_dip == NULL) { 5927 return; 5928 } 5929 5930 MDI_PI_UNLOCK(pip); 5931 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5932 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5933 5934 pm_hold_power(ph_dip); 5935 5936 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5937 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 5938 MDI_PI_LOCK(pip); 5939 5940 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 5941 if (DEVI(ph_dip)->devi_pm_info) 5942 MDI_PI(pip)->pi_pm_held = 1; 5943 } 5944 5945 /* 5946 * Allow phci powered down 5947 */ 5948 static void 5949 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5950 { 5951 dev_info_t *ph_dip = NULL; 5952 5953 ASSERT(pip != NULL); 5954 ASSERT(MDI_PI_LOCKED(pip)); 5955 5956 if (MDI_PI(pip)->pi_pm_held == 0) { 5957 return; 5958 } 5959 5960 ph_dip = mdi_pi_get_phci(pip); 5961 ASSERT(ph_dip != NULL); 5962 5963 MDI_PI_UNLOCK(pip); 5964 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 5965 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5966 5967 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5968 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5969 pm_rele_power(ph_dip); 5970 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5971 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5972 5973 MDI_PI_LOCK(pip); 5974 MDI_PI(pip)->pi_pm_held = 0; 5975 } 5976 5977 static void 5978 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5979 { 5980 ASSERT(MDI_CLIENT_LOCKED(ct)); 5981 5982 ct->ct_power_cnt += incr; 5983 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 5984 "ct_power_cnt = %d incr = %d\n", (void *)ct, 5985 ct->ct_power_cnt, incr)); 5986 ASSERT(ct->ct_power_cnt >= 0); 5987 } 5988 5989 static void 5990 i_mdi_rele_all_phci(mdi_client_t *ct) 5991 { 5992 mdi_pathinfo_t *pip; 5993 5994 ASSERT(MDI_CLIENT_LOCKED(ct)); 5995 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5996 while (pip != NULL) { 5997 mdi_hold_path(pip); 5998 MDI_PI_LOCK(pip); 5999 i_mdi_pm_rele_pip(pip); 6000 MDI_PI_UNLOCK(pip); 6001 mdi_rele_path(pip); 6002 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6003 } 6004 } 6005 6006 static void 6007 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6008 { 6009 ASSERT(MDI_CLIENT_LOCKED(ct)); 6010 6011 if (i_ddi_devi_attached(ct->ct_dip)) { 6012 ct->ct_power_cnt -= decr; 6013 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
		    "ct_power_cnt = %d decr = %d\n",
		    (void *)ct, ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/*
 * i_mdi_pm_reset_client():
 *		Unconditionally drop the client's power accounting: zero
 *		ct_power_cnt, call i_mdi_rele_all_phci(), clear both the
 *		config and unconfig hold flags and mark the client as reset
 *		(ct_powercnt_reset = 1) so that later post-config/unconfig
 *		processing can tell the holds are already gone.
 *		The caller must hold the client lock (asserted).
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
	ASSERT(MDI_CLIENT_LOCKED(ct));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	ct->ct_powercnt_reset = 1;
}

/*
 * i_mdi_power_one_phci():
 *		Take a power hold on the path (i_mdi_pm_hold_pip) and then
 *		call pm_powerup() on the pHCI devinfo node behind it.
 *		If pm_powerup() returns DDI_FAILURE the hold is dropped again.
 *		The pathinfo lock is only held around the hold/rele calls,
 *		not across pm_powerup().
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
	    "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip),
	    ddi_get_instance(ph_dip), (void *)pip));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
		    "pm_powerup FAILED for %s%d %p\n",
		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip),
		    (void *)pip));

		/* undo the hold taken above */
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * i_mdi_power_all_phci():
 *		Walk the client's path list and power up the pHCI of every
 *		path that is in the INIT, ONLINE or STANDBY state.
 *		Entered and exited with the client lock held (asserted on
 *		entry); the lock is dropped around each
 *		i_mdi_power_one_phci() call, with the path held via
 *		mdi_hold_path() for that window.
 * Return Values:
 *		MDI_SUCCESS	at least one pHCI was powered up
 *		MDI_FAILURE	no pHCI was powered up (including the case
 *				where no path was eligible)
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are handled here.
 *		BUS_POWER_NOINVOL is rejected and every other op is passed
 *		straight to pm_busop_bus_power().
 *
 *		PRE_NOTIFICATION marks the client as being in a power
 *		transition (after waiting for any transition already in
 *		progress); POST_NOTIFICATION clears that mark and wakes the
 *		waiters on ct_powerchange_cv.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * Ignore other OPs:
	 * return quickly to save cpu cycles on the ct processing.
	 * For the ops handled here, pick the child dip out of the
	 * op-specific argument structure.
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state changes are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new power state only if the change succeeded */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/*
		 * A power-up that failed: release the hold we did in
		 * pre-notification (none was taken while attaching).
		 */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might have started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care of in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* end of the transition started in pre-notification */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level went up to a non-zero level: power and hold */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* level dropped to 0 from a known (!= -1) level: release */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * i_mdi_pm_pre_config_one():
 *		Pre-config power handling for a single client node.
 *		After waiting out any power transition in progress, nothing
 *		is done unless the client is in the failed state and no
 *		config hold is recorded yet.  Otherwise the pHCIs are powered
 *		up (only when ct_power_cnt is 0), a hold of ct_path_count is
 *		taken and ct_powercnt_config is set.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	child has no client, or powering the pHCIs
 *				failed (the hold is still taken in that case)
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one ALREADY held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * i_mdi_pm_pre_config():
 *		Pre-config power handling for one named child (child != NULL,
 *		vdip must already be busy-owned by the caller) or for every
 *		child of the vHCI (child == NULL).  In the all-children case
 *		the walk stops at the first child that fails.
 */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		/* fetch the sibling before processing the current child */
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
	return (ret);
}

/*
 * i_mdi_pm_pre_unconfig_one():
 *		Pre-unconfig power handling for a single client node.
 *		*held is set to 1 when an unconfig hold exists on return
 *		(either already recorded, or taken here with the pHCIs
 *		successfully powered); it is never cleared here.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	child has no client; the client is powered
 *				down and NDI_AUTODETACH is set in flags; or
 *				powering the pHCIs failed
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(ct->ct_dip)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/* do not power up a powered-down client just to auto-detach it */
	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * i_mdi_pm_pre_unconfig():
 *		Pre-unconfig power handling for one named child (child !=
 *		NULL, vdip must already be busy-owned by the caller) or for
 *		every child of the vHCI (child == NULL).  *held is cleared on
 *		entry.  Unlike i_mdi_pm_pre_config(), the all-children walk
 *		does not stop on failure: ret is overwritten by each child,
 *		and the final result is forced to MDI_SUCCESS if any child
 *		set *held.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);

	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/*
 * i_mdi_pm_post_config_one():
 *		Post-config power handling for a single client node.
 *		Nothing is done if no config hold is recorded, if the client
 *		was reset in the meantime, or if the client is still failed.
 *		Otherwise the client is either reset (it was powered down or
 *		is not attached, and is not currently attaching) or released
 *		by the number of paths that are ONLINE or STANDBY right now.
 *		ct_powercnt_config is cleared in both of those cases.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		/* count the paths that are usable at this moment */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_pm_post_config():
 *		Post-config power handling for one named child (child !=
 *		NULL, vdip must already be busy-owned by the caller) or for
 *		every child of the vHCI (child == NULL).
 */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * i_mdi_pm_post_unconfig_one():
 *		Post-unconfig power handling for a single client node.
 *		Nothing is done if no unconfig hold is recorded or the client
 *		was reset in the meantime.  Otherwise the client is either
 *		reset or released by the number of paths that are ONLINE or
 *		STANDBY right now.  ct_powercnt_unconfig is cleared explicitly
 *		only on the release path; on the reset path
 *		i_mdi_pm_reset_client() clears it.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
		/* count the paths that are usable at this moment */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_pm_post_unconfig():
 *		Post-unconfig power handling.  A no-op unless the matching
 *		pre-unconfig reported a hold (held != 0).  Handles one named
 *		child (child != NULL, vdip must already be busy-owned by the
 *		caller) or every child of the vHCI (child == NULL).
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	int			circ;
	dev_info_t		*cdip;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, vdip,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * mdi_power():
 *		Dispatch a power-management operation on a vHCI.
 *
 *		vdip	vHCI devinfo node
 *		op	operation to perform
 *		args	op specific:
 *			MDI_PM_PRE_UNCONFIG	(int *) receives "held"
 *			MDI_PM_POST_UNCONFIG	(int *) supplies "held"
 *			MDI_PM_HOLD_POWER,
 *			MDI_PM_RELE_POWER	(dev_info_t *) client node
 *		devnm	optional child name; when non-NULL the vHCI is
 *			entered for the duration of the call and the named
 *			child, if found, is the only one processed
 *		flags	passed through to pre-unconfig processing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	only the pre-config and pre-unconfig ops can
 *				fail; all other ops return MDI_SUCCESS
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int			circ, ret = MDI_SUCCESS;
	dev_info_t		*client_dip = NULL;
	mdi_client_t		*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
	    op, devnm ? devnm : "NULL", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		ASSERT(args);

		/* for these ops the client node comes from args, not devnm */
		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		/*
		 * NOTE(review): ct is locked without a NULL check; the only
		 * guard visible here is the ASSERT above -- confirm that
		 * callers always pass a client node.
		 */
		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			/* only the first hold powers the pHCIs */
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return (ret);
}

/*
 * mdi_component_is_vhci():
 *		Check whether dip is a vHCI node.  When it is and mdi_class
 *		is non-NULL, *mdi_class is set to the vHCI class name.
 * Return Values:
 *		MDI_SUCCESS	dip is a vHCI
 *		MDI_FAILURE	dip is not a vHCI (*mdi_class untouched)
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_component_is_phci():
 *		Check whether dip is a pHCI node.  When it is and mdi_class
 *		is non-NULL, *mdi_class is set to the class name of the vHCI
 *		the pHCI belongs to.
 * Return Values:
 *		MDI_SUCCESS	dip is a pHCI
 *		MDI_FAILURE	dip is not a pHCI (*mdi_class untouched)
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t *phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_component_is_client():
 *		Check whether dip is an MDI client node.  When it is and
 *		mdi_class is non-NULL, *mdi_class is set to the class name of
 *		the vHCI the client belongs to.
 * Return Values:
 *		MDI_SUCCESS	dip is a client
 *		MDI_FAILURE	dip is not a client (*mdi_class untouched)
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t *client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class = client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_client_get_vhci_private():
 *		Get the vhci private information associated with the
 *		client node; NULL if dip is not a client.
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * mdi_client_set_vhci_private():
 *		Set the vhci private information in the client node;
 *		does nothing if dip is not a client.
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}
/*
 * mdi_pi_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_pathinfo node; NULL if pip is NULL.
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *		Set the vhci private information in the mdi_pathinfo node;
 *		does nothing if pip is NULL.
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node; NULL if dip is not a pHCI.
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node;
 *		does nothing if dip is not a pHCI.
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * The array is indexed in parallel with vhci_class_list[].
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function.  The allocation is exactly strlen(filename) + 1 bytes,
 * which is the size destroy_vhci_cache() passes to kmem_free().
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/*
	 * the -1 below is to account for "%s" in the format string:
	 * two characters of fmt are replaced by vhclass and one byte is
	 * added for the terminating NUL.
	 */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 *
 * Allocates the mdi_vhci_config_t and hangs it off vh->vh_config, creates
 * the client hash, loads the cache nvlist (the one saved at boot for this
 * class if there is one, otherwise read from disk once root is loaded) and
 * registers stop_vhcache_flush_thread() as a CB_CL_UADMIN_PRE_VFS callback.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 * Take ownership of the saved nvlist for this class, if any.
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of someone manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate\n",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 *
 * Returns MDI_FAILURE, freeing nothing here, if the async threads could not
 * be stopped; MDI_SUCCESS otherwise.
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_next;

	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	/* each list walk saves the next pointer before freeing the entry */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 *
 * Sets MDI_VHC_EXIT and broadcasts on vhc_cv, then polls (delay(1) with
 * vhc_lock dropped) until the flush thread flag is clear and
 * vhc_acc_thrcount is 0.  Afterwards the EXIT flag is cleared, the queued
 * async client config requests are freed, a dirty cache is flushed and the
 * callback registered under vhc_cbid is deleted.
 *
 * Returns MDI_FAILURE if the flush fails (the cache is marked dirty again)
 * or if callb_delete() fails; MDI_SUCCESS otherwise.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		/* clear the flag under the lock, flush with the lock dropped */
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}

/*
 * Stop vhci cache flush thread
 *
 * Callback registered by setup_vhci_cache() via callb_add(); arg is the
 * mdi_vhci_config_t.  Sets MDI_VHC_EXIT (which is left set), waits for the
 * flush thread flag to clear and then flushes a dirty cache, ignoring the
 * flush result.  Always returns B_TRUE.
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}
7028 7029 /* 7030 * Enqueue the vhcache phci (cphci) at the tail of the list 7031 */ 7032 static void 7033 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7034 { 7035 cphci->cphci_next = NULL; 7036 if (vhcache->vhcache_phci_head == NULL) 7037 vhcache->vhcache_phci_head = cphci; 7038 else 7039 vhcache->vhcache_phci_tail->cphci_next = cphci; 7040 vhcache->vhcache_phci_tail = cphci; 7041 } 7042 7043 /* 7044 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7045 */ 7046 static void 7047 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7048 mdi_vhcache_pathinfo_t *cpi) 7049 { 7050 cpi->cpi_next = NULL; 7051 if (cct->cct_cpi_head == NULL) 7052 cct->cct_cpi_head = cpi; 7053 else 7054 cct->cct_cpi_tail->cpi_next = cpi; 7055 cct->cct_cpi_tail = cpi; 7056 } 7057 7058 /* 7059 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7060 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7061 * flag set come at the beginning of the list. All cpis which have this 7062 * flag set come at the end of the list. 
7063 */ 7064 static void 7065 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7066 mdi_vhcache_pathinfo_t *newcpi) 7067 { 7068 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7069 7070 if (cct->cct_cpi_head == NULL || 7071 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7072 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7073 else { 7074 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7075 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7076 prev_cpi = cpi, cpi = cpi->cpi_next) 7077 ; 7078 7079 if (prev_cpi == NULL) 7080 cct->cct_cpi_head = newcpi; 7081 else 7082 prev_cpi->cpi_next = newcpi; 7083 7084 newcpi->cpi_next = cpi; 7085 7086 if (cpi == NULL) 7087 cct->cct_cpi_tail = newcpi; 7088 } 7089 } 7090 7091 /* 7092 * Enqueue the vhcache client (cct) at the tail of the list 7093 */ 7094 static void 7095 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7096 mdi_vhcache_client_t *cct) 7097 { 7098 cct->cct_next = NULL; 7099 if (vhcache->vhcache_client_head == NULL) 7100 vhcache->vhcache_client_head = cct; 7101 else 7102 vhcache->vhcache_client_tail->cct_next = cct; 7103 vhcache->vhcache_client_tail = cct; 7104 } 7105 7106 static void 7107 free_string_array(char **str, int nelem) 7108 { 7109 int i; 7110 7111 if (str) { 7112 for (i = 0; i < nelem; i++) { 7113 if (str[i]) 7114 kmem_free(str[i], strlen(str[i]) + 1); 7115 } 7116 kmem_free(str, sizeof (char *) * nelem); 7117 } 7118 } 7119 7120 static void 7121 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7122 { 7123 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7124 kmem_free(cphci, sizeof (*cphci)); 7125 } 7126 7127 static void 7128 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7129 { 7130 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7131 kmem_free(cpi, sizeof (*cpi)); 7132 } 7133 7134 static void 7135 free_vhcache_client(mdi_vhcache_client_t *cct) 7136 { 7137 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7138 kmem_free(cct, sizeof (*cct)); 7139 } 7140 7141 
static char * 7142 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7143 { 7144 char *name_addr; 7145 int len; 7146 7147 len = strlen(ct_name) + strlen(ct_addr) + 2; 7148 name_addr = kmem_alloc(len, KM_SLEEP); 7149 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7150 7151 if (ret_len) 7152 *ret_len = len; 7153 return (name_addr); 7154 } 7155 7156 /* 7157 * Copy the contents of paddrnvl to vhci cache. 7158 * paddrnvl nvlist contains path information for a vhci client. 7159 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7160 */ 7161 static void 7162 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7163 mdi_vhcache_client_t *cct) 7164 { 7165 nvpair_t *nvp = NULL; 7166 mdi_vhcache_pathinfo_t *cpi; 7167 uint_t nelem; 7168 uint32_t *val; 7169 7170 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7171 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7172 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7173 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7174 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7175 ASSERT(nelem == 2); 7176 cpi->cpi_cphci = cphci_list[val[0]]; 7177 cpi->cpi_flags = val[1]; 7178 enqueue_tail_vhcache_pathinfo(cct, cpi); 7179 } 7180 } 7181 7182 /* 7183 * Copy the contents of caddrmapnvl to vhci cache. 7184 * caddrmapnvl nvlist contains vhci client address to phci client address 7185 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7186 * this nvlist. 
7187 */ 7188 static void 7189 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7190 mdi_vhcache_phci_t *cphci_list[]) 7191 { 7192 nvpair_t *nvp = NULL; 7193 nvlist_t *paddrnvl; 7194 mdi_vhcache_client_t *cct; 7195 7196 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7197 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7198 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7199 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7200 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7201 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7202 /* the client must contain at least one path */ 7203 ASSERT(cct->cct_cpi_head != NULL); 7204 7205 enqueue_vhcache_client(vhcache, cct); 7206 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7207 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7208 } 7209 } 7210 7211 /* 7212 * Copy the contents of the main nvlist to vhci cache. 7213 * 7214 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7215 * The nvlist contains the mappings between the vhci client addresses and 7216 * their corresponding phci client addresses. 7217 * 7218 * The structure of the nvlist is as follows: 7219 * 7220 * Main nvlist: 7221 * NAME TYPE DATA 7222 * version int32 version number 7223 * phcis string array array of phci paths 7224 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7225 * 7226 * structure of c2paddrs_nvl: 7227 * NAME TYPE DATA 7228 * caddr1 nvlist_t paddrs_nvl1 7229 * caddr2 nvlist_t paddrs_nvl2 7230 * ... 7231 * where caddr1, caddr2, ... are vhci client name and addresses in the 7232 * form of "<clientname>@<clientaddress>". 7233 * (for example: "ssd@2000002037cd9f72"); 7234 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7235 * 7236 * structure of paddrs_nvl: 7237 * NAME TYPE DATA 7238 * pi_addr1 uint32_array (phci-id, cpi_flags) 7239 * pi_addr2 uint32_array (phci-id, cpi_flags) 7240 * ... 7241 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7242 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7243 * phci-ids are integers that identify PHCIs to which the 7244 * the bus specific address belongs to. These integers are used as an index 7245 * into to the phcis string array in the main nvlist to get the PHCI path. 7246 */ 7247 static int 7248 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7249 { 7250 char **phcis, **phci_namep; 7251 uint_t nphcis; 7252 mdi_vhcache_phci_t *cphci, **cphci_list; 7253 nvlist_t *caddrmapnvl; 7254 int32_t ver; 7255 int i; 7256 size_t cphci_list_size; 7257 7258 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7259 7260 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7261 ver != MDI_VHCI_CACHE_VERSION) 7262 return (MDI_FAILURE); 7263 7264 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7265 &nphcis) != 0) 7266 return (MDI_SUCCESS); 7267 7268 ASSERT(nphcis > 0); 7269 7270 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7271 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7272 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7273 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7274 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7275 enqueue_vhcache_phci(vhcache, cphci); 7276 cphci_list[i] = cphci; 7277 } 7278 7279 ASSERT(vhcache->vhcache_phci_head != NULL); 7280 7281 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7282 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7283 7284 kmem_free(cphci_list, cphci_list_size); 7285 return (MDI_SUCCESS); 7286 } 7287 7288 /* 7289 * Build paddrnvl for the specified client using the information in the 7290 * vhci cache and add it to the caddrmapnnvl. 7291 * Returns 0 on success, errno on failure. 
7292 */ 7293 static int 7294 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7295 nvlist_t *caddrmapnvl) 7296 { 7297 mdi_vhcache_pathinfo_t *cpi; 7298 nvlist_t *nvl; 7299 int err; 7300 uint32_t val[2]; 7301 7302 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7303 7304 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7305 return (err); 7306 7307 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7308 val[0] = cpi->cpi_cphci->cphci_id; 7309 val[1] = cpi->cpi_flags; 7310 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7311 != 0) 7312 goto out; 7313 } 7314 7315 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7316 out: 7317 nvlist_free(nvl); 7318 return (err); 7319 } 7320 7321 /* 7322 * Build caddrmapnvl using the information in the vhci cache 7323 * and add it to the mainnvl. 7324 * Returns 0 on success, errno on failure. 7325 */ 7326 static int 7327 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7328 { 7329 mdi_vhcache_client_t *cct; 7330 nvlist_t *nvl; 7331 int err; 7332 7333 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7334 7335 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7336 return (err); 7337 7338 for (cct = vhcache->vhcache_client_head; cct != NULL; 7339 cct = cct->cct_next) { 7340 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7341 goto out; 7342 } 7343 7344 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7345 out: 7346 nvlist_free(nvl); 7347 return (err); 7348 } 7349 7350 /* 7351 * Build nvlist using the information in the vhci cache. 7352 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7353 * Returns nvl on success, NULL on failure. 
7354 */ 7355 static nvlist_t * 7356 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7357 { 7358 mdi_vhcache_phci_t *cphci; 7359 uint_t phci_count; 7360 char **phcis; 7361 nvlist_t *nvl; 7362 int err, i; 7363 7364 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7365 nvl = NULL; 7366 goto out; 7367 } 7368 7369 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7370 MDI_VHCI_CACHE_VERSION)) != 0) 7371 goto out; 7372 7373 rw_enter(&vhcache->vhcache_lock, RW_READER); 7374 if (vhcache->vhcache_phci_head == NULL) { 7375 rw_exit(&vhcache->vhcache_lock); 7376 return (nvl); 7377 } 7378 7379 phci_count = 0; 7380 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7381 cphci = cphci->cphci_next) 7382 cphci->cphci_id = phci_count++; 7383 7384 /* build phci pathname list */ 7385 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7386 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7387 cphci = cphci->cphci_next, i++) 7388 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7389 7390 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7391 phci_count); 7392 free_string_array(phcis, phci_count); 7393 7394 if (err == 0 && 7395 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7396 rw_exit(&vhcache->vhcache_lock); 7397 return (nvl); 7398 } 7399 7400 rw_exit(&vhcache->vhcache_lock); 7401 out: 7402 if (nvl) 7403 nvlist_free(nvl); 7404 return (NULL); 7405 } 7406 7407 /* 7408 * Lookup vhcache phci structure for the specified phci path. 7409 */ 7410 static mdi_vhcache_phci_t * 7411 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7412 { 7413 mdi_vhcache_phci_t *cphci; 7414 7415 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7416 7417 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7418 cphci = cphci->cphci_next) { 7419 if (strcmp(cphci->cphci_path, phci_path) == 0) 7420 return (cphci); 7421 } 7422 7423 return (NULL); 7424 } 7425 7426 /* 7427 * Lookup vhcache phci structure for the specified phci. 
7428 */ 7429 static mdi_vhcache_phci_t * 7430 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7431 { 7432 mdi_vhcache_phci_t *cphci; 7433 7434 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7435 7436 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7437 cphci = cphci->cphci_next) { 7438 if (cphci->cphci_phci == ph) 7439 return (cphci); 7440 } 7441 7442 return (NULL); 7443 } 7444 7445 /* 7446 * Add the specified phci to the vhci cache if not already present. 7447 */ 7448 static void 7449 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7450 { 7451 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7452 mdi_vhcache_phci_t *cphci; 7453 char *pathname; 7454 int cache_updated; 7455 7456 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7457 7458 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7459 (void) ddi_pathname(ph->ph_dip, pathname); 7460 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7461 != NULL) { 7462 cphci->cphci_phci = ph; 7463 cache_updated = 0; 7464 } else { 7465 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7466 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7467 cphci->cphci_phci = ph; 7468 enqueue_vhcache_phci(vhcache, cphci); 7469 cache_updated = 1; 7470 } 7471 7472 rw_exit(&vhcache->vhcache_lock); 7473 7474 /* 7475 * Since a new phci has been added, reset 7476 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7477 * during next vhcache_discover_paths(). 7478 */ 7479 mutex_enter(&vhc->vhc_lock); 7480 vhc->vhc_path_discovery_cutoff_time = 0; 7481 mutex_exit(&vhc->vhc_lock); 7482 7483 kmem_free(pathname, MAXPATHLEN); 7484 if (cache_updated) 7485 vhcache_dirty(vhc); 7486 } 7487 7488 /* 7489 * Remove the reference to the specified phci from the vhci cache. 
7490 */ 7491 static void 7492 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7493 { 7494 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7495 mdi_vhcache_phci_t *cphci; 7496 7497 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7498 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7499 /* do not remove the actual mdi_vhcache_phci structure */ 7500 cphci->cphci_phci = NULL; 7501 } 7502 rw_exit(&vhcache->vhcache_lock); 7503 } 7504 7505 static void 7506 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7507 mdi_vhcache_lookup_token_t *src) 7508 { 7509 if (src == NULL) { 7510 dst->lt_cct = NULL; 7511 dst->lt_cct_lookup_time = 0; 7512 } else { 7513 dst->lt_cct = src->lt_cct; 7514 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7515 } 7516 } 7517 7518 /* 7519 * Look up vhcache client for the specified client. 7520 */ 7521 static mdi_vhcache_client_t * 7522 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7523 mdi_vhcache_lookup_token_t *token) 7524 { 7525 mod_hash_val_t hv; 7526 char *name_addr; 7527 int len; 7528 7529 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7530 7531 /* 7532 * If no vhcache clean occurred since the last lookup, we can 7533 * simply return the cct from the last lookup operation. 7534 * It works because ccts are never freed except during the vhcache 7535 * cleanup operation. 
7536 */ 7537 if (token != NULL && 7538 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7539 return (token->lt_cct); 7540 7541 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7542 if (mod_hash_find(vhcache->vhcache_client_hash, 7543 (mod_hash_key_t)name_addr, &hv) == 0) { 7544 if (token) { 7545 token->lt_cct = (mdi_vhcache_client_t *)hv; 7546 token->lt_cct_lookup_time = lbolt64; 7547 } 7548 } else { 7549 if (token) { 7550 token->lt_cct = NULL; 7551 token->lt_cct_lookup_time = 0; 7552 } 7553 hv = NULL; 7554 } 7555 kmem_free(name_addr, len); 7556 return ((mdi_vhcache_client_t *)hv); 7557 } 7558 7559 /* 7560 * Add the specified path to the vhci cache if not already present. 7561 * Also add the vhcache client for the client corresponding to this path 7562 * if it doesn't already exist. 7563 */ 7564 static void 7565 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7566 { 7567 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7568 mdi_vhcache_client_t *cct; 7569 mdi_vhcache_pathinfo_t *cpi; 7570 mdi_phci_t *ph = pip->pi_phci; 7571 mdi_client_t *ct = pip->pi_client; 7572 int cache_updated = 0; 7573 7574 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7575 7576 /* if vhcache client for this pip doesn't already exist, add it */ 7577 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7578 NULL)) == NULL) { 7579 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7580 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7581 ct->ct_guid, NULL); 7582 enqueue_vhcache_client(vhcache, cct); 7583 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7584 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7585 cache_updated = 1; 7586 } 7587 7588 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7589 if (cpi->cpi_cphci->cphci_phci == ph && 7590 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7591 cpi->cpi_pip = pip; 7592 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7593 cpi->cpi_flags &= 7594 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7595 sort_vhcache_paths(cct); 7596 cache_updated = 1; 7597 } 7598 break; 7599 } 7600 } 7601 7602 if (cpi == NULL) { 7603 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7604 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7605 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7606 ASSERT(cpi->cpi_cphci != NULL); 7607 cpi->cpi_pip = pip; 7608 enqueue_vhcache_pathinfo(cct, cpi); 7609 cache_updated = 1; 7610 } 7611 7612 rw_exit(&vhcache->vhcache_lock); 7613 7614 if (cache_updated) 7615 vhcache_dirty(vhc); 7616 } 7617 7618 /* 7619 * Remove the reference to the specified path from the vhci cache. 7620 */ 7621 static void 7622 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7623 { 7624 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7625 mdi_client_t *ct = pip->pi_client; 7626 mdi_vhcache_client_t *cct; 7627 mdi_vhcache_pathinfo_t *cpi; 7628 7629 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7630 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7631 NULL)) != NULL) { 7632 for (cpi = cct->cct_cpi_head; cpi != NULL; 7633 cpi = cpi->cpi_next) { 7634 if (cpi->cpi_pip == pip) { 7635 cpi->cpi_pip = NULL; 7636 break; 7637 } 7638 } 7639 } 7640 rw_exit(&vhcache->vhcache_lock); 7641 } 7642 7643 /* 7644 * Flush the vhci cache to disk. 7645 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7646 */ 7647 static int 7648 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7649 { 7650 nvlist_t *nvl; 7651 int err; 7652 int rv; 7653 7654 /* 7655 * It is possible that the system may shutdown before 7656 * i_ddi_io_initialized (during stmsboot for example). To allow for 7657 * flushing the cache in this case do not check for 7658 * i_ddi_io_initialized when force flag is set. 
7659 */ 7660 if (force_flag == 0 && !i_ddi_io_initialized()) 7661 return (MDI_FAILURE); 7662 7663 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7664 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7665 nvlist_free(nvl); 7666 } else 7667 err = EFAULT; 7668 7669 rv = MDI_SUCCESS; 7670 mutex_enter(&vhc->vhc_lock); 7671 if (err != 0) { 7672 if (err == EROFS) { 7673 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7674 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7675 MDI_VHC_VHCACHE_DIRTY); 7676 } else { 7677 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7678 cmn_err(CE_CONT, "%s: update failed\n", 7679 vhc->vhc_vhcache_filename); 7680 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7681 } 7682 rv = MDI_FAILURE; 7683 } 7684 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7685 cmn_err(CE_CONT, 7686 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7687 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7688 } 7689 mutex_exit(&vhc->vhc_lock); 7690 7691 return (rv); 7692 } 7693 7694 /* 7695 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7696 * Exits itself if left idle for the idle timeout period. 
7697 */ 7698 static void 7699 vhcache_flush_thread(void *arg) 7700 { 7701 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7702 clock_t idle_time, quit_at_ticks; 7703 callb_cpr_t cprinfo; 7704 7705 /* number of seconds to sleep idle before exiting */ 7706 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7707 7708 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7709 "mdi_vhcache_flush"); 7710 mutex_enter(&vhc->vhc_lock); 7711 for (; ; ) { 7712 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7713 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7714 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7715 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7716 (void) cv_timedwait(&vhc->vhc_cv, 7717 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7718 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7719 } else { 7720 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7721 mutex_exit(&vhc->vhc_lock); 7722 7723 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7724 vhcache_dirty(vhc); 7725 7726 mutex_enter(&vhc->vhc_lock); 7727 } 7728 } 7729 7730 quit_at_ticks = ddi_get_lbolt() + idle_time; 7731 7732 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7733 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7734 ddi_get_lbolt() < quit_at_ticks) { 7735 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7736 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7737 quit_at_ticks); 7738 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7739 } 7740 7741 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7742 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7743 goto out; 7744 } 7745 7746 out: 7747 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7748 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7749 CALLB_CPR_EXIT(&cprinfo); 7750 } 7751 7752 /* 7753 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7754 */ 7755 static void 7756 vhcache_dirty(mdi_vhci_config_t *vhc) 7757 { 7758 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7759 int create_thread; 7760 7761 rw_enter(&vhcache->vhcache_lock, RW_READER); 7762 /* do not flush cache until the cache is fully built */ 7763 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7764 rw_exit(&vhcache->vhcache_lock); 7765 return; 7766 } 7767 rw_exit(&vhcache->vhcache_lock); 7768 7769 mutex_enter(&vhc->vhc_lock); 7770 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7771 mutex_exit(&vhc->vhc_lock); 7772 return; 7773 } 7774 7775 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7776 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7777 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7778 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7779 cv_broadcast(&vhc->vhc_cv); 7780 create_thread = 0; 7781 } else { 7782 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7783 create_thread = 1; 7784 } 7785 mutex_exit(&vhc->vhc_lock); 7786 7787 if (create_thread) 7788 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7789 0, &p0, TS_RUN, minclsyspri); 7790 } 7791 7792 /* 7793 * phci bus config structure - one for for each phci bus config operation that 7794 * we initiate on behalf of a vhci. 
7795 */ 7796 typedef struct mdi_phci_bus_config_s { 7797 char *phbc_phci_path; 7798 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7799 struct mdi_phci_bus_config_s *phbc_next; 7800 } mdi_phci_bus_config_t; 7801 7802 /* vhci bus config structure - one for each vhci bus config operation */ 7803 typedef struct mdi_vhci_bus_config_s { 7804 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7805 major_t vhbc_op_major; /* bus config op major */ 7806 uint_t vhbc_op_flags; /* bus config op flags */ 7807 kmutex_t vhbc_lock; 7808 kcondvar_t vhbc_cv; 7809 int vhbc_thr_count; 7810 } mdi_vhci_bus_config_t; 7811 7812 /* 7813 * bus config the specified phci 7814 */ 7815 static void 7816 bus_config_phci(void *arg) 7817 { 7818 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7819 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7820 dev_info_t *ph_dip; 7821 7822 /* 7823 * first configure all path components upto phci and then configure 7824 * the phci children. 7825 */ 7826 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7827 != NULL) { 7828 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7829 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7830 (void) ndi_devi_config_driver(ph_dip, 7831 vhbc->vhbc_op_flags, 7832 vhbc->vhbc_op_major); 7833 } else 7834 (void) ndi_devi_config(ph_dip, 7835 vhbc->vhbc_op_flags); 7836 7837 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7838 ndi_rele_devi(ph_dip); 7839 } 7840 7841 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7842 kmem_free(phbc, sizeof (*phbc)); 7843 7844 mutex_enter(&vhbc->vhbc_lock); 7845 vhbc->vhbc_thr_count--; 7846 if (vhbc->vhbc_thr_count == 0) 7847 cv_broadcast(&vhbc->vhbc_cv); 7848 mutex_exit(&vhbc->vhbc_lock); 7849 } 7850 7851 /* 7852 * Bus config all phcis associated with the vhci in parallel. 7853 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7854 */ 7855 static void 7856 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7857 ddi_bus_config_op_t op, major_t maj) 7858 { 7859 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7860 mdi_vhci_bus_config_t *vhbc; 7861 mdi_vhcache_phci_t *cphci; 7862 7863 rw_enter(&vhcache->vhcache_lock, RW_READER); 7864 if (vhcache->vhcache_phci_head == NULL) { 7865 rw_exit(&vhcache->vhcache_lock); 7866 return; 7867 } 7868 7869 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7870 7871 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7872 cphci = cphci->cphci_next) { 7873 /* skip phcis that haven't attached before root is available */ 7874 if (!modrootloaded && (cphci->cphci_phci == NULL)) 7875 continue; 7876 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7877 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7878 KM_SLEEP); 7879 phbc->phbc_vhbusconfig = vhbc; 7880 phbc->phbc_next = phbc_head; 7881 phbc_head = phbc; 7882 vhbc->vhbc_thr_count++; 7883 } 7884 rw_exit(&vhcache->vhcache_lock); 7885 7886 vhbc->vhbc_op = op; 7887 vhbc->vhbc_op_major = maj; 7888 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7889 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7890 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7891 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7892 7893 /* now create threads to initiate bus config on all phcis in parallel */ 7894 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7895 phbc_next = phbc->phbc_next; 7896 if (mdi_mtc_off) 7897 bus_config_phci((void *)phbc); 7898 else 7899 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7900 0, &p0, TS_RUN, minclsyspri); 7901 } 7902 7903 mutex_enter(&vhbc->vhbc_lock); 7904 /* wait until all threads exit */ 7905 while (vhbc->vhbc_thr_count > 0) 7906 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7907 mutex_exit(&vhbc->vhbc_lock); 7908 7909 mutex_destroy(&vhbc->vhbc_lock); 7910 cv_destroy(&vhbc->vhbc_cv); 7911 kmem_free(vhbc, sizeof (*vhbc)); 7912 } 7913 7914 /* 7915 * Single 
threaded version of bus_config_all_phcis() 7916 */ 7917 static void 7918 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7919 ddi_bus_config_op_t op, major_t maj) 7920 { 7921 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7922 7923 single_threaded_vhconfig_enter(vhc); 7924 bus_config_all_phcis(vhcache, flags, op, maj); 7925 single_threaded_vhconfig_exit(vhc); 7926 } 7927 7928 /* 7929 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7930 * The path includes the child component in addition to the phci path. 7931 */ 7932 static int 7933 bus_config_one_phci_child(char *path) 7934 { 7935 dev_info_t *ph_dip, *child; 7936 char *devnm; 7937 int rv = MDI_FAILURE; 7938 7939 /* extract the child component of the phci */ 7940 devnm = strrchr(path, '/'); 7941 *devnm++ = '\0'; 7942 7943 /* 7944 * first configure all path components upto phci and then 7945 * configure the phci child. 7946 */ 7947 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7948 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7949 NDI_SUCCESS) { 7950 /* 7951 * release the hold that ndi_devi_config_one() placed 7952 */ 7953 ndi_rele_devi(child); 7954 rv = MDI_SUCCESS; 7955 } 7956 7957 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7958 ndi_rele_devi(ph_dip); 7959 } 7960 7961 devnm--; 7962 *devnm = '/'; 7963 return (rv); 7964 } 7965 7966 /* 7967 * Build a list of phci client paths for the specified vhci client. 7968 * The list includes only those phci client paths which aren't configured yet. 7969 */ 7970 static mdi_phys_path_t * 7971 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7972 { 7973 mdi_vhcache_pathinfo_t *cpi; 7974 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7975 int config_path, len; 7976 7977 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7978 /* 7979 * include only those paths that aren't configured. 
7980 */ 7981 config_path = 0; 7982 if (cpi->cpi_pip == NULL) 7983 config_path = 1; 7984 else { 7985 MDI_PI_LOCK(cpi->cpi_pip); 7986 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7987 config_path = 1; 7988 MDI_PI_UNLOCK(cpi->cpi_pip); 7989 } 7990 7991 if (config_path) { 7992 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7993 len = strlen(cpi->cpi_cphci->cphci_path) + 7994 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7995 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7996 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7997 cpi->cpi_cphci->cphci_path, ct_name, 7998 cpi->cpi_addr); 7999 pp->phys_path_next = NULL; 8000 8001 if (pp_head == NULL) 8002 pp_head = pp; 8003 else 8004 pp_tail->phys_path_next = pp; 8005 pp_tail = pp; 8006 } 8007 } 8008 8009 return (pp_head); 8010 } 8011 8012 /* 8013 * Free the memory allocated for phci client path list. 8014 */ 8015 static void 8016 free_phclient_path_list(mdi_phys_path_t *pp_head) 8017 { 8018 mdi_phys_path_t *pp, *pp_next; 8019 8020 for (pp = pp_head; pp != NULL; pp = pp_next) { 8021 pp_next = pp->phys_path_next; 8022 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8023 kmem_free(pp, sizeof (*pp)); 8024 } 8025 } 8026 8027 /* 8028 * Allocated async client structure and initialize with the specified values. 8029 */ 8030 static mdi_async_client_config_t * 8031 alloc_async_client_config(char *ct_name, char *ct_addr, 8032 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8033 { 8034 mdi_async_client_config_t *acc; 8035 8036 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8037 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8038 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8039 acc->acc_phclient_path_list_head = pp_head; 8040 init_vhcache_lookup_token(&acc->acc_token, tok); 8041 acc->acc_next = NULL; 8042 return (acc); 8043 } 8044 8045 /* 8046 * Free the memory allocated for the async client structure and their members. 
8047 */ 8048 static void 8049 free_async_client_config(mdi_async_client_config_t *acc) 8050 { 8051 if (acc->acc_phclient_path_list_head) 8052 free_phclient_path_list(acc->acc_phclient_path_list_head); 8053 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8054 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8055 kmem_free(acc, sizeof (*acc)); 8056 } 8057 8058 /* 8059 * Sort vhcache pathinfos (cpis) of the specified client. 8060 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8061 * flag set come at the beginning of the list. All cpis which have this 8062 * flag set come at the end of the list. 8063 */ 8064 static void 8065 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8066 { 8067 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8068 8069 cpi_head = cct->cct_cpi_head; 8070 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8071 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8072 cpi_next = cpi->cpi_next; 8073 enqueue_vhcache_pathinfo(cct, cpi); 8074 } 8075 } 8076 8077 /* 8078 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8079 * every vhcache pathinfo of the specified client. If not adjust the flag 8080 * setting appropriately. 8081 * 8082 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8083 * on-disk vhci cache. So every time this flag is updated the cache must be 8084 * flushed. 8085 */ 8086 static void 8087 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8088 mdi_vhcache_lookup_token_t *tok) 8089 { 8090 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8091 mdi_vhcache_client_t *cct; 8092 mdi_vhcache_pathinfo_t *cpi; 8093 8094 rw_enter(&vhcache->vhcache_lock, RW_READER); 8095 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8096 == NULL) { 8097 rw_exit(&vhcache->vhcache_lock); 8098 return; 8099 } 8100 8101 /* 8102 * to avoid unnecessary on-disk cache updates, first check if an 8103 * update is really needed. 
If no update is needed simply return. 8104 */ 8105 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8106 if ((cpi->cpi_pip != NULL && 8107 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8108 (cpi->cpi_pip == NULL && 8109 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8110 break; 8111 } 8112 } 8113 if (cpi == NULL) { 8114 rw_exit(&vhcache->vhcache_lock); 8115 return; 8116 } 8117 8118 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8119 rw_exit(&vhcache->vhcache_lock); 8120 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8121 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8122 tok)) == NULL) { 8123 rw_exit(&vhcache->vhcache_lock); 8124 return; 8125 } 8126 } 8127 8128 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8129 if (cpi->cpi_pip != NULL) 8130 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8131 else 8132 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8133 } 8134 sort_vhcache_paths(cct); 8135 8136 rw_exit(&vhcache->vhcache_lock); 8137 vhcache_dirty(vhc); 8138 } 8139 8140 /* 8141 * Configure all specified paths of the client. 8142 */ 8143 static void 8144 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8145 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8146 { 8147 mdi_phys_path_t *pp; 8148 8149 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8150 (void) bus_config_one_phci_child(pp->phys_path); 8151 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8152 } 8153 8154 /* 8155 * Dequeue elements from vhci async client config list and bus configure 8156 * their corresponding phci clients. 
8157 */ 8158 static void 8159 config_client_paths_thread(void *arg) 8160 { 8161 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8162 mdi_async_client_config_t *acc; 8163 clock_t quit_at_ticks; 8164 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8165 callb_cpr_t cprinfo; 8166 8167 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8168 "mdi_config_client_paths"); 8169 8170 for (; ; ) { 8171 quit_at_ticks = ddi_get_lbolt() + idle_time; 8172 8173 mutex_enter(&vhc->vhc_lock); 8174 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8175 vhc->vhc_acc_list_head == NULL && 8176 ddi_get_lbolt() < quit_at_ticks) { 8177 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8178 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8179 quit_at_ticks); 8180 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8181 } 8182 8183 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8184 vhc->vhc_acc_list_head == NULL) 8185 goto out; 8186 8187 acc = vhc->vhc_acc_list_head; 8188 vhc->vhc_acc_list_head = acc->acc_next; 8189 if (vhc->vhc_acc_list_head == NULL) 8190 vhc->vhc_acc_list_tail = NULL; 8191 vhc->vhc_acc_count--; 8192 mutex_exit(&vhc->vhc_lock); 8193 8194 config_client_paths_sync(vhc, acc->acc_ct_name, 8195 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8196 &acc->acc_token); 8197 8198 free_async_client_config(acc); 8199 } 8200 8201 out: 8202 vhc->vhc_acc_thrcount--; 8203 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8204 CALLB_CPR_EXIT(&cprinfo); 8205 } 8206 8207 /* 8208 * Arrange for all the phci client paths (pp_head) for the specified client 8209 * to be bus configured asynchronously by a thread. 
8210 */ 8211 static void 8212 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8213 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8214 { 8215 mdi_async_client_config_t *acc, *newacc; 8216 int create_thread; 8217 8218 if (pp_head == NULL) 8219 return; 8220 8221 if (mdi_mtc_off) { 8222 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8223 free_phclient_path_list(pp_head); 8224 return; 8225 } 8226 8227 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8228 ASSERT(newacc); 8229 8230 mutex_enter(&vhc->vhc_lock); 8231 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8232 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8233 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8234 free_async_client_config(newacc); 8235 mutex_exit(&vhc->vhc_lock); 8236 return; 8237 } 8238 } 8239 8240 if (vhc->vhc_acc_list_head == NULL) 8241 vhc->vhc_acc_list_head = newacc; 8242 else 8243 vhc->vhc_acc_list_tail->acc_next = newacc; 8244 vhc->vhc_acc_list_tail = newacc; 8245 vhc->vhc_acc_count++; 8246 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8247 cv_broadcast(&vhc->vhc_cv); 8248 create_thread = 0; 8249 } else { 8250 vhc->vhc_acc_thrcount++; 8251 create_thread = 1; 8252 } 8253 mutex_exit(&vhc->vhc_lock); 8254 8255 if (create_thread) 8256 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8257 0, &p0, TS_RUN, minclsyspri); 8258 } 8259 8260 /* 8261 * Return number of online paths for the specified client. 
8262 */ 8263 static int 8264 nonline_paths(mdi_vhcache_client_t *cct) 8265 { 8266 mdi_vhcache_pathinfo_t *cpi; 8267 int online_count = 0; 8268 8269 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8270 if (cpi->cpi_pip != NULL) { 8271 MDI_PI_LOCK(cpi->cpi_pip); 8272 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8273 online_count++; 8274 MDI_PI_UNLOCK(cpi->cpi_pip); 8275 } 8276 } 8277 8278 return (online_count); 8279 } 8280 8281 /* 8282 * Bus configure all paths for the specified vhci client. 8283 * If at least one path for the client is already online, the remaining paths 8284 * will be configured asynchronously. Otherwise, it synchronously configures 8285 * the paths until at least one path is online and then rest of the paths 8286 * will be configured asynchronously. 8287 */ 8288 static void 8289 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8290 { 8291 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8292 mdi_phys_path_t *pp_head, *pp; 8293 mdi_vhcache_client_t *cct; 8294 mdi_vhcache_lookup_token_t tok; 8295 8296 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8297 8298 init_vhcache_lookup_token(&tok, NULL); 8299 8300 if (ct_name == NULL || ct_addr == NULL || 8301 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8302 == NULL || 8303 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8304 rw_exit(&vhcache->vhcache_lock); 8305 return; 8306 } 8307 8308 /* if at least one path is online, configure the rest asynchronously */ 8309 if (nonline_paths(cct) > 0) { 8310 rw_exit(&vhcache->vhcache_lock); 8311 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8312 return; 8313 } 8314 8315 rw_exit(&vhcache->vhcache_lock); 8316 8317 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8318 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8319 rw_enter(&vhcache->vhcache_lock, RW_READER); 8320 8321 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8322 ct_addr, &tok)) 
== NULL) { 8323 rw_exit(&vhcache->vhcache_lock); 8324 goto out; 8325 } 8326 8327 if (nonline_paths(cct) > 0 && 8328 pp->phys_path_next != NULL) { 8329 rw_exit(&vhcache->vhcache_lock); 8330 config_client_paths_async(vhc, ct_name, ct_addr, 8331 pp->phys_path_next, &tok); 8332 pp->phys_path_next = NULL; 8333 goto out; 8334 } 8335 8336 rw_exit(&vhcache->vhcache_lock); 8337 } 8338 } 8339 8340 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8341 out: 8342 free_phclient_path_list(pp_head); 8343 } 8344 8345 static void 8346 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8347 { 8348 mutex_enter(&vhc->vhc_lock); 8349 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8350 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8351 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8352 mutex_exit(&vhc->vhc_lock); 8353 } 8354 8355 static void 8356 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8357 { 8358 mutex_enter(&vhc->vhc_lock); 8359 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8360 cv_broadcast(&vhc->vhc_cv); 8361 mutex_exit(&vhc->vhc_lock); 8362 } 8363 8364 typedef struct mdi_phci_driver_info { 8365 char *phdriver_name; /* name of the phci driver */ 8366 8367 /* set to non zero if the phci driver supports root device */ 8368 int phdriver_root_support; 8369 } mdi_phci_driver_info_t; 8370 8371 /* 8372 * vhci class and root support capability of a phci driver can be 8373 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8374 * phci driver.conf file. The built-in tables below contain this information 8375 * for those phci drivers whose driver.conf files don't yet contain this info. 8376 * 8377 * All phci drivers expect iscsi have root device support. 
8378 */ 8379 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8380 { "fp", 1 }, 8381 { "iscsi", 0 }, 8382 { "ibsrp", 1 } 8383 }; 8384 8385 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8386 8387 static void * 8388 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8389 { 8390 void *new_ptr; 8391 8392 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8393 if (old_ptr) { 8394 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8395 kmem_free(old_ptr, old_size); 8396 } 8397 return (new_ptr); 8398 } 8399 8400 static void 8401 add_to_phci_list(char ***driver_list, int **root_support_list, 8402 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8403 { 8404 ASSERT(*cur_elements <= *max_elements); 8405 if (*cur_elements == *max_elements) { 8406 *max_elements += 10; 8407 *driver_list = mdi_realloc(*driver_list, 8408 sizeof (char *) * (*cur_elements), 8409 sizeof (char *) * (*max_elements)); 8410 *root_support_list = mdi_realloc(*root_support_list, 8411 sizeof (int) * (*cur_elements), 8412 sizeof (int) * (*max_elements)); 8413 } 8414 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8415 (*root_support_list)[*cur_elements] = root_support; 8416 (*cur_elements)++; 8417 } 8418 8419 static void 8420 get_phci_driver_list(char *vhci_class, char ***driver_list, 8421 int **root_support_list, int *cur_elements, int *max_elements) 8422 { 8423 mdi_phci_driver_info_t *st_driver_list, *p; 8424 int st_ndrivers, root_support, i, j, driver_conf_count; 8425 major_t m; 8426 struct devnames *dnp; 8427 ddi_prop_t *propp; 8428 8429 *driver_list = NULL; 8430 *root_support_list = NULL; 8431 *cur_elements = 0; 8432 *max_elements = 0; 8433 8434 /* add the phci drivers derived from the phci driver.conf files */ 8435 for (m = 0; m < devcnt; m++) { 8436 dnp = &devnamesp[m]; 8437 8438 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8439 LOCK_DEV_OPS(&dnp->dn_lock); 8440 if (dnp->dn_global_prop_ptr != NULL && 8441 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8442 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8443 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8444 strcmp(propp->prop_val, vhci_class) == 0) { 8445 8446 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8447 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8448 &dnp->dn_global_prop_ptr->prop_list) 8449 == NULL) ? 1 : 0; 8450 8451 add_to_phci_list(driver_list, root_support_list, 8452 cur_elements, max_elements, dnp->dn_name, 8453 root_support); 8454 8455 UNLOCK_DEV_OPS(&dnp->dn_lock); 8456 } else 8457 UNLOCK_DEV_OPS(&dnp->dn_lock); 8458 } 8459 } 8460 8461 driver_conf_count = *cur_elements; 8462 8463 /* add the phci drivers specified in the built-in tables */ 8464 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8465 st_driver_list = scsi_phci_driver_list; 8466 st_ndrivers = sizeof (scsi_phci_driver_list) / 8467 sizeof (mdi_phci_driver_info_t); 8468 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8469 st_driver_list = ib_phci_driver_list; 8470 st_ndrivers = sizeof (ib_phci_driver_list) / 8471 sizeof (mdi_phci_driver_info_t); 8472 } else { 8473 st_driver_list = NULL; 8474 st_ndrivers = 0; 8475 } 8476 8477 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8478 /* add this phci driver if not already added before */ 8479 for (j = 0; j < driver_conf_count; j++) { 8480 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8481 break; 8482 } 8483 if (j == driver_conf_count) { 8484 add_to_phci_list(driver_list, root_support_list, 8485 cur_elements, max_elements, p->phdriver_name, 8486 p->phdriver_root_support); 8487 } 8488 } 8489 } 8490 8491 /* 8492 * Attach the phci driver instances associated with the specified vhci class. 8493 * If root is mounted attach all phci driver instances. 8494 * If root is not mounted, attach the instances of only those phci 8495 * drivers that have the root support. 
8496 */ 8497 static void 8498 attach_phci_drivers(char *vhci_class) 8499 { 8500 char **driver_list, **p; 8501 int *root_support_list; 8502 int cur_elements, max_elements, i; 8503 major_t m; 8504 8505 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8506 &cur_elements, &max_elements); 8507 8508 for (i = 0; i < cur_elements; i++) { 8509 if (modrootloaded || root_support_list[i]) { 8510 m = ddi_name_to_major(driver_list[i]); 8511 if (m != (major_t)-1 && ddi_hold_installed_driver(m)) 8512 ddi_rele_driver(m); 8513 } 8514 } 8515 8516 if (driver_list) { 8517 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8518 kmem_free(*p, strlen(*p) + 1); 8519 kmem_free(driver_list, sizeof (char *) * max_elements); 8520 kmem_free(root_support_list, sizeof (int) * max_elements); 8521 } 8522 } 8523 8524 /* 8525 * Build vhci cache: 8526 * 8527 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8528 * the phci driver instances. During this process the cache gets built. 8529 * 8530 * Cache is built fully if the root is mounted. 8531 * If the root is not mounted, phci drivers that do not have root support 8532 * are not attached. As a result the cache is built partially. The entries 8533 * in the cache reflect only those phci drivers that have root support. 
8534 */ 8535 static int 8536 build_vhci_cache(mdi_vhci_t *vh) 8537 { 8538 mdi_vhci_config_t *vhc = vh->vh_config; 8539 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8540 8541 single_threaded_vhconfig_enter(vhc); 8542 8543 rw_enter(&vhcache->vhcache_lock, RW_READER); 8544 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8545 rw_exit(&vhcache->vhcache_lock); 8546 single_threaded_vhconfig_exit(vhc); 8547 return (0); 8548 } 8549 rw_exit(&vhcache->vhcache_lock); 8550 8551 attach_phci_drivers(vh->vh_class); 8552 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8553 BUS_CONFIG_ALL, (major_t)-1); 8554 8555 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8556 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8557 rw_exit(&vhcache->vhcache_lock); 8558 8559 single_threaded_vhconfig_exit(vhc); 8560 vhcache_dirty(vhc); 8561 return (1); 8562 } 8563 8564 /* 8565 * Determine if discovery of paths is needed. 8566 */ 8567 static int 8568 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8569 { 8570 int rv = 1; 8571 8572 mutex_enter(&vhc->vhc_lock); 8573 if (i_ddi_io_initialized() == 0) { 8574 if (vhc->vhc_path_discovery_boot > 0) { 8575 vhc->vhc_path_discovery_boot--; 8576 goto out; 8577 } 8578 } else { 8579 if (vhc->vhc_path_discovery_postboot > 0) { 8580 vhc->vhc_path_discovery_postboot--; 8581 goto out; 8582 } 8583 } 8584 8585 /* 8586 * Do full path discovery at most once per mdi_path_discovery_interval. 8587 * This is to avoid a series of full path discoveries when opening 8588 * stale /dev/[r]dsk links. 8589 */ 8590 if (mdi_path_discovery_interval != -1 && 8591 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8592 goto out; 8593 8594 rv = 0; 8595 out: 8596 mutex_exit(&vhc->vhc_lock); 8597 return (rv); 8598 } 8599 8600 /* 8601 * Discover all paths: 8602 * 8603 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8604 * driver instances. During this process all paths will be discovered. 
8605 */ 8606 static int 8607 vhcache_discover_paths(mdi_vhci_t *vh) 8608 { 8609 mdi_vhci_config_t *vhc = vh->vh_config; 8610 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8611 int rv = 0; 8612 8613 single_threaded_vhconfig_enter(vhc); 8614 8615 if (vhcache_do_discovery(vhc)) { 8616 attach_phci_drivers(vh->vh_class); 8617 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8618 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8619 8620 mutex_enter(&vhc->vhc_lock); 8621 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8622 mdi_path_discovery_interval * TICKS_PER_SECOND; 8623 mutex_exit(&vhc->vhc_lock); 8624 rv = 1; 8625 } 8626 8627 single_threaded_vhconfig_exit(vhc); 8628 return (rv); 8629 } 8630 8631 /* 8632 * Generic vhci bus config implementation: 8633 * 8634 * Parameters 8635 * vdip vhci dip 8636 * flags bus config flags 8637 * op bus config operation 8638 * The remaining parameters are bus config operation specific 8639 * 8640 * for BUS_CONFIG_ONE 8641 * arg pointer to name@addr 8642 * child upon successful return from this function, *child will be 8643 * set to the configured and held devinfo child node of vdip. 8644 * ct_addr pointer to client address (i.e. GUID) 8645 * 8646 * for BUS_CONFIG_DRIVER 8647 * arg major number of the driver 8648 * child and ct_addr parameters are ignored 8649 * 8650 * for BUS_CONFIG_ALL 8651 * arg, child, and ct_addr parameters are ignored 8652 * 8653 * Note that for the rest of the bus config operations, this function simply 8654 * calls the framework provided default bus config routine. 
8655 */ 8656 int 8657 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8658 void *arg, dev_info_t **child, char *ct_addr) 8659 { 8660 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8661 mdi_vhci_config_t *vhc = vh->vh_config; 8662 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8663 int rv = 0; 8664 int params_valid = 0; 8665 char *cp; 8666 8667 /* 8668 * To bus config vhcis we relay operation, possibly using another 8669 * thread, to phcis. The phci driver then interacts with MDI to cause 8670 * vhci child nodes to be enumerated under the vhci node. Adding a 8671 * vhci child requires an ndi_devi_enter of the vhci. Since another 8672 * thread may be adding the child, to avoid deadlock we can't wait 8673 * for the relayed operations to complete if we have already entered 8674 * the vhci node. 8675 */ 8676 if (DEVI_BUSY_OWNED(vdip)) { 8677 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8678 "vhci dip is busy owned %p\n", (void *)vdip)); 8679 goto default_bus_config; 8680 } 8681 8682 rw_enter(&vhcache->vhcache_lock, RW_READER); 8683 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8684 rw_exit(&vhcache->vhcache_lock); 8685 rv = build_vhci_cache(vh); 8686 rw_enter(&vhcache->vhcache_lock, RW_READER); 8687 } 8688 8689 switch (op) { 8690 case BUS_CONFIG_ONE: 8691 if (arg != NULL && ct_addr != NULL) { 8692 /* extract node name */ 8693 cp = (char *)arg; 8694 while (*cp != '\0' && *cp != '@') 8695 cp++; 8696 if (*cp == '@') { 8697 params_valid = 1; 8698 *cp = '\0'; 8699 config_client_paths(vhc, (char *)arg, ct_addr); 8700 /* config_client_paths() releases cache_lock */ 8701 *cp = '@'; 8702 break; 8703 } 8704 } 8705 8706 rw_exit(&vhcache->vhcache_lock); 8707 break; 8708 8709 case BUS_CONFIG_DRIVER: 8710 rw_exit(&vhcache->vhcache_lock); 8711 if (rv == 0) 8712 st_bus_config_all_phcis(vhc, flags, op, 8713 (major_t)(uintptr_t)arg); 8714 break; 8715 8716 case BUS_CONFIG_ALL: 8717 rw_exit(&vhcache->vhcache_lock); 8718 if (rv == 0) 8719 
st_bus_config_all_phcis(vhc, flags, op, -1); 8720 break; 8721 8722 default: 8723 rw_exit(&vhcache->vhcache_lock); 8724 break; 8725 } 8726 8727 8728 default_bus_config: 8729 /* 8730 * All requested child nodes are enumerated under the vhci. 8731 * Now configure them. 8732 */ 8733 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8734 NDI_SUCCESS) { 8735 return (MDI_SUCCESS); 8736 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8737 /* discover all paths and try configuring again */ 8738 if (vhcache_discover_paths(vh) && 8739 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8740 NDI_SUCCESS) 8741 return (MDI_SUCCESS); 8742 } 8743 8744 return (MDI_FAILURE); 8745 } 8746 8747 /* 8748 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8749 */ 8750 static nvlist_t * 8751 read_on_disk_vhci_cache(char *vhci_class) 8752 { 8753 nvlist_t *nvl; 8754 int err; 8755 char *filename; 8756 8757 filename = vhclass2vhcache_filename(vhci_class); 8758 8759 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8760 kmem_free(filename, strlen(filename) + 1); 8761 return (nvl); 8762 } else if (err == EIO) 8763 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8764 else if (err == EINVAL) 8765 cmn_err(CE_WARN, 8766 "%s: data file corrupted, will recreate\n", filename); 8767 8768 kmem_free(filename, strlen(filename) + 1); 8769 return (NULL); 8770 } 8771 8772 /* 8773 * Read on-disk vhci cache into nvlists for all vhci classes. 8774 * Called during booting by i_ddi_read_devices_files(). 8775 */ 8776 void 8777 mdi_read_devices_files(void) 8778 { 8779 int i; 8780 8781 for (i = 0; i < N_VHCI_CLASSES; i++) 8782 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8783 } 8784 8785 /* 8786 * Remove all stale entries from vhci cache. 
8787 */ 8788 static void 8789 clean_vhcache(mdi_vhci_config_t *vhc) 8790 { 8791 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8792 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8793 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8794 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8795 8796 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8797 8798 cct_head = vhcache->vhcache_client_head; 8799 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8800 for (cct = cct_head; cct != NULL; cct = cct_next) { 8801 cct_next = cct->cct_next; 8802 8803 cpi_head = cct->cct_cpi_head; 8804 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8805 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8806 cpi_next = cpi->cpi_next; 8807 if (cpi->cpi_pip != NULL) { 8808 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8809 enqueue_tail_vhcache_pathinfo(cct, cpi); 8810 } else 8811 free_vhcache_pathinfo(cpi); 8812 } 8813 8814 if (cct->cct_cpi_head != NULL) 8815 enqueue_vhcache_client(vhcache, cct); 8816 else { 8817 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8818 (mod_hash_key_t)cct->cct_name_addr); 8819 free_vhcache_client(cct); 8820 } 8821 } 8822 8823 cphci_head = vhcache->vhcache_phci_head; 8824 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8825 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8826 cphci_next = cphci->cphci_next; 8827 if (cphci->cphci_phci != NULL) 8828 enqueue_vhcache_phci(vhcache, cphci); 8829 else 8830 free_vhcache_phci(cphci); 8831 } 8832 8833 vhcache->vhcache_clean_time = lbolt64; 8834 rw_exit(&vhcache->vhcache_lock); 8835 vhcache_dirty(vhc); 8836 } 8837 8838 /* 8839 * Remove all stale entries from vhci cache. 
8840 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8841 */ 8842 void 8843 mdi_clean_vhcache(void) 8844 { 8845 mdi_vhci_t *vh; 8846 8847 mutex_enter(&mdi_mutex); 8848 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8849 vh->vh_refcnt++; 8850 mutex_exit(&mdi_mutex); 8851 clean_vhcache(vh->vh_config); 8852 mutex_enter(&mdi_mutex); 8853 vh->vh_refcnt--; 8854 } 8855 mutex_exit(&mdi_mutex); 8856 } 8857 8858 /* 8859 * mdi_vhci_walk_clients(): 8860 * Walker routine to traverse client dev_info nodes 8861 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8862 * below the client, including nexus devices, which we dont want. 8863 * So we just traverse the immediate siblings, starting from 1st client. 8864 */ 8865 void 8866 mdi_vhci_walk_clients(dev_info_t *vdip, 8867 int (*f)(dev_info_t *, void *), void *arg) 8868 { 8869 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8870 dev_info_t *cdip; 8871 mdi_client_t *ct; 8872 8873 MDI_VHCI_CLIENT_LOCK(vh); 8874 cdip = ddi_get_child(vdip); 8875 while (cdip) { 8876 ct = i_devi_get_client(cdip); 8877 MDI_CLIENT_LOCK(ct); 8878 8879 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 8880 cdip = ddi_get_next_sibling(cdip); 8881 else 8882 cdip = NULL; 8883 8884 MDI_CLIENT_UNLOCK(ct); 8885 } 8886 MDI_VHCI_CLIENT_UNLOCK(vh); 8887 } 8888 8889 /* 8890 * mdi_vhci_walk_phcis(): 8891 * Walker routine to traverse phci dev_info nodes 8892 */ 8893 void 8894 mdi_vhci_walk_phcis(dev_info_t *vdip, 8895 int (*f)(dev_info_t *, void *), void *arg) 8896 { 8897 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8898 mdi_phci_t *ph, *next; 8899 8900 MDI_VHCI_PHCI_LOCK(vh); 8901 ph = vh->vh_phci_head; 8902 while (ph) { 8903 MDI_PHCI_LOCK(ph); 8904 8905 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 8906 next = ph->ph_next; 8907 else 8908 next = NULL; 8909 8910 MDI_PHCI_UNLOCK(ph); 8911 ph = next; 8912 } 8913 MDI_VHCI_PHCI_UNLOCK(vh); 8914 } 8915 8916 8917 /* 8918 * mdi_walk_vhcis(): 8919 * Walker routine to traverse vhci 
dev_info nodes 8920 */ 8921 void 8922 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8923 { 8924 mdi_vhci_t *vh = NULL; 8925 8926 mutex_enter(&mdi_mutex); 8927 /* 8928 * Scan for already registered vhci 8929 */ 8930 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8931 vh->vh_refcnt++; 8932 mutex_exit(&mdi_mutex); 8933 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8934 mutex_enter(&mdi_mutex); 8935 vh->vh_refcnt--; 8936 break; 8937 } else { 8938 mutex_enter(&mdi_mutex); 8939 vh->vh_refcnt--; 8940 } 8941 } 8942 8943 mutex_exit(&mdi_mutex); 8944 } 8945 8946 /* 8947 * i_mdi_log_sysevent(): 8948 * Logs events for pickup by syseventd 8949 */ 8950 static void 8951 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8952 { 8953 char *path_name; 8954 nvlist_t *attr_list; 8955 8956 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8957 KM_SLEEP) != DDI_SUCCESS) { 8958 goto alloc_failed; 8959 } 8960 8961 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8962 (void) ddi_pathname(dip, path_name); 8963 8964 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8965 ddi_driver_name(dip)) != DDI_SUCCESS) { 8966 goto error; 8967 } 8968 8969 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8970 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8971 goto error; 8972 } 8973 8974 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8975 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8976 goto error; 8977 } 8978 8979 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8980 path_name) != DDI_SUCCESS) { 8981 goto error; 8982 } 8983 8984 if (nvlist_add_string(attr_list, DDI_CLASS, 8985 ph_vh_class) != DDI_SUCCESS) { 8986 goto error; 8987 } 8988 8989 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8990 attr_list, NULL, DDI_SLEEP); 8991 8992 error: 8993 kmem_free(path_name, MAXPATHLEN); 8994 nvlist_free(attr_list); 8995 return; 8996 8997 alloc_failed: 8998 MDI_DEBUG(1, (CE_WARN, dip, 8999 "!i_mdi_log_sysevent: Unable to send sysevent")); 9000 } 
9001 9002 char ** 9003 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9004 { 9005 char **driver_list, **ret_driver_list = NULL; 9006 int *root_support_list; 9007 int cur_elements, max_elements; 9008 9009 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9010 &cur_elements, &max_elements); 9011 9012 9013 if (driver_list) { 9014 kmem_free(root_support_list, sizeof (int) * max_elements); 9015 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9016 * max_elements, sizeof (char *) * cur_elements); 9017 } 9018 *ndrivers = cur_elements; 9019 9020 return (ret_driver_list); 9021 9022 } 9023 9024 void 9025 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9026 { 9027 char **p; 9028 int i; 9029 9030 if (driver_list) { 9031 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9032 kmem_free(*p, strlen(*p) + 1); 9033 kmem_free(driver_list, sizeof (char *) * ndrivers); 9034 } 9035 } 9036