1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 #include <sys/sysmacros.h> 72 73 #ifdef DEBUG 74 #include <sys/debug.h> 75 int mdi_debug = 1; 76 int mdi_debug_logonly = 0; 77 #define MDI_DEBUG(level, stmnt) \ 78 if (mdi_debug >= (level)) i_mdi_log stmnt 79 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 80 #else /* !DEBUG */ 81 #define MDI_DEBUG(level, stmnt) 82 #endif /* DEBUG */ 83 84 extern pri_t minclsyspri; 85 extern int modrootloaded; 86 87 /* 88 * Global mutex: 89 * Protects vHCI list and structure members. 
90 */ 91 kmutex_t mdi_mutex; 92 93 /* 94 * Registered vHCI class driver lists 95 */ 96 int mdi_vhci_count; 97 mdi_vhci_t *mdi_vhci_head; 98 mdi_vhci_t *mdi_vhci_tail; 99 100 /* 101 * Client Hash Table size 102 */ 103 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 104 105 /* 106 * taskq interface definitions 107 */ 108 #define MDI_TASKQ_N_THREADS 8 109 #define MDI_TASKQ_PRI minclsyspri 110 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 111 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 112 113 taskq_t *mdi_taskq; 114 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 115 116 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 117 118 /* 119 * The data should be "quiet" for this interval (in seconds) before the 120 * vhci cached data is flushed to the disk. 121 */ 122 static int mdi_vhcache_flush_delay = 10; 123 124 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 125 static int mdi_vhcache_flush_daemon_idle_time = 60; 126 127 /* 128 * MDI falls back to discovery of all paths when a bus_config_one fails. 129 * The following parameters can be used to tune this operation. 130 * 131 * mdi_path_discovery_boot 132 * Number of times path discovery will be attempted during early boot. 133 * Probably there is no reason to ever set this value to greater than one. 134 * 135 * mdi_path_discovery_postboot 136 * Number of times path discovery will be attempted after early boot. 137 * Set it to a minimum of two to allow for discovery of iscsi paths which 138 * may happen very late during booting. 139 * 140 * mdi_path_discovery_interval 141 * Minimum number of seconds MDI will wait between successive discovery 142 * of all paths. Set it to -1 to disable discovery of all paths. 
143 */ 144 static int mdi_path_discovery_boot = 1; 145 static int mdi_path_discovery_postboot = 2; 146 static int mdi_path_discovery_interval = 10; 147 148 /* 149 * number of seconds the asynchronous configuration thread will sleep idle 150 * before exiting. 151 */ 152 static int mdi_async_config_idle_time = 600; 153 154 static int mdi_bus_config_cache_hash_size = 256; 155 156 /* turns off multithreaded configuration for certain operations */ 157 static int mdi_mtc_off = 0; 158 159 /* 160 * MDI component property name/value string definitions 161 */ 162 const char *mdi_component_prop = "mpxio-component"; 163 const char *mdi_component_prop_vhci = "vhci"; 164 const char *mdi_component_prop_phci = "phci"; 165 const char *mdi_component_prop_client = "client"; 166 167 /* 168 * MDI client global unique identifier property name 169 */ 170 const char *mdi_client_guid_prop = "client-guid"; 171 172 /* 173 * MDI client load balancing property name/value string definitions 174 */ 175 const char *mdi_load_balance = "load-balance"; 176 const char *mdi_load_balance_none = "none"; 177 const char *mdi_load_balance_rr = "round-robin"; 178 const char *mdi_load_balance_lba = "logical-block"; 179 180 /* 181 * Obsolete vHCI class definition; to be removed after Leadville update 182 */ 183 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 184 185 static char vhci_greeting[] = 186 "\tThere already exists one vHCI driver for class %s\n" 187 "\tOnly one vHCI driver for each class is allowed\n"; 188 189 /* 190 * Static function prototypes 191 */ 192 static int i_mdi_phci_offline(dev_info_t *, uint_t); 193 static int i_mdi_client_offline(dev_info_t *, uint_t); 194 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 195 static void i_mdi_phci_post_detach(dev_info_t *, 196 ddi_detach_cmd_t, int); 197 static int i_mdi_client_pre_detach(dev_info_t *, 198 ddi_detach_cmd_t); 199 static void i_mdi_client_post_detach(dev_info_t *, 200 ddi_detach_cmd_t, int); 201 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 202 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 203 static int i_mdi_lba_lb(mdi_client_t *ct, 204 mdi_pathinfo_t **ret_pip, struct buf *buf); 205 static void i_mdi_pm_hold_client(mdi_client_t *, int); 206 static void i_mdi_pm_rele_client(mdi_client_t *, int); 207 static void i_mdi_pm_reset_client(mdi_client_t *); 208 static int i_mdi_power_all_phci(mdi_client_t *); 209 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 210 211 212 /* 213 * Internal mdi_pathinfo node functions 214 */ 215 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 216 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 217 218 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 219 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 220 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 221 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 222 static void i_mdi_phci_unlock(mdi_phci_t *); 223 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 224 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 225 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 226 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 227 mdi_client_t *); 228 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 229 static void i_mdi_client_remove_path(mdi_client_t *, 230 mdi_pathinfo_t *); 231 232 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 233 mdi_pathinfo_state_t, int); 234 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 235 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 236 char **, int); 237 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 238 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 239 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 240 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 241 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 242 
static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 243 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 244 static void i_mdi_client_update_state(mdi_client_t *); 245 static int i_mdi_client_compute_state(mdi_client_t *, 246 mdi_phci_t *); 247 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 248 static void i_mdi_client_unlock(mdi_client_t *); 249 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 250 static mdi_client_t *i_devi_get_client(dev_info_t *); 251 /* 252 * NOTE: this will be removed once the NWS files are changed to use the new 253 * mdi_{enable,disable}_path interfaces 254 */ 255 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 256 int, int); 257 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 258 mdi_vhci_t *vh, int flags, int op); 259 /* 260 * Failover related function prototypes 261 */ 262 static int i_mdi_failover(void *); 263 264 /* 265 * misc internal functions 266 */ 267 static int i_mdi_get_hash_key(char *); 268 static int i_map_nvlist_error_to_mdi(int); 269 static void i_mdi_report_path_state(mdi_client_t *, 270 mdi_pathinfo_t *); 271 272 static void setup_vhci_cache(mdi_vhci_t *); 273 static int destroy_vhci_cache(mdi_vhci_t *); 274 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 275 static boolean_t stop_vhcache_flush_thread(void *, int); 276 static void free_string_array(char **, int); 277 static void free_vhcache_phci(mdi_vhcache_phci_t *); 278 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 279 static void free_vhcache_client(mdi_vhcache_client_t *); 280 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 281 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 282 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 283 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 284 static void vhcache_pi_add(mdi_vhci_config_t *, 285 struct mdi_pathinfo *); 286 static void 
vhcache_pi_remove(mdi_vhci_config_t *, 287 struct mdi_pathinfo *); 288 static void free_phclient_path_list(mdi_phys_path_t *); 289 static void sort_vhcache_paths(mdi_vhcache_client_t *); 290 static int flush_vhcache(mdi_vhci_config_t *, int); 291 static void vhcache_dirty(mdi_vhci_config_t *); 292 static void free_async_client_config(mdi_async_client_config_t *); 293 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 294 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 295 static nvlist_t *read_on_disk_vhci_cache(char *); 296 extern int fread_nvlist(char *, nvlist_t **); 297 extern int fwrite_nvlist(char *, nvlist_t *); 298 299 /* called once when first vhci registers with mdi */ 300 static void 301 i_mdi_init() 302 { 303 static int initialized = 0; 304 305 if (initialized) 306 return; 307 initialized = 1; 308 309 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 310 /* 311 * Create our taskq resources 312 */ 313 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 314 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 315 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 316 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 317 } 318 319 /* 320 * mdi_get_component_type(): 321 * Return mpxio component type 322 * Return Values: 323 * MDI_COMPONENT_NONE 324 * MDI_COMPONENT_VHCI 325 * MDI_COMPONENT_PHCI 326 * MDI_COMPONENT_CLIENT 327 * XXX This doesn't work under multi-level MPxIO and should be 328 * removed when clients migrate mdi_component_is_*() interfaces. 329 */ 330 int 331 mdi_get_component_type(dev_info_t *dip) 332 { 333 return (DEVI(dip)->devi_mdi_component); 334 } 335 336 /* 337 * mdi_vhci_register(): 338 * Register a vHCI module with the mpxio framework 339 * mdi_vhci_register() is called by vHCI drivers to register the 340 * 'class_driver' vHCI driver and its MDI entrypoints with the 341 * mpxio framework. The vHCI driver must call this interface as 342 * part of its attach(9e) handler. 
343 * Competing threads may try to attach mdi_vhci_register() as 344 * the vHCI drivers are loaded and attached as a result of pHCI 345 * driver instance registration (mdi_phci_register()) with the 346 * framework. 347 * Return Values: 348 * MDI_SUCCESS 349 * MDI_FAILURE 350 */ 351 /*ARGSUSED*/ 352 int 353 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 354 int flags) 355 { 356 mdi_vhci_t *vh = NULL; 357 358 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 359 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 360 361 i_mdi_init(); 362 363 mutex_enter(&mdi_mutex); 364 /* 365 * Scan for already registered vhci 366 */ 367 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 368 if (strcmp(vh->vh_class, class) == 0) { 369 /* 370 * vHCI has already been created. Check for valid 371 * vHCI ops registration. We only support one vHCI 372 * module per class 373 */ 374 if (vh->vh_ops != NULL) { 375 mutex_exit(&mdi_mutex); 376 cmn_err(CE_NOTE, vhci_greeting, class); 377 return (MDI_FAILURE); 378 } 379 break; 380 } 381 } 382 383 /* 384 * if not yet created, create the vHCI component 385 */ 386 if (vh == NULL) { 387 struct client_hash *hash = NULL; 388 char *load_balance; 389 390 /* 391 * Allocate and initialize the mdi extensions 392 */ 393 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 394 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 395 KM_SLEEP); 396 vh->vh_client_table = hash; 397 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 398 (void) strcpy(vh->vh_class, class); 399 vh->vh_lb = LOAD_BALANCE_RR; 400 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 401 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 402 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 403 vh->vh_lb = LOAD_BALANCE_NONE; 404 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 405 == 0) { 406 vh->vh_lb = LOAD_BALANCE_LBA; 407 } 408 ddi_prop_free(load_balance); 409 } 410 411 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 412 
mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 413 414 /* 415 * Store the vHCI ops vectors 416 */ 417 vh->vh_dip = vdip; 418 vh->vh_ops = vops; 419 420 setup_vhci_cache(vh); 421 422 if (mdi_vhci_head == NULL) { 423 mdi_vhci_head = vh; 424 } 425 if (mdi_vhci_tail) { 426 mdi_vhci_tail->vh_next = vh; 427 } 428 mdi_vhci_tail = vh; 429 mdi_vhci_count++; 430 } 431 432 /* 433 * Claim the devfs node as a vhci component 434 */ 435 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 436 437 /* 438 * Initialize our back reference from dev_info node 439 */ 440 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 441 mutex_exit(&mdi_mutex); 442 return (MDI_SUCCESS); 443 } 444 445 /* 446 * mdi_vhci_unregister(): 447 * Unregister a vHCI module from mpxio framework 448 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 449 * of a vhci to unregister it from the framework. 450 * Return Values: 451 * MDI_SUCCESS 452 * MDI_FAILURE 453 */ 454 /*ARGSUSED*/ 455 int 456 mdi_vhci_unregister(dev_info_t *vdip, int flags) 457 { 458 mdi_vhci_t *found, *vh, *prev = NULL; 459 460 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 461 462 /* 463 * Check for invalid VHCI 464 */ 465 if ((vh = i_devi_get_vhci(vdip)) == NULL) 466 return (MDI_FAILURE); 467 468 /* 469 * Scan the list of registered vHCIs for a match 470 */ 471 mutex_enter(&mdi_mutex); 472 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 473 if (found == vh) 474 break; 475 prev = found; 476 } 477 478 if (found == NULL) { 479 mutex_exit(&mdi_mutex); 480 return (MDI_FAILURE); 481 } 482 483 /* 484 * Check the vHCI, pHCI and client count. All the pHCIs and clients 485 * should have been unregistered, before a vHCI can be 486 * unregistered. 
487 */ 488 MDI_VHCI_PHCI_LOCK(vh); 489 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 490 MDI_VHCI_PHCI_UNLOCK(vh); 491 mutex_exit(&mdi_mutex); 492 return (MDI_FAILURE); 493 } 494 MDI_VHCI_PHCI_UNLOCK(vh); 495 496 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 497 mutex_exit(&mdi_mutex); 498 return (MDI_FAILURE); 499 } 500 501 /* 502 * Remove the vHCI from the global list 503 */ 504 if (vh == mdi_vhci_head) { 505 mdi_vhci_head = vh->vh_next; 506 } else { 507 prev->vh_next = vh->vh_next; 508 } 509 if (vh == mdi_vhci_tail) { 510 mdi_vhci_tail = prev; 511 } 512 mdi_vhci_count--; 513 mutex_exit(&mdi_mutex); 514 515 vh->vh_ops = NULL; 516 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 517 DEVI(vdip)->devi_mdi_xhci = NULL; 518 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 519 kmem_free(vh->vh_client_table, 520 mdi_client_table_size * sizeof (struct client_hash)); 521 mutex_destroy(&vh->vh_phci_mutex); 522 mutex_destroy(&vh->vh_client_mutex); 523 524 kmem_free(vh, sizeof (mdi_vhci_t)); 525 return (MDI_SUCCESS); 526 } 527 528 /* 529 * i_mdi_vhci_class2vhci(): 530 * Look for a matching vHCI module given a vHCI class name 531 * Return Values: 532 * Handle to a vHCI component 533 * NULL 534 */ 535 static mdi_vhci_t * 536 i_mdi_vhci_class2vhci(char *class) 537 { 538 mdi_vhci_t *vh = NULL; 539 540 ASSERT(!MUTEX_HELD(&mdi_mutex)); 541 542 mutex_enter(&mdi_mutex); 543 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 544 if (strcmp(vh->vh_class, class) == 0) { 545 break; 546 } 547 } 548 mutex_exit(&mdi_mutex); 549 return (vh); 550 } 551 552 /* 553 * i_devi_get_vhci(): 554 * Utility function to get the handle to a vHCI component 555 * Return Values: 556 * Handle to a vHCI component 557 * NULL 558 */ 559 mdi_vhci_t * 560 i_devi_get_vhci(dev_info_t *vdip) 561 { 562 mdi_vhci_t *vh = NULL; 563 if (MDI_VHCI(vdip)) { 564 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 565 } 566 return (vh); 567 } 568 569 /* 570 * mdi_phci_register(): 571 * Register a pHCI 
module with mpxio framework 572 * mdi_phci_register() is called by pHCI drivers to register with 573 * the mpxio framework and a specific 'class_driver' vHCI. The 574 * pHCI driver must call this interface as part of its attach(9e) 575 * handler. 576 * Return Values: 577 * MDI_SUCCESS 578 * MDI_FAILURE 579 */ 580 /*ARGSUSED*/ 581 int 582 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 583 { 584 mdi_phci_t *ph; 585 mdi_vhci_t *vh; 586 char *data; 587 char *pathname; 588 589 /* 590 * Some subsystems, like fcp, perform pHCI registration from a 591 * different thread than the one doing the pHCI attach(9E) - the 592 * driver attach code is waiting for this other thread to complete. 593 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 594 * (indicating that some thread has done an ndi_devi_enter of parent) 595 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 596 */ 597 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 598 599 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 600 (void) ddi_pathname(pdip, pathname); 601 602 /* 603 * Check for mpxio-disable property. Enable mpxio if the property is 604 * missing or not set to "yes". 605 * If the property is set to "yes" then emit a brief message. 
606 */ 607 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 608 &data) == DDI_SUCCESS)) { 609 if (strcmp(data, "yes") == 0) { 610 MDI_DEBUG(1, (CE_CONT, pdip, 611 "?%s (%s%d) multipath capabilities " 612 "disabled via %s.conf.\n", pathname, 613 ddi_driver_name(pdip), ddi_get_instance(pdip), 614 ddi_driver_name(pdip))); 615 ddi_prop_free(data); 616 kmem_free(pathname, MAXPATHLEN); 617 return (MDI_FAILURE); 618 } 619 ddi_prop_free(data); 620 } 621 622 kmem_free(pathname, MAXPATHLEN); 623 624 /* 625 * Search for a matching vHCI 626 */ 627 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 628 if (vh == NULL) { 629 return (MDI_FAILURE); 630 } 631 632 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 633 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 634 ph->ph_dip = pdip; 635 ph->ph_vhci = vh; 636 ph->ph_next = NULL; 637 ph->ph_unstable = 0; 638 ph->ph_vprivate = 0; 639 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 640 641 MDI_PHCI_LOCK(ph); 642 MDI_PHCI_SET_POWER_UP(ph); 643 MDI_PHCI_UNLOCK(ph); 644 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 645 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 646 647 vhcache_phci_add(vh->vh_config, ph); 648 649 MDI_VHCI_PHCI_LOCK(vh); 650 if (vh->vh_phci_head == NULL) { 651 vh->vh_phci_head = ph; 652 } 653 if (vh->vh_phci_tail) { 654 vh->vh_phci_tail->ph_next = ph; 655 } 656 vh->vh_phci_tail = ph; 657 vh->vh_phci_count++; 658 MDI_VHCI_PHCI_UNLOCK(vh); 659 660 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 661 return (MDI_SUCCESS); 662 } 663 664 /* 665 * mdi_phci_unregister(): 666 * Unregister a pHCI module from mpxio framework 667 * mdi_phci_unregister() is called by the pHCI drivers from their 668 * detach(9E) handler to unregister their instances from the 669 * framework. 
670 * Return Values: 671 * MDI_SUCCESS 672 * MDI_FAILURE 673 */ 674 /*ARGSUSED*/ 675 int 676 mdi_phci_unregister(dev_info_t *pdip, int flags) 677 { 678 mdi_vhci_t *vh; 679 mdi_phci_t *ph; 680 mdi_phci_t *tmp; 681 mdi_phci_t *prev = NULL; 682 683 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 684 685 ph = i_devi_get_phci(pdip); 686 if (ph == NULL) { 687 MDI_DEBUG(1, (CE_WARN, pdip, 688 "!pHCI unregister: Not a valid pHCI")); 689 return (MDI_FAILURE); 690 } 691 692 vh = ph->ph_vhci; 693 ASSERT(vh != NULL); 694 if (vh == NULL) { 695 MDI_DEBUG(1, (CE_WARN, pdip, 696 "!pHCI unregister: Not a valid vHCI")); 697 return (MDI_FAILURE); 698 } 699 700 MDI_VHCI_PHCI_LOCK(vh); 701 tmp = vh->vh_phci_head; 702 while (tmp) { 703 if (tmp == ph) { 704 break; 705 } 706 prev = tmp; 707 tmp = tmp->ph_next; 708 } 709 710 if (ph == vh->vh_phci_head) { 711 vh->vh_phci_head = ph->ph_next; 712 } else { 713 prev->ph_next = ph->ph_next; 714 } 715 716 if (ph == vh->vh_phci_tail) { 717 vh->vh_phci_tail = prev; 718 } 719 720 vh->vh_phci_count--; 721 MDI_VHCI_PHCI_UNLOCK(vh); 722 723 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 724 ESC_DDI_INITIATOR_UNREGISTER); 725 vhcache_phci_remove(vh->vh_config, ph); 726 cv_destroy(&ph->ph_unstable_cv); 727 mutex_destroy(&ph->ph_mutex); 728 kmem_free(ph, sizeof (mdi_phci_t)); 729 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 730 DEVI(pdip)->devi_mdi_xhci = NULL; 731 return (MDI_SUCCESS); 732 } 733 734 /* 735 * i_devi_get_phci(): 736 * Utility function to return the phci extensions. 737 */ 738 static mdi_phci_t * 739 i_devi_get_phci(dev_info_t *pdip) 740 { 741 mdi_phci_t *ph = NULL; 742 if (MDI_PHCI(pdip)) { 743 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 744 } 745 return (ph); 746 } 747 748 /* 749 * Single thread mdi entry into devinfo node for modifying its children. 750 * If necessary we perform an ndi_devi_enter of the vHCI before doing 751 * an ndi_devi_enter of 'dip'. 
We maintain circular in two parts: one 752 * for the vHCI and one for the pHCI. 753 */ 754 void 755 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 756 { 757 dev_info_t *vdip; 758 int vcircular, pcircular; 759 760 /* Verify calling context */ 761 ASSERT(MDI_PHCI(phci_dip)); 762 vdip = mdi_devi_get_vdip(phci_dip); 763 ASSERT(vdip); /* A pHCI always has a vHCI */ 764 765 /* 766 * If pHCI is detaching then the framework has already entered the 767 * vHCI on a threads that went down the code path leading to 768 * detach_node(). This framework enter of the vHCI during pHCI 769 * detach is done to avoid deadlock with vHCI power management 770 * operations which enter the vHCI and the enter down the path 771 * to the pHCI. If pHCI is detaching then we piggyback this calls 772 * enter of the vHCI on frameworks vHCI enter that has already 773 * occurred - this is OK because we know that the framework thread 774 * doing detach is waiting for our completion. 775 * 776 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 777 * race with detach - but we can't do that because the framework has 778 * already entered the parent, so we have some complexity instead. 779 */ 780 for (;;) { 781 if (ndi_devi_tryenter(vdip, &vcircular)) { 782 ASSERT(vcircular != -1); 783 if (DEVI_IS_DETACHING(phci_dip)) { 784 ndi_devi_exit(vdip, vcircular); 785 vcircular = -1; 786 } 787 break; 788 } else if (DEVI_IS_DETACHING(phci_dip)) { 789 vcircular = -1; 790 break; 791 } else { 792 delay(1); 793 } 794 } 795 796 ndi_devi_enter(phci_dip, &pcircular); 797 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 798 } 799 800 /* 801 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
802 */ 803 void 804 mdi_devi_exit(dev_info_t *phci_dip, int circular) 805 { 806 dev_info_t *vdip; 807 int vcircular, pcircular; 808 809 /* Verify calling context */ 810 ASSERT(MDI_PHCI(phci_dip)); 811 vdip = mdi_devi_get_vdip(phci_dip); 812 ASSERT(vdip); /* A pHCI always has a vHCI */ 813 814 /* extract two circular recursion values from single int */ 815 pcircular = (short)(circular & 0xFFFF); 816 vcircular = (short)((circular >> 16) & 0xFFFF); 817 818 ndi_devi_exit(phci_dip, pcircular); 819 if (vcircular != -1) 820 ndi_devi_exit(vdip, vcircular); 821 } 822 823 /* 824 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 825 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 826 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 827 * with vHCI power management code during path online/offline. Each 828 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 829 * occur within the scope of an active mdi_devi_enter that establishes the 830 * circular value. 
831 */ 832 void 833 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 834 { 835 int pcircular; 836 837 /* Verify calling context */ 838 ASSERT(MDI_PHCI(phci_dip)); 839 840 pcircular = (short)(circular & 0xFFFF); 841 ndi_devi_exit(phci_dip, pcircular); 842 } 843 844 void 845 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 846 { 847 int pcircular; 848 849 /* Verify calling context */ 850 ASSERT(MDI_PHCI(phci_dip)); 851 852 ndi_devi_enter(phci_dip, &pcircular); 853 854 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 855 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 856 } 857 858 /* 859 * mdi_devi_get_vdip(): 860 * given a pHCI dip return vHCI dip 861 */ 862 dev_info_t * 863 mdi_devi_get_vdip(dev_info_t *pdip) 864 { 865 mdi_phci_t *ph; 866 867 ph = i_devi_get_phci(pdip); 868 if (ph && ph->ph_vhci) 869 return (ph->ph_vhci->vh_dip); 870 return (NULL); 871 } 872 873 /* 874 * mdi_devi_pdip_entered(): 875 * Return 1 if we are vHCI and have done an ndi_devi_enter 876 * of a pHCI 877 */ 878 int 879 mdi_devi_pdip_entered(dev_info_t *vdip) 880 { 881 mdi_vhci_t *vh; 882 mdi_phci_t *ph; 883 884 vh = i_devi_get_vhci(vdip); 885 if (vh == NULL) 886 return (0); 887 888 MDI_VHCI_PHCI_LOCK(vh); 889 ph = vh->vh_phci_head; 890 while (ph) { 891 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 892 MDI_VHCI_PHCI_UNLOCK(vh); 893 return (1); 894 } 895 ph = ph->ph_next; 896 } 897 MDI_VHCI_PHCI_UNLOCK(vh); 898 return (0); 899 } 900 901 /* 902 * mdi_phci_path2devinfo(): 903 * Utility function to search for a valid phci device given 904 * the devfs pathname. 
905 */ 906 dev_info_t * 907 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 908 { 909 char *temp_pathname; 910 mdi_vhci_t *vh; 911 mdi_phci_t *ph; 912 dev_info_t *pdip = NULL; 913 914 vh = i_devi_get_vhci(vdip); 915 ASSERT(vh != NULL); 916 917 if (vh == NULL) { 918 /* 919 * Invalid vHCI component, return failure 920 */ 921 return (NULL); 922 } 923 924 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 925 MDI_VHCI_PHCI_LOCK(vh); 926 ph = vh->vh_phci_head; 927 while (ph != NULL) { 928 pdip = ph->ph_dip; 929 ASSERT(pdip != NULL); 930 *temp_pathname = '\0'; 931 (void) ddi_pathname(pdip, temp_pathname); 932 if (strcmp(temp_pathname, pathname) == 0) { 933 break; 934 } 935 ph = ph->ph_next; 936 } 937 if (ph == NULL) { 938 pdip = NULL; 939 } 940 MDI_VHCI_PHCI_UNLOCK(vh); 941 kmem_free(temp_pathname, MAXPATHLEN); 942 return (pdip); 943 } 944 945 /* 946 * mdi_phci_get_path_count(): 947 * get number of path information nodes associated with a given 948 * pHCI device. 949 */ 950 int 951 mdi_phci_get_path_count(dev_info_t *pdip) 952 { 953 mdi_phci_t *ph; 954 int count = 0; 955 956 ph = i_devi_get_phci(pdip); 957 if (ph != NULL) { 958 count = ph->ph_path_count; 959 } 960 return (count); 961 } 962 963 /* 964 * i_mdi_phci_lock(): 965 * Lock a pHCI device 966 * Return Values: 967 * None 968 * Note: 969 * The default locking order is: 970 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 971 * But there are number of situations where locks need to be 972 * grabbed in reverse order. This routine implements try and lock 973 * mechanism depending on the requested parameter option. 974 */ 975 static void 976 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 977 { 978 if (pip) { 979 /* Reverse locking is requested. */ 980 while (MDI_PHCI_TRYLOCK(ph) == 0) { 981 /* 982 * tryenter failed. 
Try to grab again 983 * after a small delay 984 */ 985 MDI_PI_HOLD(pip); 986 MDI_PI_UNLOCK(pip); 987 delay(1); 988 MDI_PI_LOCK(pip); 989 MDI_PI_RELE(pip); 990 } 991 } else { 992 MDI_PHCI_LOCK(ph); 993 } 994 } 995 996 /* 997 * i_mdi_phci_unlock(): 998 * Unlock the pHCI component 999 */ 1000 static void 1001 i_mdi_phci_unlock(mdi_phci_t *ph) 1002 { 1003 MDI_PHCI_UNLOCK(ph); 1004 } 1005 1006 /* 1007 * i_mdi_devinfo_create(): 1008 * create client device's devinfo node 1009 * Return Values: 1010 * dev_info 1011 * NULL 1012 * Notes: 1013 */ 1014 static dev_info_t * 1015 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1016 char **compatible, int ncompatible) 1017 { 1018 dev_info_t *cdip = NULL; 1019 1020 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1021 1022 /* Verify for duplicate entry */ 1023 cdip = i_mdi_devinfo_find(vh, name, guid); 1024 ASSERT(cdip == NULL); 1025 if (cdip) { 1026 cmn_err(CE_WARN, 1027 "i_mdi_devinfo_create: client dip %p already exists", 1028 (void *)cdip); 1029 } 1030 1031 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1032 if (cdip == NULL) 1033 goto fail; 1034 1035 /* 1036 * Create component type and Global unique identifier 1037 * properties 1038 */ 1039 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1040 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1041 goto fail; 1042 } 1043 1044 /* Decorate the node with compatible property */ 1045 if (compatible && 1046 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1047 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1048 goto fail; 1049 } 1050 1051 return (cdip); 1052 1053 fail: 1054 if (cdip) { 1055 (void) ndi_prop_remove_all(cdip); 1056 (void) ndi_devi_free(cdip); 1057 } 1058 return (NULL); 1059 } 1060 1061 /* 1062 * i_mdi_devinfo_find(): 1063 * Find a matching devinfo node for given client node name 1064 * and its guid. 
 * Return Values:
 *		Handle to a dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char		*data;
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	int		circular;

	/* Walk the vHCI's children while holding its devinfo lock */
	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		/* Fetch the sibling first; cdip may be the match we keep */
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *		Remove a client device node
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;

	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
		if (rv != NDI_SUCCESS) {
			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
			    " failed. cdip = %p\n", (void *)cdip));
		}
		/*
		 * Convert to MDI error code
		 */
		switch (rv) {
		case NDI_SUCCESS:
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	}
	return (rv);
}

/*
 * i_devi_get_client()
 *		Utility function to get mpxio component extensions
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
	mdi_client_t	*ct = NULL;

	if (MDI_CLIENT(cdip)) {
		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
	}
	return (ct);
}

/*
 * i_mdi_is_child_present():
 *		Search for the presence of client device dev_info node
 */
static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
	int		rv = MDI_FAILURE;
	struct dev_info	*dip;
	int		circular;

	/* Linear scan of vdip's children under the devinfo lock */
	ndi_devi_enter(vdip, &circular);
	dip = DEVI(vdip)->devi_child;
	while (dip) {
		if (dip == DEVI(cdip)) {
			rv = MDI_SUCCESS;
			break;
		}
		dip = dip->devi_sibling;
	}
	ndi_devi_exit(vdip, circular);
	return (rv);
}


/*
 * i_mdi_client_lock():
 *		Grab client component lock
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order. This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			/*
			 * tryenter failed.
			 * Try to grab again
			 * after a small delay
			 */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		/* Normal lock order; block until the client lock is ours */
		MDI_CLIENT_LOCK(ct);
	}
}

/*
 * i_mdi_client_unlock():
 *		Unlock a client component
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_client_alloc():
 *		Allocate and initialize a client structure.  Caller should
 *		hold the vhci client lock.
 * Return Values:
 *		Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/*
	 * Allocate and initialize a component structure.
	 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_dip = NULL;
	ct->ct_vhci = vh;
	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;
	ct->ct_flags = 0;
	/* New client starts FAILED/OFFLINE until paths come online */
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	MDI_CLIENT_UNLOCK(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	ct->ct_unstable = 0;
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
	/* Inherit the vHCI's load-balance policy as the default */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);
	return (ct);
}

/*
 * i_mdi_client_enlist_table():
 *		Attach the client device to the client hash table. Caller
 *		should hold the vhci client lock.
 */
static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	struct client_hash	*head;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Hash on the GUID and push onto the bucket's singly-linked chain */
	index = i_mdi_get_hash_key(ct->ct_guid);
	head = &vh->vh_client_table[index];
	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
	head->ct_hash_head = ct;
	head->ct_hash_count++;
	vh->vh_client_count++;
}

/*
 * i_mdi_client_delist_table():
 *		Detach the client device from the client hash table.
 *		Caller should hold the vhci client lock.
 */
static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	char			*guid;
	struct client_hash	*head;
	mdi_client_t		*next;
	mdi_client_t		*last;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	guid = ct->ct_guid;
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* Locate ct on the bucket chain, remembering its predecessor */
	last = NULL;
	next = (mdi_client_t *)head->ct_hash_head;
	while (next != NULL) {
		if (next == ct) {
			break;
		}
		last = next;
		next = next->ct_hnext;
	}

	if (next) {
		head->ct_hash_count--;
		if (last == NULL) {
			head->ct_hash_head = ct->ct_hnext;
		} else {
			last->ct_hnext = ct->ct_hnext;
		}
		ct->ct_hnext = NULL;
		vh->vh_client_count--;
	}
}


/*
 * i_mdi_client_free():
 *		Free a client component
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	/* Strip the MDI client markings off the devinfo node */
	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = NULL;

	/*
	 * Clear out back ref.
	 * to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	if (cdip != NULL) {
		/*
		 * Drop the vhci client lock across the devinfo removal;
		 * ndi_devi_offline() may block and must not be called with
		 * it held.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
		MDI_VHCI_CLIENT_LOCK(vh);
	}
	return (rv);
}

/*
 * i_mdi_client_find():
 *		Find the client structure corresponding to a given guid
 *		Caller should hold the vhci client lock.
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
	int			index;
	struct client_hash	*head;
	mdi_client_t		*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* Match on guid; cname == NULL means "any driver name" */
	ct = head->ct_hash_head;
	while (ct != NULL) {
		if (strcmp(ct->ct_guid, guid) == 0 &&
		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
			break;
		}
		ct = ct->ct_hnext;
	}
	return (ct);
}

/*
 * i_mdi_client_update_state():
 *		Compute and update client device state
 * Notes:
 *		A client device can be in any of three possible states:
 *
 *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *		one online/standby paths. Can tolerate failures.
 *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *		no alternate paths available as standby. A failure on the online
 *		would result in loss of access to device data.
 *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
 *		no paths available to access the device.
 */
static void
i_mdi_client_update_state(mdi_client_t *ct)
{
	int state;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	state = i_mdi_client_compute_state(ct, NULL);
	MDI_CLIENT_SET_STATE(ct, state);
}

/*
 * i_mdi_client_compute_state():
 *		Compute client device state
 *
 *		mdi_phci_t *	Pointer to pHCI structure whose paths should
 *				be excluded while computing the new value.
 *				Used by i_mdi_phci_offline() to find the new
 *				client state after DR of a pHCI.
 */
static int
i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
{
	int		state;
	int		online_count = 0;
	int		standby_count = 0;
	mdi_pathinfo_t	*pip, *next;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Tally ONLINE and STANDBY paths, skipping those through 'ph' */
	pip = ct->ct_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		if (MDI_PI(pip)->pi_phci == ph) {
			MDI_PI_UNLOCK(pip);
			pip = next;
			continue;
		}

		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_ONLINE)
			online_count++;
		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_STANDBY)
			standby_count++;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	/* Map the (online, standby) counts onto a client state */
	if (online_count == 0) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_FAILED;
			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
			    " ct = %p\n", (void *)ct));
		} else if (standby_count == 1) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else if (online_count == 1) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else {
		state = MDI_CLIENT_STATE_OPTIMAL;
	}
	return (state);
}

/*
 * i_mdi_client2devinfo():
 *		Utility function: map a client structure to its devinfo node
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	return (ct->ct_dip);
}

/*
 * mdi_client_path2devinfo():
 *		Given the parent devinfo and child devfs pathname, search for
 *		a valid devfs node handle.
 */
dev_info_t *
mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
{
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	char		*temp_pathname;
	int		circular;

	/*
	 * Allocate temp buffer
	 */
	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	/*
	 * Lock parent against changes
	 */
	ndi_devi_enter(vdip, &circular);
	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		/* Build each child's full devfs path and compare */
		*temp_pathname = '\0';
		(void) ddi_pathname(cdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
	}
	/*
	 * Release devinfo lock
	 */
	ndi_devi_exit(vdip, circular);

	/*
	 * Free the temp buffer
	 */
	kmem_free(temp_pathname, MAXPATHLEN);
	return (cdip);
}

/*
 * mdi_client_get_path_count():
 *		Utility function to get number of path information nodes
 *		associated with a given client device.
 */
int
mdi_client_get_path_count(dev_info_t *cdip)
{
	mdi_client_t	*ct;
	int		count = 0;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		count = ct->ct_path_count;
	}
	return (count);
}


/*
 * i_mdi_get_hash_key():
 *		Create a hash using strings as keys
 *
 */
static int
i_mdi_get_hash_key(char *str)
{
	uint32_t	g, hash = 0;
	char		*p;

	/* Simple additive hash over the bytes of the string */
	for (p = str; *p != '\0'; p++) {
		g = *p;
		hash += g;
	}
	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
}

/*
 * mdi_get_lb_policy():
 *		Get current load balancing policy for a given client device
 */
client_lb_t
mdi_get_lb_policy(dev_info_t *cdip)
{
	client_lb_t	lb = LOAD_BALANCE_NONE;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		lb = ct->ct_lb;
	}
	return (lb);
}

/*
 * mdi_set_lb_region_size():
 *		Set current region size for the load-balance
 */
int
mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
{
	mdi_client_t	*ct;
	int		rv = MDI_FAILURE;

	ct = i_devi_get_client(cdip);
	if (ct != NULL && ct->ct_lb_args != NULL) {
		ct->ct_lb_args->region_size = region_size;
		rv = MDI_SUCCESS;
	}
	return (rv);
}

/*
 * mdi_set_lb_policy():
 *		Set current load balancing policy for a given client device
 */
int
mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
{
	mdi_client_t	*ct;
	int		rv = MDI_FAILURE;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		ct->ct_lb = lb;
		rv = MDI_SUCCESS;
	}
	return (rv);
}

/*
 * mdi_failover():
 *		failover function called by the vHCI drivers to initiate
 *		a failover operation.  This is typically due to non-availability
 *		of online paths to route I/O requests.
 *		Failover can be
 *		triggered through user application also.
 *
 *		The vHCI driver calls mdi_failover() to initiate a failover
 *		operation. mdi_failover() calls back into the vHCI driver's
 *		vo_failover() entry point to perform the actual failover
 *		operation. The reason for requiring the vHCI driver to
 *		initiate failover by calling mdi_failover(), instead of directly
 *		executing vo_failover() itself, is to ensure that the mdi
 *		framework can keep track of the client state properly.
 *		Additionally, mdi_failover() provides as a convenience the
 *		option of performing the failover operation synchronously or
 *		asynchronously
 *
 *		Upon successful completion of the failover operation, the
 *		paths that were previously ONLINE will be in the STANDBY state,
 *		and the newly activated paths will be in the ONLINE state.
 *
 *		The flags modifier determines whether the activation is done
 *		synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int		rv;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			/* Async callers cannot block; report busy */
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* Sync callers wait for path state to settle */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again.
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(ct->ct_dip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode.  Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *		internal failover function. Invokes vHCI drivers failover
 *		callback function and process the failover status
 * Return Values:
 *		None
 *
 * Note: A client device in failover state can not be detached or freed.
 */
static int
i_mdi_failover(void *arg)
{
	int		rv = MDI_SUCCESS;
	mdi_client_t	*ct = (mdi_client_t *)arg;
	mdi_vhci_t	*vh = ct->ct_vhci;

	ASSERT(!MDI_CLIENT_LOCKED(ct));

	if (vh->vh_ops->vo_failover != NULL) {
		/*
		 * Call vHCI drivers callback routine
		 */
		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
		    ct->ct_failover_flags);
	}

	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);

	/*
	 * Save the failover return status
	 */
	ct->ct_failover_status = rv;

	/*
	 * As a result of failover, client status would have been changed.
	 * Update the client state and wake up anyone waiting on this client
	 * device.
	 */
	i_mdi_client_update_state(ct);

	cv_broadcast(&ct->ct_failover_cv);
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * Load balancing is logical block.
 * IOs within the range described by region_size
 * would go on the same path. This would improve the
 * performance by cache-hit on some of the RAID devices.
 * Search only for online paths(At some point we
 * may want to balance across target ports).
 * If no paths are found then default to round-robin.
 */
static int
i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
{
	int		path_index = -1;
	int		online_path_count = 0;
	int		online_nonpref_path_count = 0;
	int		region_size = ct->ct_lb_args->region_size;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	int		preferred, path_cnt;

	/* Pass 1: count online preferred and online non-preferred paths */
	pip = ct->ct_path_head;
	while (pip) {
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
			online_path_count++;
		} else if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
			online_nonpref_path_count++;
		}
		next = (mdi_pathinfo_t *)
		    MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	/* if found any online/preferred then use this type */
	if (online_path_count > 0) {
		path_cnt = online_path_count;
		preferred = 1;
	} else if (online_nonpref_path_count > 0) {
		path_cnt = online_nonpref_path_count;
		preferred = 0;
	} else {
		path_cnt = 0;
	}
	if (path_cnt) {
		/*
		 * Pass 2: pick the path whose index (within the chosen type)
		 * equals (LBA >> region_size) mod path_cnt, so adjacent
		 * regions map to the same path.
		 */
		path_index = (bp->b_blkno >> region_size) % path_cnt;
		pip = ct->ct_path_head;
		while (pip && path_index != -1) {
			MDI_PI_LOCK(pip);
			if (path_index == 0 &&
			    (MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    MDI_PI(pip)->pi_preferred == preferred) {
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				*ret_pip = pip;
				return (MDI_SUCCESS);
			}
			path_index --;
			next = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		if (pip == NULL) {
			MDI_DEBUG(4, (CE_NOTE, NULL,
			    "!lba %llx, no pip !!\n",
			    bp->b_lblkno));
		} else {
			MDI_DEBUG(4, (CE_NOTE, NULL,
			    "!lba %llx, no pip for path_index, "
			    "pip %p\n", bp->b_lblkno, (void *)pip));
		}
	}
	return (MDI_FAILURE);
}

/*
 * mdi_select_path():
 *		select a path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LB(Online paths will be selected
 *		based on the logical block).  The load balancing
 *		through vHCI drivers configuration file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  If start_pip is specified (non NULL) is
 *		used as start point to walk and find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt).
 *		Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY	- Client device is busy failing over
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * Wrapped back to the start: first retry with
			 * non-preferred paths, then give up.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
		    (ct->ct_lb_args->region_size) && bp &&
		    (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
			    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			if (sb) {
				/* Standard behavior: ONLINE + preference */
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
				    MDI_PI(pip)->pi_preferred ==
				    preferred) ? 1 : 0);
			} else {
				/* Match state per the caller's flag set */
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
2333 */ 2334 mdi_pathinfo_t * 2335 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2336 { 2337 mdi_client_t *ct; 2338 2339 if (!MDI_CLIENT(ct_dip)) 2340 return (NULL); 2341 2342 /* 2343 * Walk through client link 2344 */ 2345 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2346 ASSERT(ct != NULL); 2347 2348 if (pip == NULL) 2349 return ((mdi_pathinfo_t *)ct->ct_path_head); 2350 2351 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2352 } 2353 2354 /* 2355 * For a phci, return the next available path to any client 2356 * Note: ditto mdi_get_next_phci_path() 2357 */ 2358 mdi_pathinfo_t * 2359 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2360 { 2361 mdi_phci_t *ph; 2362 2363 if (!MDI_PHCI(ph_dip)) 2364 return (NULL); 2365 2366 /* 2367 * Walk through pHCI link 2368 */ 2369 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2370 ASSERT(ph != NULL); 2371 2372 if (pip == NULL) 2373 return ((mdi_pathinfo_t *)ph->ph_path_head); 2374 2375 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2376 } 2377 2378 /* 2379 * mdi_hold_path(): 2380 * Hold the mdi_pathinfo node against unwanted unexpected free. 2381 * Return Values: 2382 * None 2383 */ 2384 void 2385 mdi_hold_path(mdi_pathinfo_t *pip) 2386 { 2387 if (pip) { 2388 MDI_PI_LOCK(pip); 2389 MDI_PI_HOLD(pip); 2390 MDI_PI_UNLOCK(pip); 2391 } 2392 } 2393 2394 2395 /* 2396 * mdi_rele_path(): 2397 * Release the mdi_pathinfo node which was selected 2398 * through mdi_select_path() mechanism or manually held by 2399 * calling mdi_hold_path(). 2400 * Return Values: 2401 * None 2402 */ 2403 void 2404 mdi_rele_path(mdi_pathinfo_t *pip) 2405 { 2406 if (pip) { 2407 MDI_PI_LOCK(pip); 2408 MDI_PI_RELE(pip); 2409 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2410 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2411 } 2412 MDI_PI_UNLOCK(pip); 2413 } 2414 } 2415 2416 /* 2417 * mdi_pi_lock(): 2418 * Lock the mdi_pathinfo node. 
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.
 * Note:
 *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *		Returns a pointer to the mdi_pathinfo node if a matching node is
 *		found.
 * Arguments:
 *		pdip	pHCI dev_info node (must be a valid pHCI)
 *		caddr	client unit address; NULL restricts the search to the
 *			pHCI's own path list (no client lookup)
 *		paddr	path unit address to match (required)
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL
 * Notes:
 *		Caller need not hold any locks to call this function.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (CE_WARN, pdip,
			    "!mdi_pi_find: offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* Linear scan of the pHCI's path list on unit address. */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
		    (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
		    "found for caddr %s", caddr ? caddr : "NULL"));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc():
 *		Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path is capable of having properties attached
 *		and passed to mdi_pi_online() to fully attach and online the
 *		path and client device node.
 *		The mdi_pathinfo node returned by this function must be
 *		destroyed using mdi_pi_free() if the path is no longer
 *		operational or if the caller fails to attach a client device
 *		node when calling mdi_pi_online(). The framework will not free
 *		the resources allocated.
 *		This function can be called from both interrupt and kernel
 *		contexts.  DDI_NOSLEEP flag should be used while calling
 *		from interrupt contexts.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;
	int		path_allocated = 0;	/* set when a new pip is made */

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
	    cname ? cname : "NULL", caddr ? caddr : "NULL",
	    paddr ? paddr : "NULL"));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/* Mark the pHCI unstable for the duration of the allocation. */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* rv is still MDI_NOMEM at this point */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/*
	 * Check whether a path with this unit address already exists on
	 * this pHCI for the client; reuse it instead of allocating twice.
	 */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (CE_NOTE, pdip,
	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *		Convenience wrapper over mdi_pi_alloc_compatible() with no
 *		"compatible" property list.  See mdi_pi_alloc_compatible()
 *		for the full contract.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *		mdi_pathinfo
 * Notes:
 *		Caller must hold the vHCI client mutex (asserted below).
 *		The new node starts in INIT|TRANSIENT state.
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	int		se_flag;
	int		kmem_flag;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit the pHCI's disable flags onto the new path. */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	return (pip);
}

/*
 * i_mdi_phci_add_path():
 *		Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *		Caller must hold the pHCI's dev_info node busy
 *		(DEVI_BUSY_OWNED, asserted below); the per-pHCI mutex
 *		is taken and dropped here.
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Append at the tail of the singly-linked pHCI path list. */
	MDI_PHCI_LOCK(ph);
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 * Notes:
 *		Caller must hold the client's dev_info node busy
 *		(DEVI_BUSY_OWNED, asserted below); the per-client mutex
 *		is taken and dropped here.
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Append at the tail of the singly-linked client path list. */
	MDI_CLIENT_LOCK(ct);
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition. A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 * Wait up to 60 seconds for pi_ref_cv (signalled by
		 * mdi_rele_path() when the last reference is dropped).
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    (void *)pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Reacquire in the documented order: vHCI client lock before the
	 * per-client lock (see the file's LOCK_ORDER notes).
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/* vo_pi_uninit() failed: restore the cache entry removed above. */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node
 * Notes:
 *		Caller must hold the per-client mutex (asserted below).
 *		Unlinks the node from both the client and pHCI lists,
 *		then destroys its synchronization objects and memory.
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;
	int	se_flag;
	int	kmem_flag;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
3073 * Notes: 3074 * Caller should hold per-pHCI mutex 3075 */ 3076 static void 3077 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3078 { 3079 mdi_pathinfo_t *prev = NULL; 3080 mdi_pathinfo_t *path = NULL; 3081 3082 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3083 3084 MDI_PHCI_LOCK(ph); 3085 path = ph->ph_path_head; 3086 while (path != NULL) { 3087 if (path == pip) { 3088 break; 3089 } 3090 prev = path; 3091 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3092 } 3093 3094 if (path) { 3095 ph->ph_path_count--; 3096 if (prev) { 3097 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3098 } else { 3099 ph->ph_path_head = 3100 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3101 } 3102 if (ph->ph_path_tail == path) { 3103 ph->ph_path_tail = prev; 3104 } 3105 } 3106 3107 /* 3108 * Clear the pHCI link 3109 */ 3110 MDI_PI(pip)->pi_phci_link = NULL; 3111 MDI_PI(pip)->pi_phci = NULL; 3112 MDI_PHCI_UNLOCK(ph); 3113 } 3114 3115 /* 3116 * i_mdi_client_remove_path(): 3117 * Remove a mdi_pathinfo node from client path list. 
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Locate pip in the client's singly-linked path list. */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/*
		 * ct_path_last is the round-robin cursor; reset it to the
		 * head if it pointed at the node being removed.
		 */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	/* Sever the node's back-links to the client. */
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		online a mdi_pathinfo node
 *
 *		Drives a path state transition (ONLINE, STANDBY, FAULT or
 *		OFFLINE per 'state'), calling back the vHCI's vo_pi_init/
 *		vo_pi_state_change entry points and updating the client and
 *		dev_info node states to match.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* vo_pi_init is called without the pip lock held */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* Callback vHCI's state-change entry point, with no locks held. */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* Transition failed: roll the path state back. */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state. The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3482 * Return Values: 3483 * MDI_SUCCESS 3484 * MDI_FAILURE 3485 */ 3486 int 3487 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3488 { 3489 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3490 dev_info_t *cdip; 3491 int client_held = 0; 3492 int rv; 3493 3494 ASSERT(ct != NULL); 3495 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3496 if (rv != MDI_SUCCESS) 3497 return (rv); 3498 3499 MDI_PI_LOCK(pip); 3500 if (MDI_PI(pip)->pi_pm_held == 0) { 3501 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3502 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3503 i_mdi_pm_hold_pip(pip); 3504 client_held = 1; 3505 } 3506 MDI_PI_UNLOCK(pip); 3507 3508 if (client_held) { 3509 MDI_CLIENT_LOCK(ct); 3510 if (ct->ct_power_cnt == 0) { 3511 rv = i_mdi_power_all_phci(ct); 3512 } 3513 3514 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3515 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3516 i_mdi_pm_hold_client(ct, 1); 3517 MDI_CLIENT_UNLOCK(ct); 3518 } 3519 3520 /* 3521 * Create the per-path (pathinfo) IO and error kstats which 3522 * are reported via iostat(1m). 3523 * 3524 * Defer creating the per-path kstats if device is not yet 3525 * attached; the names of the kstats are constructed in part 3526 * using the devices instance number which is assigned during 3527 * process of attaching the client device. 3528 * 3529 * The framework post_attach handler, mdi_post_attach(), is 3530 * is responsible for initializing the client's pathinfo list 3531 * once successfully attached. 
3532 */ 3533 cdip = ct->ct_dip; 3534 ASSERT(cdip); 3535 if (cdip == NULL || !i_ddi_devi_attached(cdip)) 3536 return (rv); 3537 3538 MDI_CLIENT_LOCK(ct); 3539 rv = i_mdi_pi_kstat_create(pip); 3540 MDI_CLIENT_UNLOCK(ct); 3541 return (rv); 3542 } 3543 3544 /* 3545 * mdi_pi_standby(): 3546 * Place the mdi_pathinfo node in standby state 3547 * 3548 * Return Values: 3549 * MDI_SUCCESS 3550 * MDI_FAILURE 3551 */ 3552 int 3553 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3554 { 3555 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3556 } 3557 3558 /* 3559 * mdi_pi_fault(): 3560 * Place the mdi_pathinfo node in fault'ed state 3561 * Return Values: 3562 * MDI_SUCCESS 3563 * MDI_FAILURE 3564 */ 3565 int 3566 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3567 { 3568 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3569 } 3570 3571 /* 3572 * mdi_pi_offline(): 3573 * Offline a mdi_pathinfo node. 3574 * Return Values: 3575 * MDI_SUCCESS 3576 * MDI_FAILURE 3577 */ 3578 int 3579 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3580 { 3581 int ret, client_held = 0; 3582 mdi_client_t *ct; 3583 3584 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3585 3586 if (ret == MDI_SUCCESS) { 3587 MDI_PI_LOCK(pip); 3588 if (MDI_PI(pip)->pi_pm_held) { 3589 client_held = 1; 3590 } 3591 MDI_PI_UNLOCK(pip); 3592 3593 if (client_held) { 3594 ct = MDI_PI(pip)->pi_client; 3595 MDI_CLIENT_LOCK(ct); 3596 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3597 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3598 i_mdi_pm_rele_client(ct, 1); 3599 MDI_CLIENT_UNLOCK(ct); 3600 } 3601 } 3602 3603 return (ret); 3604 } 3605 3606 /* 3607 * i_mdi_pi_offline(): 3608 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3609 */ 3610 static int 3611 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3612 { 3613 dev_info_t *vdip = NULL; 3614 mdi_vhci_t *vh = NULL; 3615 mdi_client_t *ct = NULL; 3616 int (*f)(); 3617 int rv; 3618 3619 MDI_PI_LOCK(pip); 3620 ct = 
MDI_PI(pip)->pi_client; 3621 ASSERT(ct != NULL); 3622 3623 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3624 /* 3625 * Give a chance for pending I/Os to complete. 3626 */ 3627 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3628 "%d cmds still pending on path: %p\n", 3629 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3630 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3631 &MDI_PI(pip)->pi_mutex, 3632 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3633 /* 3634 * The timeout time reached without ref_cnt being zero 3635 * being signaled. 3636 */ 3637 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3638 "Timeout reached on path %p without the cond\n", 3639 (void *)pip)); 3640 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3641 "%d cmds still pending on path: %p\n", 3642 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3643 } 3644 } 3645 vh = ct->ct_vhci; 3646 vdip = vh->vh_dip; 3647 3648 /* 3649 * Notify vHCI that has registered this event 3650 */ 3651 ASSERT(vh->vh_ops); 3652 f = vh->vh_ops->vo_pi_state_change; 3653 3654 if (f != NULL) { 3655 MDI_PI_UNLOCK(pip); 3656 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3657 flags)) != MDI_SUCCESS) { 3658 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3659 "!vo_path_offline failed " 3660 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3661 } 3662 MDI_PI_LOCK(pip); 3663 } 3664 3665 /* 3666 * Set the mdi_pathinfo node state and clear the transient condition 3667 */ 3668 MDI_PI_SET_OFFLINE(pip); 3669 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3670 MDI_PI_UNLOCK(pip); 3671 3672 MDI_CLIENT_LOCK(ct); 3673 if (rv == MDI_SUCCESS) { 3674 if (ct->ct_unstable == 0) { 3675 dev_info_t *cdip = ct->ct_dip; 3676 3677 /* 3678 * Onlining the mdi_pathinfo node will impact the 3679 * client state Update the client and dev_info node 3680 * state accordingly 3681 */ 3682 i_mdi_client_update_state(ct); 3683 rv = NDI_SUCCESS; 3684 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3685 if (cdip && 3686 (i_ddi_node_state(cdip) >= 3687 
DS_INITIALIZED)) { 3688 MDI_CLIENT_UNLOCK(ct); 3689 rv = ndi_devi_offline(cdip, 0); 3690 MDI_CLIENT_LOCK(ct); 3691 if (rv != NDI_SUCCESS) { 3692 /* 3693 * ndi_devi_offline failed. 3694 * Reset client flags to 3695 * online. 3696 */ 3697 MDI_DEBUG(4, (CE_WARN, cdip, 3698 "!ndi_devi_offline: failed " 3699 " Error: %x", rv)); 3700 MDI_CLIENT_SET_ONLINE(ct); 3701 } 3702 } 3703 } 3704 /* 3705 * Convert to MDI error code 3706 */ 3707 switch (rv) { 3708 case NDI_SUCCESS: 3709 rv = MDI_SUCCESS; 3710 break; 3711 case NDI_BUSY: 3712 rv = MDI_BUSY; 3713 break; 3714 default: 3715 rv = MDI_FAILURE; 3716 break; 3717 } 3718 } 3719 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3720 i_mdi_report_path_state(ct, pip); 3721 } 3722 3723 MDI_CLIENT_UNLOCK(ct); 3724 3725 /* 3726 * Change in the mdi_pathinfo node state will impact the client state 3727 */ 3728 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3729 (void *)ct, (void *)pip)); 3730 return (rv); 3731 } 3732 3733 3734 /* 3735 * mdi_pi_get_addr(): 3736 * Get the unit address associated with a mdi_pathinfo node 3737 * 3738 * Return Values: 3739 * char * 3740 */ 3741 char * 3742 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3743 { 3744 if (pip == NULL) 3745 return (NULL); 3746 3747 return (MDI_PI(pip)->pi_addr); 3748 } 3749 3750 /* 3751 * mdi_pi_get_client(): 3752 * Get the client devinfo associated with a mdi_pathinfo node 3753 * 3754 * Return Values: 3755 * Handle to client device dev_info node 3756 */ 3757 dev_info_t * 3758 mdi_pi_get_client(mdi_pathinfo_t *pip) 3759 { 3760 dev_info_t *dip = NULL; 3761 if (pip) { 3762 dip = MDI_PI(pip)->pi_client->ct_dip; 3763 } 3764 return (dip); 3765 } 3766 3767 /* 3768 * mdi_pi_get_phci(): 3769 * Get the pHCI devinfo associated with the mdi_pathinfo node 3770 * Return Values: 3771 * Handle to dev_info node 3772 */ 3773 dev_info_t * 3774 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3775 { 3776 dev_info_t *dip = NULL; 3777 if (pip) { 3778 dip = MDI_PI(pip)->pi_phci->ph_dip; 3779 } 3780 return (dip); 
3781 } 3782 3783 /* 3784 * mdi_pi_get_client_private(): 3785 * Get the client private information associated with the 3786 * mdi_pathinfo node 3787 */ 3788 void * 3789 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3790 { 3791 void *cprivate = NULL; 3792 if (pip) { 3793 cprivate = MDI_PI(pip)->pi_cprivate; 3794 } 3795 return (cprivate); 3796 } 3797 3798 /* 3799 * mdi_pi_set_client_private(): 3800 * Set the client private information in the mdi_pathinfo node 3801 */ 3802 void 3803 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3804 { 3805 if (pip) { 3806 MDI_PI(pip)->pi_cprivate = priv; 3807 } 3808 } 3809 3810 /* 3811 * mdi_pi_get_phci_private(): 3812 * Get the pHCI private information associated with the 3813 * mdi_pathinfo node 3814 */ 3815 caddr_t 3816 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3817 { 3818 caddr_t pprivate = NULL; 3819 if (pip) { 3820 pprivate = MDI_PI(pip)->pi_pprivate; 3821 } 3822 return (pprivate); 3823 } 3824 3825 /* 3826 * mdi_pi_set_phci_private(): 3827 * Set the pHCI private information in the mdi_pathinfo node 3828 */ 3829 void 3830 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3831 { 3832 if (pip) { 3833 MDI_PI(pip)->pi_pprivate = priv; 3834 } 3835 } 3836 3837 /* 3838 * mdi_pi_get_state(): 3839 * Get the mdi_pathinfo node state. Transient states are internal 3840 * and not provided to the users 3841 */ 3842 mdi_pathinfo_state_t 3843 mdi_pi_get_state(mdi_pathinfo_t *pip) 3844 { 3845 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3846 3847 if (pip) { 3848 if (MDI_PI_IS_TRANSIENT(pip)) { 3849 /* 3850 * mdi_pathinfo is in state transition. Return the 3851 * last good state. 3852 */ 3853 state = MDI_PI_OLD_STATE(pip); 3854 } else { 3855 state = MDI_PI_STATE(pip); 3856 } 3857 } 3858 return (state); 3859 } 3860 3861 /* 3862 * Note that the following function needs to be the new interface for 3863 * mdi_pi_get_state when mpxio gets integrated to ON. 
3864 */ 3865 int 3866 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3867 uint32_t *ext_state) 3868 { 3869 *state = MDI_PATHINFO_STATE_INIT; 3870 3871 if (pip) { 3872 if (MDI_PI_IS_TRANSIENT(pip)) { 3873 /* 3874 * mdi_pathinfo is in state transition. Return the 3875 * last good state. 3876 */ 3877 *state = MDI_PI_OLD_STATE(pip); 3878 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3879 } else { 3880 *state = MDI_PI_STATE(pip); 3881 *ext_state = MDI_PI_EXT_STATE(pip); 3882 } 3883 } 3884 return (MDI_SUCCESS); 3885 } 3886 3887 /* 3888 * mdi_pi_get_preferred: 3889 * Get the preferred path flag 3890 */ 3891 int 3892 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3893 { 3894 if (pip) { 3895 return (MDI_PI(pip)->pi_preferred); 3896 } 3897 return (0); 3898 } 3899 3900 /* 3901 * mdi_pi_set_preferred: 3902 * Set the preferred path flag 3903 */ 3904 void 3905 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3906 { 3907 if (pip) { 3908 MDI_PI(pip)->pi_preferred = preferred; 3909 } 3910 } 3911 3912 /* 3913 * mdi_pi_set_state(): 3914 * Set the mdi_pathinfo node state 3915 */ 3916 void 3917 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3918 { 3919 uint32_t ext_state; 3920 3921 if (pip) { 3922 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3923 MDI_PI(pip)->pi_state = state; 3924 MDI_PI(pip)->pi_state |= ext_state; 3925 } 3926 } 3927 3928 /* 3929 * Property functions: 3930 */ 3931 int 3932 i_map_nvlist_error_to_mdi(int val) 3933 { 3934 int rv; 3935 3936 switch (val) { 3937 case 0: 3938 rv = DDI_PROP_SUCCESS; 3939 break; 3940 case EINVAL: 3941 case ENOTSUP: 3942 rv = DDI_PROP_INVAL_ARG; 3943 break; 3944 case ENOMEM: 3945 rv = DDI_PROP_NO_MEMORY; 3946 break; 3947 default: 3948 rv = DDI_PROP_NOT_FOUND; 3949 break; 3950 } 3951 return (rv); 3952 } 3953 3954 /* 3955 * mdi_pi_get_next_prop(): 3956 * Property walk function. 
The caller should hold mdi_pi_lock() 3957 * and release by calling mdi_pi_unlock() at the end of walk to 3958 * get a consistent value. 3959 */ 3960 nvpair_t * 3961 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3962 { 3963 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3964 return (NULL); 3965 } 3966 ASSERT(MDI_PI_LOCKED(pip)); 3967 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3968 } 3969 3970 /* 3971 * mdi_prop_remove(): 3972 * Remove the named property from the named list. 3973 */ 3974 int 3975 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3976 { 3977 if (pip == NULL) { 3978 return (DDI_PROP_NOT_FOUND); 3979 } 3980 ASSERT(!MDI_PI_LOCKED(pip)); 3981 MDI_PI_LOCK(pip); 3982 if (MDI_PI(pip)->pi_prop == NULL) { 3983 MDI_PI_UNLOCK(pip); 3984 return (DDI_PROP_NOT_FOUND); 3985 } 3986 if (name) { 3987 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3988 } else { 3989 char nvp_name[MAXNAMELEN]; 3990 nvpair_t *nvp; 3991 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3992 while (nvp) { 3993 nvpair_t *next; 3994 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3995 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3996 nvpair_name(nvp)); 3997 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3998 nvp_name); 3999 nvp = next; 4000 } 4001 } 4002 MDI_PI_UNLOCK(pip); 4003 return (DDI_PROP_SUCCESS); 4004 } 4005 4006 /* 4007 * mdi_prop_size(): 4008 * Get buffer size needed to pack the property data. 4009 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4010 * buffer size. 
4011 */ 4012 int 4013 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4014 { 4015 int rv; 4016 size_t bufsize; 4017 4018 *buflenp = 0; 4019 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4020 return (DDI_PROP_NOT_FOUND); 4021 } 4022 ASSERT(MDI_PI_LOCKED(pip)); 4023 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4024 &bufsize, NV_ENCODE_NATIVE); 4025 *buflenp = bufsize; 4026 return (i_map_nvlist_error_to_mdi(rv)); 4027 } 4028 4029 /* 4030 * mdi_prop_pack(): 4031 * pack the property list. The caller should hold the 4032 * mdi_pathinfo_t node to get a consistent data 4033 */ 4034 int 4035 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4036 { 4037 int rv; 4038 size_t bufsize; 4039 4040 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4041 return (DDI_PROP_NOT_FOUND); 4042 } 4043 4044 ASSERT(MDI_PI_LOCKED(pip)); 4045 4046 bufsize = buflen; 4047 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4048 NV_ENCODE_NATIVE, KM_SLEEP); 4049 4050 return (i_map_nvlist_error_to_mdi(rv)); 4051 } 4052 4053 /* 4054 * mdi_prop_update_byte(): 4055 * Create/Update a byte property 4056 */ 4057 int 4058 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4059 { 4060 int rv; 4061 4062 if (pip == NULL) { 4063 return (DDI_PROP_INVAL_ARG); 4064 } 4065 ASSERT(!MDI_PI_LOCKED(pip)); 4066 MDI_PI_LOCK(pip); 4067 if (MDI_PI(pip)->pi_prop == NULL) { 4068 MDI_PI_UNLOCK(pip); 4069 return (DDI_PROP_NOT_FOUND); 4070 } 4071 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4072 MDI_PI_UNLOCK(pip); 4073 return (i_map_nvlist_error_to_mdi(rv)); 4074 } 4075 4076 /* 4077 * mdi_prop_update_byte_array(): 4078 * Create/Update a byte array property 4079 */ 4080 int 4081 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4082 uint_t nelements) 4083 { 4084 int rv; 4085 4086 if (pip == NULL) { 4087 return (DDI_PROP_INVAL_ARG); 4088 } 4089 ASSERT(!MDI_PI_LOCKED(pip)); 4090 MDI_PI_LOCK(pip); 4091 if (MDI_PI(pip)->pi_prop == NULL) { 
4092 MDI_PI_UNLOCK(pip); 4093 return (DDI_PROP_NOT_FOUND); 4094 } 4095 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4096 MDI_PI_UNLOCK(pip); 4097 return (i_map_nvlist_error_to_mdi(rv)); 4098 } 4099 4100 /* 4101 * mdi_prop_update_int(): 4102 * Create/Update a 32 bit integer property 4103 */ 4104 int 4105 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4106 { 4107 int rv; 4108 4109 if (pip == NULL) { 4110 return (DDI_PROP_INVAL_ARG); 4111 } 4112 ASSERT(!MDI_PI_LOCKED(pip)); 4113 MDI_PI_LOCK(pip); 4114 if (MDI_PI(pip)->pi_prop == NULL) { 4115 MDI_PI_UNLOCK(pip); 4116 return (DDI_PROP_NOT_FOUND); 4117 } 4118 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4119 MDI_PI_UNLOCK(pip); 4120 return (i_map_nvlist_error_to_mdi(rv)); 4121 } 4122 4123 /* 4124 * mdi_prop_update_int64(): 4125 * Create/Update a 64 bit integer property 4126 */ 4127 int 4128 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4129 { 4130 int rv; 4131 4132 if (pip == NULL) { 4133 return (DDI_PROP_INVAL_ARG); 4134 } 4135 ASSERT(!MDI_PI_LOCKED(pip)); 4136 MDI_PI_LOCK(pip); 4137 if (MDI_PI(pip)->pi_prop == NULL) { 4138 MDI_PI_UNLOCK(pip); 4139 return (DDI_PROP_NOT_FOUND); 4140 } 4141 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4142 MDI_PI_UNLOCK(pip); 4143 return (i_map_nvlist_error_to_mdi(rv)); 4144 } 4145 4146 /* 4147 * mdi_prop_update_int_array(): 4148 * Create/Update a int array property 4149 */ 4150 int 4151 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4152 uint_t nelements) 4153 { 4154 int rv; 4155 4156 if (pip == NULL) { 4157 return (DDI_PROP_INVAL_ARG); 4158 } 4159 ASSERT(!MDI_PI_LOCKED(pip)); 4160 MDI_PI_LOCK(pip); 4161 if (MDI_PI(pip)->pi_prop == NULL) { 4162 MDI_PI_UNLOCK(pip); 4163 return (DDI_PROP_NOT_FOUND); 4164 } 4165 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4166 nelements); 4167 MDI_PI_UNLOCK(pip); 4168 return (i_map_nvlist_error_to_mdi(rv)); 
4169 } 4170 4171 /* 4172 * mdi_prop_update_string(): 4173 * Create/Update a string property 4174 */ 4175 int 4176 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4177 { 4178 int rv; 4179 4180 if (pip == NULL) { 4181 return (DDI_PROP_INVAL_ARG); 4182 } 4183 ASSERT(!MDI_PI_LOCKED(pip)); 4184 MDI_PI_LOCK(pip); 4185 if (MDI_PI(pip)->pi_prop == NULL) { 4186 MDI_PI_UNLOCK(pip); 4187 return (DDI_PROP_NOT_FOUND); 4188 } 4189 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4190 MDI_PI_UNLOCK(pip); 4191 return (i_map_nvlist_error_to_mdi(rv)); 4192 } 4193 4194 /* 4195 * mdi_prop_update_string_array(): 4196 * Create/Update a string array property 4197 */ 4198 int 4199 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4200 uint_t nelements) 4201 { 4202 int rv; 4203 4204 if (pip == NULL) { 4205 return (DDI_PROP_INVAL_ARG); 4206 } 4207 ASSERT(!MDI_PI_LOCKED(pip)); 4208 MDI_PI_LOCK(pip); 4209 if (MDI_PI(pip)->pi_prop == NULL) { 4210 MDI_PI_UNLOCK(pip); 4211 return (DDI_PROP_NOT_FOUND); 4212 } 4213 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4214 nelements); 4215 MDI_PI_UNLOCK(pip); 4216 return (i_map_nvlist_error_to_mdi(rv)); 4217 } 4218 4219 /* 4220 * mdi_prop_lookup_byte(): 4221 * Look for byte property identified by name. The data returned 4222 * is the actual property and valid as long as mdi_pathinfo_t node 4223 * is alive. 4224 */ 4225 int 4226 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4227 { 4228 int rv; 4229 4230 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4231 return (DDI_PROP_NOT_FOUND); 4232 } 4233 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4234 return (i_map_nvlist_error_to_mdi(rv)); 4235 } 4236 4237 4238 /* 4239 * mdi_prop_lookup_byte_array(): 4240 * Look for byte array property identified by name. The data 4241 * returned is the actual property and valid as long as 4242 * mdi_pathinfo_t node is alive. 
4243 */ 4244 int 4245 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4246 uint_t *nelements) 4247 { 4248 int rv; 4249 4250 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4251 return (DDI_PROP_NOT_FOUND); 4252 } 4253 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4254 nelements); 4255 return (i_map_nvlist_error_to_mdi(rv)); 4256 } 4257 4258 /* 4259 * mdi_prop_lookup_int(): 4260 * Look for int property identified by name. The data returned 4261 * is the actual property and valid as long as mdi_pathinfo_t 4262 * node is alive. 4263 */ 4264 int 4265 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4266 { 4267 int rv; 4268 4269 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4270 return (DDI_PROP_NOT_FOUND); 4271 } 4272 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4273 return (i_map_nvlist_error_to_mdi(rv)); 4274 } 4275 4276 /* 4277 * mdi_prop_lookup_int64(): 4278 * Look for int64 property identified by name. The data returned 4279 * is the actual property and valid as long as mdi_pathinfo_t node 4280 * is alive. 4281 */ 4282 int 4283 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4284 { 4285 int rv; 4286 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4287 return (DDI_PROP_NOT_FOUND); 4288 } 4289 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4290 return (i_map_nvlist_error_to_mdi(rv)); 4291 } 4292 4293 /* 4294 * mdi_prop_lookup_int_array(): 4295 * Look for int array property identified by name. The data 4296 * returned is the actual property and valid as long as 4297 * mdi_pathinfo_t node is alive. 
4298 */ 4299 int 4300 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4301 uint_t *nelements) 4302 { 4303 int rv; 4304 4305 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4306 return (DDI_PROP_NOT_FOUND); 4307 } 4308 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4309 (int32_t **)data, nelements); 4310 return (i_map_nvlist_error_to_mdi(rv)); 4311 } 4312 4313 /* 4314 * mdi_prop_lookup_string(): 4315 * Look for string property identified by name. The data 4316 * returned is the actual property and valid as long as 4317 * mdi_pathinfo_t node is alive. 4318 */ 4319 int 4320 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4321 { 4322 int rv; 4323 4324 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4325 return (DDI_PROP_NOT_FOUND); 4326 } 4327 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4328 return (i_map_nvlist_error_to_mdi(rv)); 4329 } 4330 4331 /* 4332 * mdi_prop_lookup_string_array(): 4333 * Look for string array property identified by name. The data 4334 * returned is the actual property and valid as long as 4335 * mdi_pathinfo_t node is alive. 4336 */ 4337 int 4338 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4339 uint_t *nelements) 4340 { 4341 int rv; 4342 4343 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4344 return (DDI_PROP_NOT_FOUND); 4345 } 4346 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4347 nelements); 4348 return (i_map_nvlist_error_to_mdi(rv)); 4349 } 4350 4351 /* 4352 * mdi_prop_free(): 4353 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4354 * functions return the pointer to actual property data and not a 4355 * copy of it. So the data returned is valid as long as 4356 * mdi_pathinfo_t node is valid. 
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	/* Lookups return in-place nvlist data; there is nothing to free. */
	return (DDI_PROP_SUCCESS);
}

/*
 * i_mdi_report_path_state():
 *	Emit a console/syslog note describing the client's overall
 *	multipath state, the state of the given path, and the active
 *	load-balancing policy.  The message is produced only when a
 *	report was requested (MDI_CLIENT_IS_REPORT_DEV_NEEDED) and the
 *	client dev_info node has a valid instance; the "report needed"
 *	flag is cleared after the message is printed.
 *	Caller must hold the client lock.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char		*phci_path, *ct_path;
	char		*ct_status;
	char		*status;
	dev_info_t	*dip = ct->ct_dip;
	char		lb_buf[64];

	ASSERT(MDI_CLIENT_LOCKED(ct));
	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}
	/* Map the client state to a human-readable label. */
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	/* Map the path state to a human-readable label. */
	if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	/* Describe the load-balancing policy (LBA includes region size). */
	if (ct->ct_lb == LOAD_BALANCE_LBA) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s, region-size: %d", mdi_load_balance_lba,
		    ct->ct_lb_args->region_size);
	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s", mdi_load_balance_none);
	} else {
		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
		    mdi_load_balance_rr);
	}

	if (dip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		/* Leading '?' routes the message per cmn_err(9F) rules. */
		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
		    "path %s (%s%d) to target address: %s is %s"
		    " Load balancing: %s\n",
		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
		    ddi_get_instance(dip), ct_status,
		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
		    MDI_PI(pip)->pi_addr, status, lb_buf);
		kmem_free(phci_path, MAXPATHLEN);
		kmem_free(ct_path, MAXPATHLEN);
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef DEBUG
/*
 * i_mdi_log():
 *	Utility function for error message management
 *
 *	Honors the cmn_err(9F)-style routing prefix in the formatted
 *	message ('!' = log only, '?' = boot/log, '^' = console only);
 *	the prefix character is stripped here and re-applied to the
 *	final "mdi: ..." message.  mdi_debug_logonly forces everything
 *	to the system log.
 */
/*PRINTFLIKE3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[MAXNAMELEN];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	/* Prefix the message with "driver<instance>: " when dip is known. */
	if (dip) {
		(void) snprintf(name, MAXNAMELEN, "%s%d: ",
		    ddi_node_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
	va_end(ap);

	/* Strip and remember any routing prefix character. */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		bp = buf;
		break;
	}
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}

	switch (level) {
	case CE_NOTE:
		/* Demote NOTE to CONT to avoid the "NOTICE:" banner. */
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
	case CE_WARN:
	case CE_PANIC:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s", name, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s", name, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s", name, bp);
		} else {
			cmn_err(level, "mdi: %s%s", name, bp);
		}
		break;
	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */

void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification.
	 * Mark client state as online and
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	/* Power up every pHCI path the first time the client comes up. */
	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
	    "i_mdi_pm_hold_client %p\n", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *	Online notification from NDI framework on pHCI/client
 *	device online.
 *	A dip may be both a pHCI and a client; both roles are updated.
 * Return Values:
 *	NDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *	Offline notification from NDI framework on pHCI/Client device
 *	offline.
 *
 * Return Values:
 *	NDI_SUCCESS
 *	NDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
	    (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
		    (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * First pass: verify each path's client can tolerate losing this
	 * pHCI.  Clients that would be left with no working path are
	 * offlined via ndi_devi_offline(); a failure there aborts the DR
	 * and records the path at which we stopped (failed_pip).
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Locks are dropped around ndi_devi_offline(); the
			 * pHCI lock is retaken afterwards in both arms.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Roll back: restore the online/offline state of every
		 * client we touched before the failing path.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			/* path refused to offline: undo and report busy */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n",
	    (void *)dip, (void *)ct));
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!One or more paths to this device is "
			    "in transient state. This device can not "
			    "be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!Client device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}

/*
 * mdi_pre_attach():
 *	Pre attach() notification handler
 */
/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* don't support old DDI_PM_RESUME */
	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
	    (cmd == DDI_PM_RESUME))
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * mdi_post_attach():
 *	Post attach() notification handler
 */
/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_attach: called %p\n", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_attach: failed error=%d\n",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_resume: called %p\n", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_resume: failed error=%d\n",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE,
dip, 4930 "!Client post_attach: called %p\n", (void *)ct)); 4931 if (error != DDI_SUCCESS) { 4932 MDI_DEBUG(1, (CE_NOTE, dip, 4933 "!Client post_attach: failed error=%d\n", 4934 error)); 4935 MDI_CLIENT_SET_DETACH(ct); 4936 MDI_DEBUG(4, (CE_WARN, dip, 4937 "mdi_post_attach i_mdi_pm_reset_client\n")); 4938 i_mdi_pm_reset_client(ct); 4939 break; 4940 } 4941 4942 /* 4943 * Client device has successfully attached. 4944 * Create kstats for any pathinfo structures 4945 * initially associated with this client. 4946 */ 4947 for (pip = ct->ct_path_head; pip != NULL; 4948 pip = (mdi_pathinfo_t *) 4949 MDI_PI(pip)->pi_client_link) { 4950 if (!MDI_PI_IS_OFFLINE(pip)) { 4951 (void) i_mdi_pi_kstat_create(pip); 4952 i_mdi_report_path_state(ct, pip); 4953 } 4954 } 4955 MDI_CLIENT_SET_ATTACH(ct); 4956 break; 4957 4958 case DDI_RESUME: 4959 MDI_DEBUG(2, (CE_NOTE, dip, 4960 "!Client post_attach: called %p\n", (void *)ct)); 4961 if (error == DDI_SUCCESS) { 4962 MDI_CLIENT_SET_RESUME(ct); 4963 } else { 4964 MDI_DEBUG(1, (CE_NOTE, dip, 4965 "!Client post_resume: failed error=%d\n", 4966 error)); 4967 MDI_CLIENT_SET_SUSPEND(ct); 4968 } 4969 break; 4970 } 4971 MDI_CLIENT_UNLOCK(ct); 4972 } 4973 } 4974 4975 /* 4976 * mdi_pre_detach(): 4977 * Pre detach notification handler 4978 */ 4979 /*ARGSUSED*/ 4980 int 4981 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4982 { 4983 int rv = DDI_SUCCESS; 4984 4985 if (MDI_CLIENT(dip)) { 4986 (void) i_mdi_client_pre_detach(dip, cmd); 4987 } 4988 4989 if (MDI_PHCI(dip)) { 4990 rv = i_mdi_phci_pre_detach(dip, cmd); 4991 } 4992 4993 return (rv); 4994 } 4995 4996 /*ARGSUSED*/ 4997 static int 4998 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4999 { 5000 int rv = DDI_SUCCESS; 5001 mdi_phci_t *ph; 5002 mdi_client_t *ct; 5003 mdi_pathinfo_t *pip; 5004 mdi_pathinfo_t *failed_pip = NULL; 5005 mdi_pathinfo_t *next; 5006 5007 ph = i_devi_get_phci(dip); 5008 if (ph == NULL) { 5009 return (rv); 5010 } 5011 5012 MDI_PHCI_LOCK(ph); 5013 switch 
(cmd) { 5014 case DDI_DETACH: 5015 MDI_DEBUG(2, (CE_NOTE, dip, 5016 "!pHCI pre_detach: called %p\n", (void *)ph)); 5017 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5018 /* 5019 * mdi_pathinfo nodes are still attached to 5020 * this pHCI. Fail the detach for this pHCI. 5021 */ 5022 MDI_DEBUG(2, (CE_WARN, dip, 5023 "!pHCI pre_detach: " 5024 "mdi_pathinfo nodes are still attached " 5025 "%p\n", (void *)ph)); 5026 rv = DDI_FAILURE; 5027 break; 5028 } 5029 MDI_PHCI_SET_DETACH(ph); 5030 break; 5031 5032 case DDI_SUSPEND: 5033 /* 5034 * pHCI is getting suspended. Since mpxio client 5035 * devices may not be suspended at this point, to avoid 5036 * a potential stack overflow, it is important to suspend 5037 * client devices before pHCI can be suspended. 5038 */ 5039 5040 MDI_DEBUG(2, (CE_NOTE, dip, 5041 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5042 /* 5043 * Suspend all the client devices accessible through this pHCI 5044 */ 5045 pip = ph->ph_path_head; 5046 while (pip != NULL && rv == DDI_SUCCESS) { 5047 dev_info_t *cdip; 5048 MDI_PI_LOCK(pip); 5049 next = 5050 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5051 ct = MDI_PI(pip)->pi_client; 5052 i_mdi_client_lock(ct, pip); 5053 cdip = ct->ct_dip; 5054 MDI_PI_UNLOCK(pip); 5055 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5056 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5057 i_mdi_client_unlock(ct); 5058 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5059 DDI_SUCCESS) { 5060 /* 5061 * Suspend of one of the client 5062 * device has failed. 5063 */ 5064 MDI_DEBUG(1, (CE_WARN, dip, 5065 "!Suspend of device (%s%d) failed.", 5066 ddi_driver_name(cdip), 5067 ddi_get_instance(cdip))); 5068 failed_pip = pip; 5069 break; 5070 } 5071 } else { 5072 i_mdi_client_unlock(ct); 5073 } 5074 pip = next; 5075 } 5076 5077 if (rv == DDI_SUCCESS) { 5078 /* 5079 * Suspend of client devices is complete. Proceed 5080 * with pHCI suspend. 
5081 */ 5082 MDI_PHCI_SET_SUSPEND(ph); 5083 } else { 5084 /* 5085 * Revert back all the suspended client device states 5086 * to converse. 5087 */ 5088 pip = ph->ph_path_head; 5089 while (pip != failed_pip) { 5090 dev_info_t *cdip; 5091 MDI_PI_LOCK(pip); 5092 next = 5093 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5094 ct = MDI_PI(pip)->pi_client; 5095 i_mdi_client_lock(ct, pip); 5096 cdip = ct->ct_dip; 5097 MDI_PI_UNLOCK(pip); 5098 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5099 i_mdi_client_unlock(ct); 5100 (void) devi_attach(cdip, DDI_RESUME); 5101 } else { 5102 i_mdi_client_unlock(ct); 5103 } 5104 pip = next; 5105 } 5106 } 5107 break; 5108 5109 default: 5110 rv = DDI_FAILURE; 5111 break; 5112 } 5113 MDI_PHCI_UNLOCK(ph); 5114 return (rv); 5115 } 5116 5117 /*ARGSUSED*/ 5118 static int 5119 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5120 { 5121 int rv = DDI_SUCCESS; 5122 mdi_client_t *ct; 5123 5124 ct = i_devi_get_client(dip); 5125 if (ct == NULL) { 5126 return (rv); 5127 } 5128 5129 MDI_CLIENT_LOCK(ct); 5130 switch (cmd) { 5131 case DDI_DETACH: 5132 MDI_DEBUG(2, (CE_NOTE, dip, 5133 "!Client pre_detach: called %p\n", (void *)ct)); 5134 MDI_CLIENT_SET_DETACH(ct); 5135 break; 5136 5137 case DDI_SUSPEND: 5138 MDI_DEBUG(2, (CE_NOTE, dip, 5139 "!Client pre_suspend: called %p\n", (void *)ct)); 5140 MDI_CLIENT_SET_SUSPEND(ct); 5141 break; 5142 5143 default: 5144 rv = DDI_FAILURE; 5145 break; 5146 } 5147 MDI_CLIENT_UNLOCK(ct); 5148 return (rv); 5149 } 5150 5151 /* 5152 * mdi_post_detach(): 5153 * Post detach notification handler 5154 */ 5155 /*ARGSUSED*/ 5156 void 5157 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5158 { 5159 /* 5160 * Detach/Suspend of mpxio component failed. 
Update our state 5161 * too 5162 */ 5163 if (MDI_PHCI(dip)) 5164 i_mdi_phci_post_detach(dip, cmd, error); 5165 5166 if (MDI_CLIENT(dip)) 5167 i_mdi_client_post_detach(dip, cmd, error); 5168 } 5169 5170 /*ARGSUSED*/ 5171 static void 5172 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5173 { 5174 mdi_phci_t *ph; 5175 5176 /* 5177 * Detach/Suspend of phci component failed. Update our state 5178 * too 5179 */ 5180 ph = i_devi_get_phci(dip); 5181 if (ph == NULL) { 5182 return; 5183 } 5184 5185 MDI_PHCI_LOCK(ph); 5186 /* 5187 * Detach of pHCI failed. Restore back converse 5188 * state 5189 */ 5190 switch (cmd) { 5191 case DDI_DETACH: 5192 MDI_DEBUG(2, (CE_NOTE, dip, 5193 "!pHCI post_detach: called %p\n", (void *)ph)); 5194 if (error != DDI_SUCCESS) 5195 MDI_PHCI_SET_ATTACH(ph); 5196 break; 5197 5198 case DDI_SUSPEND: 5199 MDI_DEBUG(2, (CE_NOTE, dip, 5200 "!pHCI post_suspend: called %p\n", (void *)ph)); 5201 if (error != DDI_SUCCESS) 5202 MDI_PHCI_SET_RESUME(ph); 5203 break; 5204 } 5205 MDI_PHCI_UNLOCK(ph); 5206 } 5207 5208 /*ARGSUSED*/ 5209 static void 5210 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5211 { 5212 mdi_client_t *ct; 5213 5214 ct = i_devi_get_client(dip); 5215 if (ct == NULL) { 5216 return; 5217 } 5218 MDI_CLIENT_LOCK(ct); 5219 /* 5220 * Detach of Client failed. 
Restore back converse 5221 * state 5222 */ 5223 switch (cmd) { 5224 case DDI_DETACH: 5225 MDI_DEBUG(2, (CE_NOTE, dip, 5226 "!Client post_detach: called %p\n", (void *)ct)); 5227 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5228 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5229 "i_mdi_pm_rele_client\n")); 5230 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5231 } else { 5232 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5233 "i_mdi_pm_reset_client\n")); 5234 i_mdi_pm_reset_client(ct); 5235 } 5236 if (error != DDI_SUCCESS) 5237 MDI_CLIENT_SET_ATTACH(ct); 5238 break; 5239 5240 case DDI_SUSPEND: 5241 MDI_DEBUG(2, (CE_NOTE, dip, 5242 "!Client post_suspend: called %p\n", (void *)ct)); 5243 if (error != DDI_SUCCESS) 5244 MDI_CLIENT_SET_RESUME(ct); 5245 break; 5246 } 5247 MDI_CLIENT_UNLOCK(ct); 5248 } 5249 5250 /* 5251 * create and install per-path (client - pHCI) statistics 5252 * I/O stats supported: nread, nwritten, reads, and writes 5253 * Error stats - hard errors, soft errors, & transport errors 5254 */ 5255 static int 5256 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip) 5257 { 5258 5259 dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip; 5260 dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip; 5261 char ksname[KSTAT_STRLEN]; 5262 mdi_pathinfo_t *cpip; 5263 const char *err_postfix = ",err"; 5264 kstat_t *kiosp, *kerrsp; 5265 struct pi_errs *nsp; 5266 struct mdi_pi_kstats *mdi_statp; 5267 5268 ASSERT(client != NULL && ppath != NULL); 5269 5270 ASSERT(MDI_CLIENT_LOCKED(MDI_PI(pip)->pi_client)); 5271 5272 if (MDI_PI(pip)->pi_kstats != NULL) 5273 return (MDI_SUCCESS); 5274 5275 for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL; 5276 cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) { 5277 if ((cpip == pip) || MDI_PI_IS_OFFLINE(pip)) 5278 continue; 5279 /* 5280 * We have found a different path with same parent 5281 * kstats for a given client-pHCI are common 5282 */ 5283 if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) && 5284 (MDI_PI(cpip)->pi_kstats 
!= NULL)) { 5285 MDI_PI(cpip)->pi_kstats->pi_kstat_ref++; 5286 MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats; 5287 return (MDI_SUCCESS); 5288 } 5289 } 5290 5291 /* 5292 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0" 5293 * clamp length of name against max length of error kstat name 5294 */ 5295 if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d", 5296 ddi_driver_name(client), ddi_get_instance(client), 5297 ddi_driver_name(ppath), ddi_get_instance(ppath)) > 5298 (KSTAT_STRLEN - strlen(err_postfix))) { 5299 return (MDI_FAILURE); 5300 } 5301 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5302 KSTAT_TYPE_IO, 1, 0)) == NULL) { 5303 return (MDI_FAILURE); 5304 } 5305 5306 (void) strcat(ksname, err_postfix); 5307 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5308 KSTAT_TYPE_NAMED, 5309 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5310 5311 if (kerrsp == NULL) { 5312 kstat_delete(kiosp); 5313 return (MDI_FAILURE); 5314 } 5315 5316 nsp = (struct pi_errs *)kerrsp->ks_data; 5317 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5318 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5319 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5320 KSTAT_DATA_UINT32); 5321 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5322 KSTAT_DATA_UINT32); 5323 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5324 KSTAT_DATA_UINT32); 5325 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5326 KSTAT_DATA_UINT32); 5327 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5328 KSTAT_DATA_UINT32); 5329 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5330 KSTAT_DATA_UINT32); 5331 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5332 KSTAT_DATA_UINT32); 5333 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5334 5335 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5336 mdi_statp->pi_kstat_ref = 1; 5337 mdi_statp->pi_kstat_iostats = 
kiosp; 5338 mdi_statp->pi_kstat_errstats = kerrsp; 5339 kstat_install(kiosp); 5340 kstat_install(kerrsp); 5341 MDI_PI(pip)->pi_kstats = mdi_statp; 5342 return (MDI_SUCCESS); 5343 } 5344 5345 /* 5346 * destroy per-path properties 5347 */ 5348 static void 5349 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5350 { 5351 5352 struct mdi_pi_kstats *mdi_statp; 5353 5354 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5355 return; 5356 5357 MDI_PI(pip)->pi_kstats = NULL; 5358 5359 /* 5360 * the kstat may be shared between multiple pathinfo nodes 5361 * decrement this pathinfo's usage, removing the kstats 5362 * themselves when the last pathinfo reference is removed. 5363 */ 5364 ASSERT(mdi_statp->pi_kstat_ref > 0); 5365 if (--mdi_statp->pi_kstat_ref != 0) 5366 return; 5367 5368 kstat_delete(mdi_statp->pi_kstat_iostats); 5369 kstat_delete(mdi_statp->pi_kstat_errstats); 5370 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5371 } 5372 5373 /* 5374 * update I/O paths KSTATS 5375 */ 5376 void 5377 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5378 { 5379 kstat_t *iostatp; 5380 size_t xfer_cnt; 5381 5382 ASSERT(pip != NULL); 5383 5384 /* 5385 * I/O can be driven across a path prior to having path 5386 * statistics available, i.e. probe(9e). 5387 */ 5388 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5389 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5390 xfer_cnt = bp->b_bcount - bp->b_resid; 5391 if (bp->b_flags & B_READ) { 5392 KSTAT_IO_PTR(iostatp)->reads++; 5393 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5394 } else { 5395 KSTAT_IO_PTR(iostatp)->writes++; 5396 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5397 } 5398 } 5399 } 5400 5401 /* 5402 * Enable the path(specific client/target/initiator) 5403 * Enabling a path means that MPxIO may select the enabled path for routing 5404 * future I/O requests, subject to other path state constraints. 
5405 */ 5406 int 5407 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5408 { 5409 mdi_phci_t *ph; 5410 5411 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5412 if (ph == NULL) { 5413 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5414 " failed. pip: %p ph = NULL\n", (void *)pip)); 5415 return (MDI_FAILURE); 5416 } 5417 5418 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5419 MDI_ENABLE_OP); 5420 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5421 " Returning success pip = %p. ph = %p\n", 5422 (void *)pip, (void *)ph)); 5423 return (MDI_SUCCESS); 5424 5425 } 5426 5427 /* 5428 * Disable the path (specific client/target/initiator) 5429 * Disabling a path means that MPxIO will not select the disabled path for 5430 * routing any new I/O requests. 5431 */ 5432 int 5433 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5434 { 5435 mdi_phci_t *ph; 5436 5437 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5438 if (ph == NULL) { 5439 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5440 " failed. pip: %p ph = NULL\n", (void *)pip)); 5441 return (MDI_FAILURE); 5442 } 5443 5444 (void) i_mdi_enable_disable_path(pip, 5445 ph->ph_vhci, flags, MDI_DISABLE_OP); 5446 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5447 "Returning success pip = %p. ph = %p", 5448 (void *)pip, (void *)ph)); 5449 return (MDI_SUCCESS); 5450 } 5451 5452 /* 5453 * disable the path to a particular pHCI (pHCI specified in the phci_path 5454 * argument) for a particular client (specified in the client_path argument). 5455 * Disabling a path means that MPxIO will not select the disabled path for 5456 * routing any new I/O requests. 
5457 * NOTE: this will be removed once the NWS files are changed to use the new 5458 * mdi_{enable,disable}_path interfaces 5459 */ 5460 int 5461 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5462 { 5463 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5464 } 5465 5466 /* 5467 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5468 * argument) for a particular client (specified in the client_path argument). 5469 * Enabling a path means that MPxIO may select the enabled path for routing 5470 * future I/O requests, subject to other path state constraints. 5471 * NOTE: this will be removed once the NWS files are changed to use the new 5472 * mdi_{enable,disable}_path interfaces 5473 */ 5474 5475 int 5476 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5477 { 5478 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5479 } 5480 5481 /* 5482 * Common routine for doing enable/disable. 5483 */ 5484 static mdi_pathinfo_t * 5485 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5486 int op) 5487 { 5488 int sync_flag = 0; 5489 int rv; 5490 mdi_pathinfo_t *next; 5491 int (*f)() = NULL; 5492 5493 f = vh->vh_ops->vo_pi_state_change; 5494 5495 sync_flag = (flags << 8) & 0xf00; 5496 5497 /* 5498 * Do a callback into the mdi consumer to let it 5499 * know that path is about to get enabled/disabled. 
5500 */ 5501 if (f != NULL) { 5502 rv = (*f)(vh->vh_dip, pip, 0, 5503 MDI_PI_EXT_STATE(pip), 5504 MDI_EXT_STATE_CHANGE | sync_flag | 5505 op | MDI_BEFORE_STATE_CHANGE); 5506 if (rv != MDI_SUCCESS) { 5507 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5508 "!vo_pi_state_change: failed rv = %x", rv)); 5509 } 5510 } 5511 MDI_PI_LOCK(pip); 5512 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5513 5514 switch (flags) { 5515 case USER_DISABLE: 5516 if (op == MDI_DISABLE_OP) { 5517 MDI_PI_SET_USER_DISABLE(pip); 5518 } else { 5519 MDI_PI_SET_USER_ENABLE(pip); 5520 } 5521 break; 5522 case DRIVER_DISABLE: 5523 if (op == MDI_DISABLE_OP) { 5524 MDI_PI_SET_DRV_DISABLE(pip); 5525 } else { 5526 MDI_PI_SET_DRV_ENABLE(pip); 5527 } 5528 break; 5529 case DRIVER_DISABLE_TRANSIENT: 5530 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5531 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5532 } else { 5533 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5534 } 5535 break; 5536 } 5537 MDI_PI_UNLOCK(pip); 5538 /* 5539 * Do a callback into the mdi consumer to let it 5540 * know that path is now enabled/disabled. 5541 */ 5542 if (f != NULL) { 5543 rv = (*f)(vh->vh_dip, pip, 0, 5544 MDI_PI_EXT_STATE(pip), 5545 MDI_EXT_STATE_CHANGE | sync_flag | 5546 op | MDI_AFTER_STATE_CHANGE); 5547 if (rv != MDI_SUCCESS) { 5548 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5549 "!vo_pi_state_change: failed rv = %x", rv)); 5550 } 5551 } 5552 return (next); 5553 } 5554 5555 /* 5556 * Common routine for doing enable/disable. 
5557 * NOTE: this will be removed once the NWS files are changed to use the new 5558 * mdi_{enable,disable}_path has been putback 5559 */ 5560 int 5561 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5562 { 5563 5564 mdi_phci_t *ph; 5565 mdi_vhci_t *vh = NULL; 5566 mdi_client_t *ct; 5567 mdi_pathinfo_t *next, *pip; 5568 int found_it; 5569 5570 ph = i_devi_get_phci(pdip); 5571 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5572 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5573 (void *)cdip)); 5574 if (ph == NULL) { 5575 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5576 "Op %d failed. ph = NULL\n", op)); 5577 return (MDI_FAILURE); 5578 } 5579 5580 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5581 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5582 "Op Invalid operation = %d\n", op)); 5583 return (MDI_FAILURE); 5584 } 5585 5586 vh = ph->ph_vhci; 5587 5588 if (cdip == NULL) { 5589 /* 5590 * Need to mark the Phci as enabled/disabled. 5591 */ 5592 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5593 "Op %d for the phci\n", op)); 5594 MDI_PHCI_LOCK(ph); 5595 switch (flags) { 5596 case USER_DISABLE: 5597 if (op == MDI_DISABLE_OP) { 5598 MDI_PHCI_SET_USER_DISABLE(ph); 5599 } else { 5600 MDI_PHCI_SET_USER_ENABLE(ph); 5601 } 5602 break; 5603 case DRIVER_DISABLE: 5604 if (op == MDI_DISABLE_OP) { 5605 MDI_PHCI_SET_DRV_DISABLE(ph); 5606 } else { 5607 MDI_PHCI_SET_DRV_ENABLE(ph); 5608 } 5609 break; 5610 case DRIVER_DISABLE_TRANSIENT: 5611 if (op == MDI_DISABLE_OP) { 5612 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5613 } else { 5614 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5615 } 5616 break; 5617 default: 5618 MDI_PHCI_UNLOCK(ph); 5619 MDI_DEBUG(1, (CE_NOTE, NULL, 5620 "!i_mdi_pi_enable_disable:" 5621 " Invalid flag argument= %d\n", flags)); 5622 } 5623 5624 /* 5625 * Phci has been disabled. Now try to enable/disable 5626 * path info's to each client. 
5627 */ 5628 pip = ph->ph_path_head; 5629 while (pip != NULL) { 5630 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5631 } 5632 MDI_PHCI_UNLOCK(ph); 5633 } else { 5634 5635 /* 5636 * Disable a specific client. 5637 */ 5638 ct = i_devi_get_client(cdip); 5639 if (ct == NULL) { 5640 MDI_DEBUG(1, (CE_NOTE, NULL, 5641 "!i_mdi_pi_enable_disable:" 5642 " failed. ct = NULL operation = %d\n", op)); 5643 return (MDI_FAILURE); 5644 } 5645 5646 MDI_CLIENT_LOCK(ct); 5647 pip = ct->ct_path_head; 5648 found_it = 0; 5649 while (pip != NULL) { 5650 MDI_PI_LOCK(pip); 5651 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5652 if (MDI_PI(pip)->pi_phci == ph) { 5653 MDI_PI_UNLOCK(pip); 5654 found_it = 1; 5655 break; 5656 } 5657 MDI_PI_UNLOCK(pip); 5658 pip = next; 5659 } 5660 5661 5662 MDI_CLIENT_UNLOCK(ct); 5663 if (found_it == 0) { 5664 MDI_DEBUG(1, (CE_NOTE, NULL, 5665 "!i_mdi_pi_enable_disable:" 5666 " failed. Could not find corresponding pip\n")); 5667 return (MDI_FAILURE); 5668 } 5669 5670 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5671 } 5672 5673 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5674 "Op %d Returning success pdip = %p cdip = %p\n", 5675 op, (void *)pdip, (void *)cdip)); 5676 return (MDI_SUCCESS); 5677 } 5678 5679 /* 5680 * Ensure phci powered up 5681 */ 5682 static void 5683 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5684 { 5685 dev_info_t *ph_dip; 5686 5687 ASSERT(pip != NULL); 5688 ASSERT(MDI_PI_LOCKED(pip)); 5689 5690 if (MDI_PI(pip)->pi_pm_held) { 5691 return; 5692 } 5693 5694 ph_dip = mdi_pi_get_phci(pip); 5695 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 5696 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5697 if (ph_dip == NULL) { 5698 return; 5699 } 5700 5701 MDI_PI_UNLOCK(pip); 5702 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5703 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5704 5705 pm_hold_power(ph_dip); 5706 5707 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5708 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 5709 MDI_PI_LOCK(pip); 5710 5711 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 5712 if (DEVI(ph_dip)->devi_pm_info) 5713 MDI_PI(pip)->pi_pm_held = 1; 5714 } 5715 5716 /* 5717 * Allow phci powered down 5718 */ 5719 static void 5720 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5721 { 5722 dev_info_t *ph_dip = NULL; 5723 5724 ASSERT(pip != NULL); 5725 ASSERT(MDI_PI_LOCKED(pip)); 5726 5727 if (MDI_PI(pip)->pi_pm_held == 0) { 5728 return; 5729 } 5730 5731 ph_dip = mdi_pi_get_phci(pip); 5732 ASSERT(ph_dip != NULL); 5733 5734 MDI_PI_UNLOCK(pip); 5735 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 5736 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5737 5738 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5739 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5740 pm_rele_power(ph_dip); 5741 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5742 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5743 5744 MDI_PI_LOCK(pip); 5745 MDI_PI(pip)->pi_pm_held = 0; 5746 } 5747 5748 static void 5749 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5750 { 5751 ASSERT(MDI_CLIENT_LOCKED(ct)); 5752 5753 ct->ct_power_cnt += incr; 5754 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 5755 "ct_power_cnt = %d incr = %d\n", (void *)ct, 5756 ct->ct_power_cnt, incr)); 5757 ASSERT(ct->ct_power_cnt >= 0); 5758 } 5759 5760 static void 5761 i_mdi_rele_all_phci(mdi_client_t *ct) 5762 { 5763 mdi_pathinfo_t *pip; 5764 5765 ASSERT(MDI_CLIENT_LOCKED(ct)); 5766 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5767 while (pip != NULL) { 5768 mdi_hold_path(pip); 5769 MDI_PI_LOCK(pip); 5770 i_mdi_pm_rele_pip(pip); 5771 MDI_PI_UNLOCK(pip); 5772 mdi_rele_path(pip); 5773 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5774 } 5775 } 5776 5777 static void 5778 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5779 { 5780 ASSERT(MDI_CLIENT_LOCKED(ct)); 5781 5782 if (i_ddi_devi_attached(ct->ct_dip)) { 5783 ct->ct_power_cnt -= decr; 5784 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 5785 "ct_power_cnt = %d decr = %d\n", 5786 (void *)ct, ct->ct_power_cnt, decr)); 5787 } 5788 5789 ASSERT(ct->ct_power_cnt >= 0); 5790 if (ct->ct_power_cnt == 0) { 5791 i_mdi_rele_all_phci(ct); 5792 return; 5793 } 5794 } 5795 5796 static void 5797 i_mdi_pm_reset_client(mdi_client_t *ct) 5798 { 5799 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 5800 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 5801 ASSERT(MDI_CLIENT_LOCKED(ct)); 5802 ct->ct_power_cnt = 0; 5803 i_mdi_rele_all_phci(ct); 5804 ct->ct_powercnt_config = 0; 5805 ct->ct_powercnt_unconfig = 0; 5806 ct->ct_powercnt_reset = 1; 5807 } 5808 5809 static int 5810 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5811 { 5812 int ret; 5813 dev_info_t *ph_dip; 5814 5815 MDI_PI_LOCK(pip); 5816 i_mdi_pm_hold_pip(pip); 5817 5818 ph_dip = mdi_pi_get_phci(pip); 5819 MDI_PI_UNLOCK(pip); 5820 5821 /* bring all components of phci to full power */ 5822 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5823 "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip), 5824 ddi_get_instance(ph_dip), (void *)pip)); 5825 5826 ret = pm_powerup(ph_dip); 5827 5828 if (ret == DDI_FAILURE) { 5829 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5830 "pm_powerup FAILED for %s%d %p\n", 5831 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), 5832 (void *)pip)); 5833 5834 MDI_PI_LOCK(pip); 5835 i_mdi_pm_rele_pip(pip); 5836 MDI_PI_UNLOCK(pip); 5837 return (MDI_FAILURE); 5838 } 5839 5840 return (MDI_SUCCESS); 5841 } 5842 5843 static int 5844 i_mdi_power_all_phci(mdi_client_t *ct) 5845 { 5846 mdi_pathinfo_t *pip; 5847 int succeeded = 0; 5848 5849 ASSERT(MDI_CLIENT_LOCKED(ct)); 5850 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5851 while (pip != NULL) { 5852 /* 5853 * Don't power if MDI_PATHINFO_STATE_FAULT 5854 * or MDI_PATHINFO_STATE_OFFLINE. 
5855 */ 5856 if (MDI_PI_IS_INIT(pip) || 5857 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 5858 mdi_hold_path(pip); 5859 MDI_CLIENT_UNLOCK(ct); 5860 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5861 succeeded = 1; 5862 5863 ASSERT(ct == MDI_PI(pip)->pi_client); 5864 MDI_CLIENT_LOCK(ct); 5865 mdi_rele_path(pip); 5866 } 5867 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5868 } 5869 5870 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5871 } 5872 5873 /* 5874 * mdi_bus_power(): 5875 * 1. Place the phci(s) into powered up state so that 5876 * client can do power management 5877 * 2. Ensure phci powered up as client power managing 5878 * Return Values: 5879 * MDI_SUCCESS 5880 * MDI_FAILURE 5881 */ 5882 int 5883 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5884 void *arg, void *result) 5885 { 5886 int ret = MDI_SUCCESS; 5887 pm_bp_child_pwrchg_t *bpc; 5888 mdi_client_t *ct; 5889 dev_info_t *cdip; 5890 pm_bp_has_changed_t *bphc; 5891 5892 /* 5893 * BUS_POWER_NOINVOL not supported 5894 */ 5895 if (op == BUS_POWER_NOINVOL) 5896 return (MDI_FAILURE); 5897 5898 /* 5899 * ignore other OPs. 
5900 * return quickly to save cou cycles on the ct processing 5901 */ 5902 switch (op) { 5903 case BUS_POWER_PRE_NOTIFICATION: 5904 case BUS_POWER_POST_NOTIFICATION: 5905 bpc = (pm_bp_child_pwrchg_t *)arg; 5906 cdip = bpc->bpc_dip; 5907 break; 5908 case BUS_POWER_HAS_CHANGED: 5909 bphc = (pm_bp_has_changed_t *)arg; 5910 cdip = bphc->bphc_dip; 5911 break; 5912 default: 5913 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5914 } 5915 5916 ASSERT(MDI_CLIENT(cdip)); 5917 5918 ct = i_devi_get_client(cdip); 5919 if (ct == NULL) 5920 return (MDI_FAILURE); 5921 5922 /* 5923 * wait till the mdi_pathinfo node state change are processed 5924 */ 5925 MDI_CLIENT_LOCK(ct); 5926 switch (op) { 5927 case BUS_POWER_PRE_NOTIFICATION: 5928 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5929 "BUS_POWER_PRE_NOTIFICATION:" 5930 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5931 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5932 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5933 5934 /* serialize power level change per client */ 5935 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5936 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5937 5938 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5939 5940 if (ct->ct_power_cnt == 0) { 5941 ret = i_mdi_power_all_phci(ct); 5942 } 5943 5944 /* 5945 * if new_level > 0: 5946 * - hold phci(s) 5947 * - power up phci(s) if not already 5948 * ignore power down 5949 */ 5950 if (bpc->bpc_nlevel > 0) { 5951 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5952 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5953 "mdi_bus_power i_mdi_pm_hold_client\n")); 5954 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5955 } 5956 } 5957 break; 5958 case BUS_POWER_POST_NOTIFICATION: 5959 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5960 "BUS_POWER_POST_NOTIFICATION:" 5961 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5962 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5963 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5964 *(int *)result)); 5965 5966 if (*(int *)result 
== DDI_SUCCESS) { 5967 if (bpc->bpc_nlevel > 0) { 5968 MDI_CLIENT_SET_POWER_UP(ct); 5969 } else { 5970 MDI_CLIENT_SET_POWER_DOWN(ct); 5971 } 5972 } 5973 5974 /* release the hold we did in pre-notification */ 5975 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5976 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5977 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5978 "mdi_bus_power i_mdi_pm_rele_client\n")); 5979 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5980 } 5981 5982 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5983 /* another thread might started attaching */ 5984 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5985 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5986 "mdi_bus_power i_mdi_pm_rele_client\n")); 5987 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5988 /* detaching has been taken care in pm_post_unconfig */ 5989 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5990 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5991 "mdi_bus_power i_mdi_pm_reset_client\n")); 5992 i_mdi_pm_reset_client(ct); 5993 } 5994 } 5995 5996 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5997 cv_broadcast(&ct->ct_powerchange_cv); 5998 5999 break; 6000 6001 /* need to do more */ 6002 case BUS_POWER_HAS_CHANGED: 6003 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 6004 "BUS_POWER_HAS_CHANGED:" 6005 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6006 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6007 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6008 6009 if (bphc->bphc_nlevel > 0 && 6010 bphc->bphc_nlevel > bphc->bphc_olevel) { 6011 if (ct->ct_power_cnt == 0) { 6012 ret = i_mdi_power_all_phci(ct); 6013 } 6014 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6015 "mdi_bus_power i_mdi_pm_hold_client\n")); 6016 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6017 } 6018 6019 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6020 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6021 "mdi_bus_power i_mdi_pm_rele_client\n")); 6022 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6023 } 6024 break; 6025 } 6026 6027 
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config power management for a single client: if the client is in
 * the failed state, power up all of its pHCIs (when no power count is
 * currently held) and take a pm hold on every path, recording that the
 * hold was taken for a config operation (ct_powercnt_config).
 * Returns MDI_FAILURE only when no mdi client hangs off the child dip.
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* wait for any in-flight power transition on this client to settle */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* not in the failed state: nothing to power up */
	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one already configured\n"));
		return (MDI_SUCCESS);
	}

	/* a config hold is already in place; don't take a second one */
	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one ALREADY held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config hook for the vHCI: apply i_mdi_pm_pre_config_one() either
 * to the single named child (ndi_devi_config_one path, vdip already
 * busy-owned by the caller) or to every child of the vHCI
 * (devi_config_common path), stopping at the first failure.
 */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	dev_info_t	*cdip;
	int		circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		/* fetch the sibling first; the child may change state */
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
	return (ret);
}

/*
 * Pre-unconfig power management for a single client.  On success a pm
 * hold is taken (or found already taken) and *held is set to 1 so the
 * caller knows a matching i_mdi_pm_post_unconfig() is required.
 * Fails when a powered-down client is being auto-detached
 * (NDI_AUTODETACH), so modunload does not power devices back up.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* wait for any in-flight power transition on this client to settle */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(ct->ct_dip)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/* an unconfig hold is already in place */
	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * Pre-unconfig hook for the vHCI: single named child or all children,
 * mirroring i_mdi_pm_pre_config().  *held is reported to the caller and
 * is sticky across children; if any child took a hold the overall
 * result is forced to MDI_SUCCESS so the post_unconfig pass runs.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	int		ret = MDI_SUCCESS;
	dev_info_t	*cdip;
	int		circ;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);

	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait for any in-flight power transition on this client to settle */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre_config hold to undo */
	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int		valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		/* release only the holds for paths that are still usable */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-config hook for the vHCI: undo the pre_config pm holds either
 * for the single named child or for every child of the vHCI.
 */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * Post-unconfig power management for a single client: undo the pm hold
 * taken by i_mdi_pm_pre_unconfig_one(), either by resetting the client's
 * power bookkeeping (detach completed or failed in a powered-down state)
 * or by releasing holds for the still-valid paths.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait for any in-flight power transition on this client to settle */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre_unconfig hold to undo */
	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int		valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
		/* release only the holds for paths that are still usable */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-unconfig hook for the vHCI: a no-op unless the pre_unconfig pass
 * reported that a hold was taken (held != 0).
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, vdip,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * Power management entry point for the vHCI: dispatch the requested
 * pm operation (pre/post config/unconfig, hold/release power) to the
 * appropriate helper.  When a child device name (devnm) is supplied,
 * the vHCI is held busy across the operation and the named child, if
 * already enumerated, is operated on; otherwise all children are.
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int		circ, ret = MDI_SUCCESS;
	dev_info_t	*client_dip = NULL;
	mdi_client_t	*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
	    op, devnm ? devnm : "NULL", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		/* args carries the caller's "held" out-parameter */
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		/* here args is the client dip itself, not an int pointer */
		ASSERT(args);

		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return (ret);
}

/*
 * Return MDI_SUCCESS if the dip is a vHCI node; optionally report its
 * mdi class name through *mdi_class (pointer into the vhci structure,
 * not a copy).
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Return MDI_SUCCESS if the dip is a pHCI node; optionally report the
 * class of the vHCI it is registered with through *mdi_class.
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t *phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Return MDI_SUCCESS if the dip is an mdi client node; optionally report
 * the class of its vHCI through *mdi_class.
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t *client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class = client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_client_get_vhci_private():
 *	Get the vhci private data stored on the client node;
 *	NULL if the dip is not an mdi client.
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * mdi_client_set_vhci_private():
 *	Set the vhci private data on the client node; silently a no-op
 *	if the dip is not an mdi client (asserted in DEBUG kernels).
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}
/*
 * mdi_pi_get_vhci_private():
 *	Get the vhci private information associated with the
 *	mdi_pathinfo node
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *	Set the vhci private information in the mdi_pathinfo node
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *	Get the vhci private information associated with the
 *	mdi_phci node
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *	Set the vhci private information in the mdi_phci node
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function.  The caller frees it (see destroy_vhci_cache()).
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/* the -1 below is to account for "%s" in the format string */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor (keys are owned by the cct entries).
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 * Claim (and clear) the pre-read nvlist for our class, if any.
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			/* corrupted cache is discarded, not fatal */
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate\n",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* flush the cache before the VFS layer goes away on uadmin */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_next;

	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	/* free each client along with all of its pathinfo entries */
	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/* poll until the flush thread and all acc threads have exited */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			/* flush failed: mark dirty again so it is retried */
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}

/*
 * Stop vhci cache flush thread
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	/* poll until the flush thread notices MDI_VHC_EXIT and leaves */
	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}
6799 6800 /* 6801 * Enqueue the vhcache phci (cphci) at the tail of the list 6802 */ 6803 static void 6804 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6805 { 6806 cphci->cphci_next = NULL; 6807 if (vhcache->vhcache_phci_head == NULL) 6808 vhcache->vhcache_phci_head = cphci; 6809 else 6810 vhcache->vhcache_phci_tail->cphci_next = cphci; 6811 vhcache->vhcache_phci_tail = cphci; 6812 } 6813 6814 /* 6815 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6816 */ 6817 static void 6818 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6819 mdi_vhcache_pathinfo_t *cpi) 6820 { 6821 cpi->cpi_next = NULL; 6822 if (cct->cct_cpi_head == NULL) 6823 cct->cct_cpi_head = cpi; 6824 else 6825 cct->cct_cpi_tail->cpi_next = cpi; 6826 cct->cct_cpi_tail = cpi; 6827 } 6828 6829 /* 6830 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6831 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6832 * flag set come at the beginning of the list. All cpis which have this 6833 * flag set come at the end of the list. 
6834 */ 6835 static void 6836 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6837 mdi_vhcache_pathinfo_t *newcpi) 6838 { 6839 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6840 6841 if (cct->cct_cpi_head == NULL || 6842 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6843 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6844 else { 6845 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6846 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6847 prev_cpi = cpi, cpi = cpi->cpi_next) 6848 ; 6849 6850 if (prev_cpi == NULL) 6851 cct->cct_cpi_head = newcpi; 6852 else 6853 prev_cpi->cpi_next = newcpi; 6854 6855 newcpi->cpi_next = cpi; 6856 6857 if (cpi == NULL) 6858 cct->cct_cpi_tail = newcpi; 6859 } 6860 } 6861 6862 /* 6863 * Enqueue the vhcache client (cct) at the tail of the list 6864 */ 6865 static void 6866 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6867 mdi_vhcache_client_t *cct) 6868 { 6869 cct->cct_next = NULL; 6870 if (vhcache->vhcache_client_head == NULL) 6871 vhcache->vhcache_client_head = cct; 6872 else 6873 vhcache->vhcache_client_tail->cct_next = cct; 6874 vhcache->vhcache_client_tail = cct; 6875 } 6876 6877 static void 6878 free_string_array(char **str, int nelem) 6879 { 6880 int i; 6881 6882 if (str) { 6883 for (i = 0; i < nelem; i++) { 6884 if (str[i]) 6885 kmem_free(str[i], strlen(str[i]) + 1); 6886 } 6887 kmem_free(str, sizeof (char *) * nelem); 6888 } 6889 } 6890 6891 static void 6892 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6893 { 6894 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6895 kmem_free(cphci, sizeof (*cphci)); 6896 } 6897 6898 static void 6899 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6900 { 6901 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6902 kmem_free(cpi, sizeof (*cpi)); 6903 } 6904 6905 static void 6906 free_vhcache_client(mdi_vhcache_client_t *cct) 6907 { 6908 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6909 kmem_free(cct, sizeof (*cct)); 6910 } 6911 6912 
/*
 * Build a "<clientname>@<clientaddress>" string; memory is allocated by
 * this function and its length (including the NUL) is optionally
 * returned through ret_len so the caller can kmem_free() it.
 */
static char *
vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
{
	char *name_addr;
	int len;

	/* +2: one for the '@' separator, one for the trailing NUL */
	len = strlen(ct_name) + strlen(ct_addr) + 2;
	name_addr = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);

	if (ret_len)
		*ret_len = len;
	return (name_addr);
}

/*
 * Copy the contents of paddrnvl to vhci cache.
 * paddrnvl nvlist contains path information for a vhci client.
 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
 */
static void
paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
    mdi_vhcache_client_t *cct)
{
	nvpair_t *nvp = NULL;
	mdi_vhcache_pathinfo_t *cpi;
	uint_t nelem;
	uint32_t *val;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
		ASSERT(nelem == 2);
		/* val[0] is the phci index, val[1] the cpi flags */
		cpi->cpi_cphci = cphci_list[val[0]];
		cpi->cpi_flags = val[1];
		enqueue_tail_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Copy the contents of caddrmapnvl to vhci cache.
 * caddrmapnvl nvlist contains vhci client address to phci client address
 * mappings. See the comment in mainnvl_to_vhcache() for the format of
 * this nvlist.
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
	nvpair_t *nvp = NULL;
	nvlist_t *paddrnvl;
	mdi_vhcache_client_t *cct;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_nvlist(nvp, &paddrnvl);
		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
		/* the client must contain at least one path */
		ASSERT(cct->cct_cpi_head != NULL);

		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
	}
}

/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify PHCIs to which the
 * the bus specific address belongs to. These integers are used as an index
 * into to the phcis string array in the main nvlist to get the PHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char **phcis, **phci_namep;
	uint_t nphcis;
	mdi_vhcache_phci_t *cphci, **cphci_list;
	nvlist_t *caddrmapnvl;
	int32_t ver;
	int i;
	size_t cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* reject a cache written with a different format version */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* no phcis recorded: an empty but valid cache */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/*
	 * Build the temporary index -> cphci table used to resolve the
	 * phci-id references in the client address map.
	 */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnnvl.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
    nvlist_t *caddrmapnvl)
{
	mdi_vhcache_pathinfo_t *cpi;
	nvlist_t *nvl;
	int err;
	uint32_t val[2];

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
		return (err);

	/* one (phci-id, cpi_flags) pair per path, keyed by pi address */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		val[0] = cpi->cpi_cphci->cphci_id;
		val[1] = cpi->cpi_flags;
		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
		    != 0)
			goto out;
	}

	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
out:
	/* caddrmapnvl holds its own copy; always free the local nvl */
	nvlist_free(nvl);
	return (err);
}

/*
 * Build caddrmapnvl using the information in the vhci cache
 * and add it to the mainnvl.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
{
	mdi_vhcache_client_t *cct;
	nvlist_t *nvl;
	int err;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
		return (err);

	for (cct = vhcache->vhcache_client_head; cct != NULL;
	    cct = cct->cct_next) {
		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
			goto out;
	}

	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
out:
	/* mainnvl holds its own copy; always free the local nvl */
	nvlist_free(nvl);
	return (err);
}

/*
 * Build nvlist using the information in the vhci cache.
 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
 * Returns nvl on success, NULL on failure.
 */
static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
{
	mdi_vhcache_phci_t *cphci;
	uint_t phci_count;
	char **phcis;
	nvlist_t *nvl;
	int err, i;

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
		nvl = NULL;
		goto out;
	}

	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
	    MDI_VHCI_CACHE_VERSION)) != 0)
		goto out;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* nothing cached yet: return the version-only nvlist */
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	/* assign each phci its index (phci-id) in the string array */
	phci_count = 0;
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next)
		cphci->cphci_id = phci_count++;

	/* build phci pathname list */
	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
	    cphci = cphci->cphci_next, i++)
		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);

	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
	    phci_count);
	free_string_array(phcis, phci_count);

	if (err == 0 &&
	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	rw_exit(&vhcache->vhcache_lock);
out:
	if (nvl)
		nvlist_free(nvl);
	return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci path.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
{
	mdi_vhcache_phci_t *cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/* linear scan of the phci list for a matching pathname */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (strcmp(cphci->cphci_path, phci_path) == 0)
			return (cphci);
	}

	return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci.
7199 */ 7200 static mdi_vhcache_phci_t * 7201 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7202 { 7203 mdi_vhcache_phci_t *cphci; 7204 7205 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7206 7207 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7208 cphci = cphci->cphci_next) { 7209 if (cphci->cphci_phci == ph) 7210 return (cphci); 7211 } 7212 7213 return (NULL); 7214 } 7215 7216 /* 7217 * Add the specified phci to the vhci cache if not already present. 7218 */ 7219 static void 7220 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7221 { 7222 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7223 mdi_vhcache_phci_t *cphci; 7224 char *pathname; 7225 int cache_updated; 7226 7227 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7228 7229 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7230 (void) ddi_pathname(ph->ph_dip, pathname); 7231 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7232 != NULL) { 7233 cphci->cphci_phci = ph; 7234 cache_updated = 0; 7235 } else { 7236 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7237 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7238 cphci->cphci_phci = ph; 7239 enqueue_vhcache_phci(vhcache, cphci); 7240 cache_updated = 1; 7241 } 7242 7243 rw_exit(&vhcache->vhcache_lock); 7244 7245 /* 7246 * Since a new phci has been added, reset 7247 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7248 * during next vhcache_discover_paths(). 7249 */ 7250 mutex_enter(&vhc->vhc_lock); 7251 vhc->vhc_path_discovery_cutoff_time = 0; 7252 mutex_exit(&vhc->vhc_lock); 7253 7254 kmem_free(pathname, MAXPATHLEN); 7255 if (cache_updated) 7256 vhcache_dirty(vhc); 7257 } 7258 7259 /* 7260 * Remove the reference to the specified phci from the vhci cache. 
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
		/* do not remove the actual mdi_vhcache_phci structure */
		cphci->cphci_phci = NULL;
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Initialize a vhcache lookup token: either copy src, or when src is
 * NULL make an empty token that will not match any cached lookup.
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
	if (src == NULL) {
		dst->lt_cct = NULL;
		dst->lt_cct_lookup_time = 0;
	} else {
		dst->lt_cct = src->lt_cct;
		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
	}
}

/*
 * Look up vhcache client for the specified client.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mod_hash_val_t hv;
	char *name_addr;
	int len;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)name_addr, &hv) == 0) {
		/* found; remember the result in the caller's token */
		if (token) {
			token->lt_cct = (mdi_vhcache_client_t *)hv;
			token->lt_cct_lookup_time = lbolt64;
		}
	} else {
		if (token) {
			token->lt_cct = NULL;
			token->lt_cct_lookup_time = 0;
		}
		hv = NULL;
	}
	kmem_free(name_addr, len);
	return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phci_t *ph = pip->pi_phci;
	mdi_client_t *ct = pip->pi_client;
	int cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/* look for an existing cached path entry matching this phci/addr */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				/* hint changed; re-sort and persist */
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	/* no matching entry found; create a new cached path entry */
	if (cpi == NULL) {
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_client_t *ct = pip->pi_client;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) != NULL) {
		for (cpi = cct->cct_cpi_head; cpi != NULL;
		    cpi = cpi->cpi_next) {
			if (cpi->cpi_pip == pip) {
				/* only clear the back pointer; keep entry */
				cpi->cpi_pip = NULL;
				break;
			}
		}
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			/* read-only fs: stop trying to persist the cache */
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* log the first failure only, until a flush succeeds */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* number of seconds to sleep idle before exiting */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/* while dirty: wait until the scheduled time, then flush */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				/* re-mark dirty on failure so we retry */
				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* idle: wait for new work, an exit request, or the timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
 */
static void
vhcache_dirty(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int create_thread;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* do not flush cache until the cache is fully built */
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}
	rw_exit(&vhcache->vhcache_lock);

	mutex_enter(&vhc->vhc_lock);
	/* EROFS was seen by flush_vhcache(); persisting is pointless */
	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
		mutex_exit(&vhc->vhc_lock);
		return;
	}

	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		/* flush thread already running; just wake it */
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;

/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;
	kcondvar_t vhbc_cv;
	int vhbc_thr_count;		/* # of outstanding phci workers */
} mdi_vhci_bus_config_t;

/*
 * bus config the specified phci
 * Runs as a worker thread (or inline when mdi_mtc_off is set); frees
 * the mdi_phci_bus_config_t work item when done.
 */
static void
bus_config_phci(void *arg)
{
	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
	dev_info_t *ph_dip;

	/*
	 * first configure all path components upto phci and then configure
	 * the phci children.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
	    != NULL) {
		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
			(void) ndi_devi_config_driver(ph_dip,
			    vhbc->vhbc_op_flags,
			    vhbc->vhbc_op_major);
		} else
			(void) ndi_devi_config(ph_dip,
			    vhbc->vhbc_op_flags);

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
	kmem_free(phbc, sizeof (*phbc));

	/* wake the initiator when the last worker finishes */
	mutex_enter(&vhbc->vhbc_lock);
	vhbc->vhbc_thr_count--;
	if (vhbc->vhbc_thr_count == 0)
		cv_broadcast(&vhbc->vhbc_cv);
	mutex_exit(&vhbc->vhbc_lock);
}

/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	/* build one work item per cached phci while holding the cache lock */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		phbc_next = phbc->phbc_next;
		if (mdi_mtc_off)
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}

/*
 * Single threaded version of bus_config_all_phcis()
 */
static void
st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);
	bus_config_all_phcis(vhcache, flags, op, maj);
	single_threaded_vhconfig_exit(vhc);
}

/*
 * Perform BUS_CONFIG_ONE on the specified child of the phci.
 * The path includes the child component in addition to the phci path.
 * Returns MDI_SUCCESS if the child was configured, MDI_FAILURE otherwise.
 */
static int
bus_config_one_phci_child(char *path)
{
	dev_info_t *ph_dip, *child;
	char *devnm;
	int rv = MDI_FAILURE;

	/* extract the child component of the phci */
	devnm = strrchr(path, '/');
	*devnm++ = '\0';

	/*
	 * first configure all path components upto phci and then
	 * configure the phci child.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
		    NDI_SUCCESS) {
			/*
			 * release the hold that ndi_devi_config_one() placed
			 */
			ndi_rele_devi(child);
			rv = MDI_SUCCESS;
		}

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* restore the '/' separator that was overwritten above */
	devnm--;
	*devnm = '/';
	return (rv);
}

/*
 * Build a list of phci client paths for the specified vhci client.
 * The list includes only those phci client paths which aren't configured yet.
 */
static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
{
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
	int config_path, len;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/*
		 * include only those paths that aren't configured.
		 */
		config_path = 0;
		if (cpi->cpi_pip == NULL)
			config_path = 1;
		else {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (MDI_PI_IS_INIT(cpi->cpi_pip))
				config_path = 1;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}

		if (config_path) {
			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
			/* "<phci-path>/<ct_name>@<addr>": '/', '@', '\0' */
			len = strlen(cpi->cpi_cphci->cphci_path) +
			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
			pp->phys_path = kmem_alloc(len, KM_SLEEP);
			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
			    cpi->cpi_cphci->cphci_path, ct_name,
			    cpi->cpi_addr);
			pp->phys_path_next = NULL;

			/* append to the tail to preserve cpi ordering */
			if (pp_head == NULL)
				pp_head = pp;
			else
				pp_tail->phys_path_next = pp;
			pp_tail = pp;
		}
	}

	return (pp_head);
}

/*
 * Free the memory allocated for phci client path list.
 */
static void
free_phclient_path_list(mdi_phys_path_t *pp_head)
{
	mdi_phys_path_t *pp, *pp_next;

	for (pp = pp_head; pp != NULL; pp = pp_next) {
		pp_next = pp->phys_path_next;
		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
		kmem_free(pp, sizeof (*pp));
	}
}

/*
 * Allocate async client structure and initialize with the specified values.
 * The structure takes ownership of pp_head; ct_name/ct_addr are copied.
 */
static mdi_async_client_config_t *
alloc_async_client_config(char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc;

	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
	acc->acc_phclient_path_list_head = pp_head;
	init_vhcache_lookup_token(&acc->acc_token, tok);
	acc->acc_next = NULL;
	return (acc);
}

/*
 * Free the memory allocated for the async client structure and their members.
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	if (acc->acc_phclient_path_list_head)
		free_phclient_path_list(acc->acc_phclient_path_list_head);
	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;

	/* rebuild the list by re-enqueueing each cpi */
	cpi_head = cct->cct_cpi_head;
	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
		cpi_next = cpi->cpi_next;
		enqueue_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed.
 * If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/* a hint disagreeing with the presence of a pip needs fixing */
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		/* all hints are already correct; nothing to do */
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * If the upgrade to writer fails, drop and reacquire the lock as
	 * writer; the client must then be looked up again since the cache
	 * may have been cleaned while the lock was dropped.
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	/* the hint flags are persisted on disk; schedule a flush */
	vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t *pp;

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
		(void) bus_config_one_phci_child(pp->phys_path);
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work, an exit request, or the idle timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the head element of the async config list */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc, *newacc;
	int create_thread;

	if (pp_head == NULL)
		return;

	if (mdi_mtc_off) {
		/* multi-threaded config is disabled; do it synchronously */
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(newacc);

	mutex_enter(&vhc->vhc_lock);
	/* drop the request if this client is already queued */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
			free_async_client_config(newacc);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
	}

	/* append the new request at the tail of the list */
	if (vhc->vhc_acc_list_head == NULL)
		vhc->vhc_acc_list_head = newacc;
	else
		vhc->vhc_acc_list_tail->acc_next = newacc;
	vhc->vhc_acc_list_tail = newacc;
	vhc->vhc_acc_count++;
	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
		/* enough worker threads already exist; just wake them */
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_acc_thrcount++;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * Return number of online paths for the specified client.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Called with vhcache_lock held (asserted below); the lock is dropped on
 * every return path.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* re-validate; the lock was dropped meanwhile */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				/* a path is online; hand off the remainder */
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Acquire the single-threaded vhci configuration "lock" (the
 * MDI_VHC_SINGLE_THREADED flag), blocking until it is available.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Release the single-threaded vhci configuration "lock" and wake waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

typedef struct mdi_phci_driver_info {
	char *phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
8146 */ 8147 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8148 { "fp", 1 }, 8149 { "iscsi", 0 }, 8150 { "ibsrp", 1 } 8151 }; 8152 8153 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8154 8155 static void * 8156 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8157 { 8158 void *new_ptr; 8159 8160 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8161 if (old_ptr) { 8162 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8163 kmem_free(old_ptr, old_size); 8164 } 8165 return (new_ptr); 8166 } 8167 8168 static void 8169 add_to_phci_list(char ***driver_list, int **root_support_list, 8170 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8171 { 8172 ASSERT(*cur_elements <= *max_elements); 8173 if (*cur_elements == *max_elements) { 8174 *max_elements += 10; 8175 *driver_list = mdi_realloc(*driver_list, 8176 sizeof (char *) * (*cur_elements), 8177 sizeof (char *) * (*max_elements)); 8178 *root_support_list = mdi_realloc(*root_support_list, 8179 sizeof (int) * (*cur_elements), 8180 sizeof (int) * (*max_elements)); 8181 } 8182 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8183 (*root_support_list)[*cur_elements] = root_support; 8184 (*cur_elements)++; 8185 } 8186 8187 static void 8188 get_phci_driver_list(char *vhci_class, char ***driver_list, 8189 int **root_support_list, int *cur_elements, int *max_elements) 8190 { 8191 mdi_phci_driver_info_t *st_driver_list, *p; 8192 int st_ndrivers, root_support, i, j, driver_conf_count; 8193 major_t m; 8194 struct devnames *dnp; 8195 ddi_prop_t *propp; 8196 8197 *driver_list = NULL; 8198 *root_support_list = NULL; 8199 *cur_elements = 0; 8200 *max_elements = 0; 8201 8202 /* add the phci drivers derived from the phci driver.conf files */ 8203 for (m = 0; m < devcnt; m++) { 8204 dnp = &devnamesp[m]; 8205 8206 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8207 LOCK_DEV_OPS(&dnp->dn_lock); 8208 if (dnp->dn_global_prop_ptr != NULL && 8209 (propp = 
			    i_ddi_prop_search(DDI_DEV_T_ANY,
			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
			    strcmp(propp->prop_val, vhci_class) == 0) {

				/* absence of ddi-no-root-support => root ok */
				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
				    &dnp->dn_global_prop_ptr->prop_list)
				    == NULL) ? 1 : 0;

				add_to_phci_list(driver_list, root_support_list,
				    cur_elements, max_elements, dnp->dn_name,
				    root_support);

				UNLOCK_DEV_OPS(&dnp->dn_lock);
			} else
				UNLOCK_DEV_OPS(&dnp->dn_lock);
		}
	}

	driver_conf_count = *cur_elements;

	/* add the phci drivers specified in the built-in tables */
	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
		st_driver_list = scsi_phci_driver_list;
		st_ndrivers = sizeof (scsi_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
		st_driver_list = ib_phci_driver_list;
		st_ndrivers = sizeof (ib_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else {
		st_driver_list = NULL;
		st_ndrivers = 0;
	}

	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
		/* add this phci driver if not already added before */
		for (j = 0; j < driver_conf_count; j++) {
			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
				break;
		}
		if (j == driver_conf_count) {
			add_to_phci_list(driver_list, root_support_list,
			    cur_elements, max_elements, p->phdriver_name,
			    p->phdriver_root_support);
		}
	}
}

/*
 * Attach the phci driver instances associated with the specified vhci class.
 * If root is mounted attach all phci driver instances.
 * If root is not mounted, attach the instances of only those phci
 * drivers that have the root support.
 */
static void
attach_phci_drivers(char *vhci_class)
{
	char **driver_list, **p;
	int *root_support_list;
	int cur_elements, max_elements, i;
	major_t m;

	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
	    &cur_elements, &max_elements);

	for (i = 0; i < cur_elements; i++) {
		if (modrootloaded || root_support_list[i]) {
			m = ddi_name_to_major(driver_list[i]);
			/* hold the driver to attach it, then drop the hold */
			if (m != (major_t)-1 && ddi_hold_installed_driver(m))
				ddi_rele_driver(m);
		}
	}

	if (driver_list) {
		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
			kmem_free(*p, strlen(*p) + 1);
		kmem_free(driver_list, sizeof (char *) * max_elements);
		kmem_free(root_support_list, sizeof (int) * max_elements);
	}
}

/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
 *
 * Cache is built fully if the root is mounted.
 * If the root is not mounted, phci drivers that do not have root support
 * are not attached. As a result the cache is built partially. The entries
 * in the cache reflect only those phci drivers that have root support.
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	/* serialize with other cache build/config operations on this vhci */
	single_threaded_vhconfig_enter(vhc);

	/* another thread may have completed the build while we waited */
	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
		rw_exit(&vhcache->vhcache_lock);
		single_threaded_vhconfig_exit(vhc);
		return (0);	/* cache was already built; nothing done */
	}
	rw_exit(&vhcache->vhcache_lock);

	/*
	 * Attach the phci drivers and enumerate everything below them;
	 * the cache is populated as a side effect of the bus config.
	 */
	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, (major_t)-1);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	vhcache_dirty(vhc);	/* schedule flush of the rebuilt cache */
	return (1);		/* cache was built by this call */
}

/*
 * Determine if discovery of paths is needed.
 * Returns 1 if a full path discovery should be performed, 0 otherwise.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int	rv = 1;

	mutex_enter(&vhc->vhc_lock);
	/*
	 * Separate per-phase budgets: consume a "boot" discovery credit
	 * until I/O is initialized, a "postboot" credit afterwards.
	 */
	if (i_ddi_io_initialized() == 0) {
		if (vhc->vhc_path_discovery_boot > 0) {
			vhc->vhc_path_discovery_boot--;
			goto out;
		}
	} else {
		if (vhc->vhc_path_discovery_postboot > 0) {
			vhc->vhc_path_discovery_postboot--;
			goto out;
		}
	}

	/*
	 * Do full path discovery at most once per mdi_path_discovery_interval.
	 * This is to avoid a series of full path discoveries when opening
	 * stale /dev/[r]dsk links.
	 */
	if (mdi_path_discovery_interval != -1 &&
	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
		goto out;

	rv = 0;
out:
	mutex_exit(&vhc->vhc_lock);
	return (rv);
}

/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
8373 */ 8374 static int 8375 vhcache_discover_paths(mdi_vhci_t *vh) 8376 { 8377 mdi_vhci_config_t *vhc = vh->vh_config; 8378 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8379 int rv = 0; 8380 8381 single_threaded_vhconfig_enter(vhc); 8382 8383 if (vhcache_do_discovery(vhc)) { 8384 attach_phci_drivers(vh->vh_class); 8385 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8386 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8387 8388 mutex_enter(&vhc->vhc_lock); 8389 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8390 mdi_path_discovery_interval * TICKS_PER_SECOND; 8391 mutex_exit(&vhc->vhc_lock); 8392 rv = 1; 8393 } 8394 8395 single_threaded_vhconfig_exit(vhc); 8396 return (rv); 8397 } 8398 8399 /* 8400 * Generic vhci bus config implementation: 8401 * 8402 * Parameters 8403 * vdip vhci dip 8404 * flags bus config flags 8405 * op bus config operation 8406 * The remaining parameters are bus config operation specific 8407 * 8408 * for BUS_CONFIG_ONE 8409 * arg pointer to name@addr 8410 * child upon successful return from this function, *child will be 8411 * set to the configured and held devinfo child node of vdip. 8412 * ct_addr pointer to client address (i.e. GUID) 8413 * 8414 * for BUS_CONFIG_DRIVER 8415 * arg major number of the driver 8416 * child and ct_addr parameters are ignored 8417 * 8418 * for BUS_CONFIG_ALL 8419 * arg, child, and ct_addr parameters are ignored 8420 * 8421 * Note that for the rest of the bus config operations, this function simply 8422 * calls the framework provided default bus config routine. 
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node. Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: "
		    "vhci dip is busy owned %p\n", (void *)vdip));
		goto default_bus_config;
	}

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		/*
		 * rv == 1 after this means the cache was just built, which
		 * already drove a full BUS_CONFIG_ALL; the DRIVER/ALL cases
		 * below use that to skip a redundant phci bus config.
		 */
		rw_exit(&vhcache->vhcache_lock);
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/* temporarily terminate at '@' to isolate the node name */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
 * Returns the nvlist on success, NULL if the cache file is missing,
 * unreadable, or corrupt (warning logged for the latter two cases).
 */
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
	nvlist_t *nvl;
	int err;
	char *filename;

	filename = vhclass2vhcache_filename(vhci_class);

	if ((err = fread_nvlist(filename, &nvl)) == 0) {
		kmem_free(filename, strlen(filename) + 1);
		return (nvl);
	} else if (err == EIO)
		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
	else if (err == EINVAL)
		cmn_err(CE_WARN,
		    "%s: data file corrupted, will recreate\n", filename);

	kmem_free(filename, strlen(filename) + 1);
	return (NULL);
}

/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 */
void
mdi_read_devices_files(void)
{
	int i;

	for (i = 0; i < N_VHCI_CLASSES; i++)
		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
}

/*
 * Remove all stale entries from vhci cache.
 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/*
	 * Detach the whole client list, then re-enqueue only the clients
	 * that still have at least one live path; free the rest.
	 */
	cct_head = vhcache->vhcache_client_head;
	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
	for (cct = cct_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;

		/* keep pathinfo entries backed by a live pip; free stale ones */
		cpi_head = cct->cct_cpi_head;
		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			if (cpi->cpi_pip != NULL) {
				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
				enqueue_tail_vhcache_pathinfo(cct, cpi);
			} else
				free_vhcache_pathinfo(cpi);
		}

		if (cct->cct_cpi_head != NULL)
			enqueue_vhcache_client(vhcache, cct);
		else {
			/* no live paths left: drop hash entry and client */
			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
			    (mod_hash_key_t)cct->cct_name_addr);
			free_vhcache_client(cct);
		}
	}

	/* same pattern for phcis: keep those with a live phci attached */
	cphci_head = vhcache->vhcache_phci_head;
	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		if (cphci->cphci_phci != NULL)
			enqueue_vhcache_phci(vhcache, cphci);
		else
			free_vhcache_phci(cphci);
	}

	vhcache->vhcache_clean_time = lbolt64;
	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);	/* schedule flush of the cleaned cache */
}

/*
 * Remove all stale entries from vhci cache.
8608 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8609 */ 8610 void 8611 mdi_clean_vhcache(void) 8612 { 8613 mdi_vhci_t *vh; 8614 8615 mutex_enter(&mdi_mutex); 8616 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8617 vh->vh_refcnt++; 8618 mutex_exit(&mdi_mutex); 8619 clean_vhcache(vh->vh_config); 8620 mutex_enter(&mdi_mutex); 8621 vh->vh_refcnt--; 8622 } 8623 mutex_exit(&mdi_mutex); 8624 } 8625 8626 /* 8627 * mdi_vhci_walk_clients(): 8628 * Walker routine to traverse client dev_info nodes 8629 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8630 * below the client, including nexus devices, which we dont want. 8631 * So we just traverse the immediate siblings, starting from 1st client. 8632 */ 8633 void 8634 mdi_vhci_walk_clients(dev_info_t *vdip, 8635 int (*f)(dev_info_t *, void *), void *arg) 8636 { 8637 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8638 dev_info_t *cdip; 8639 mdi_client_t *ct; 8640 8641 MDI_VHCI_CLIENT_LOCK(vh); 8642 cdip = ddi_get_child(vdip); 8643 while (cdip) { 8644 ct = i_devi_get_client(cdip); 8645 MDI_CLIENT_LOCK(ct); 8646 8647 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 8648 cdip = ddi_get_next_sibling(cdip); 8649 else 8650 cdip = NULL; 8651 8652 MDI_CLIENT_UNLOCK(ct); 8653 } 8654 MDI_VHCI_CLIENT_UNLOCK(vh); 8655 } 8656 8657 /* 8658 * mdi_vhci_walk_phcis(): 8659 * Walker routine to traverse phci dev_info nodes 8660 */ 8661 void 8662 mdi_vhci_walk_phcis(dev_info_t *vdip, 8663 int (*f)(dev_info_t *, void *), void *arg) 8664 { 8665 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8666 mdi_phci_t *ph, *next; 8667 8668 MDI_VHCI_PHCI_LOCK(vh); 8669 ph = vh->vh_phci_head; 8670 while (ph) { 8671 MDI_PHCI_LOCK(ph); 8672 8673 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 8674 next = ph->ph_next; 8675 else 8676 next = NULL; 8677 8678 MDI_PHCI_UNLOCK(ph); 8679 ph = next; 8680 } 8681 MDI_VHCI_PHCI_UNLOCK(vh); 8682 } 8683 8684 8685 /* 8686 * mdi_walk_vhcis(): 8687 * Walker routine to traverse vhci 
dev_info nodes 8688 */ 8689 void 8690 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8691 { 8692 mdi_vhci_t *vh = NULL; 8693 8694 mutex_enter(&mdi_mutex); 8695 /* 8696 * Scan for already registered vhci 8697 */ 8698 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8699 vh->vh_refcnt++; 8700 mutex_exit(&mdi_mutex); 8701 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8702 mutex_enter(&mdi_mutex); 8703 vh->vh_refcnt--; 8704 break; 8705 } else { 8706 mutex_enter(&mdi_mutex); 8707 vh->vh_refcnt--; 8708 } 8709 } 8710 8711 mutex_exit(&mdi_mutex); 8712 } 8713 8714 /* 8715 * i_mdi_log_sysevent(): 8716 * Logs events for pickup by syseventd 8717 */ 8718 static void 8719 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8720 { 8721 char *path_name; 8722 nvlist_t *attr_list; 8723 8724 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8725 KM_SLEEP) != DDI_SUCCESS) { 8726 goto alloc_failed; 8727 } 8728 8729 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8730 (void) ddi_pathname(dip, path_name); 8731 8732 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8733 ddi_driver_name(dip)) != DDI_SUCCESS) { 8734 goto error; 8735 } 8736 8737 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8738 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8739 goto error; 8740 } 8741 8742 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8743 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8744 goto error; 8745 } 8746 8747 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8748 path_name) != DDI_SUCCESS) { 8749 goto error; 8750 } 8751 8752 if (nvlist_add_string(attr_list, DDI_CLASS, 8753 ph_vh_class) != DDI_SUCCESS) { 8754 goto error; 8755 } 8756 8757 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8758 attr_list, NULL, DDI_SLEEP); 8759 8760 error: 8761 kmem_free(path_name, MAXPATHLEN); 8762 nvlist_free(attr_list); 8763 return; 8764 8765 alloc_failed: 8766 MDI_DEBUG(1, (CE_WARN, dip, 8767 "!i_mdi_log_sysevent: Unable to send sysevent")); 8768 } 
8769 8770 char ** 8771 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 8772 { 8773 char **driver_list, **ret_driver_list = NULL; 8774 int *root_support_list; 8775 int cur_elements, max_elements; 8776 8777 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8778 &cur_elements, &max_elements); 8779 8780 8781 if (driver_list) { 8782 kmem_free(root_support_list, sizeof (int) * max_elements); 8783 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 8784 * max_elements, sizeof (char *) * cur_elements); 8785 } 8786 *ndrivers = cur_elements; 8787 8788 return (ret_driver_list); 8789 8790 } 8791 8792 void 8793 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 8794 { 8795 char **p; 8796 int i; 8797 8798 if (driver_list) { 8799 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 8800 kmem_free(*p, strlen(*p) + 1); 8801 kmem_free(driver_list, sizeof (char *) * ndrivers); 8802 } 8803 } 8804