1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) 35 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 38 */ 39 40 #include <sys/note.h> 41 #include <sys/types.h> 42 #include <sys/varargs.h> 43 #include <sys/param.h> 44 #include <sys/errno.h> 45 #include <sys/uio.h> 46 #include <sys/buf.h> 47 #include <sys/modctl.h> 48 #include <sys/open.h> 49 #include <sys/kmem.h> 50 #include <sys/poll.h> 51 #include <sys/conf.h> 52 #include <sys/bootconf.h> 53 #include <sys/cmn_err.h> 54 #include <sys/stat.h> 55 #include <sys/ddi.h> 56 #include <sys/sunddi.h> 57 #include <sys/ddipropdefs.h> 58 #include <sys/sunndi.h> 59 #include <sys/ndi_impldefs.h> 60 #include <sys/promif.h> 61 #include <sys/sunmdi.h> 62 #include <sys/mdi_impldefs.h> 63 #include <sys/taskq.h> 64 #include <sys/epm.h> 65 #include <sys/sunpm.h> 66 #include <sys/modhash.h> 67 #include <sys/disp.h> 68 #include <sys/autoconf.h> 69 70 #ifdef DEBUG 71 #include <sys/debug.h> 72 int mdi_debug = 1; 73 #define MDI_DEBUG(level, stmnt) \ 74 if (mdi_debug >= (level)) i_mdi_log stmnt 75 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 76 #else /* !DEBUG */ 77 #define MDI_DEBUG(level, stmnt) 78 #endif /* DEBUG */ 79 80 extern pri_t minclsyspri; 81 extern int modrootloaded; 82 83 /* 84 * Global mutex: 85 * Protects vHCI list and structure members, pHCI and Client lists. 
86 */ 87 kmutex_t mdi_mutex; 88 89 /* 90 * Registered vHCI class driver lists 91 */ 92 int mdi_vhci_count; 93 mdi_vhci_t *mdi_vhci_head; 94 mdi_vhci_t *mdi_vhci_tail; 95 96 /* 97 * Client Hash Table size 98 */ 99 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 100 101 /* 102 * taskq interface definitions 103 */ 104 #define MDI_TASKQ_N_THREADS 8 105 #define MDI_TASKQ_PRI minclsyspri 106 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 107 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 108 109 taskq_t *mdi_taskq; 110 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 111 112 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 113 114 /* 115 * The data should be "quiet" for this interval (in seconds) before the 116 * vhci cached data is flushed to the disk. 117 */ 118 static int mdi_vhcache_flush_delay = 10; 119 120 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 121 static int mdi_vhcache_flush_daemon_idle_time = 60; 122 123 /* 124 * MDI falls back to discovery of all paths when a bus_config_one fails. 125 * The following parameters can be used to tune this operation. 126 * 127 * mdi_path_discovery_boot 128 * Number of times path discovery will be attempted during early boot. 129 * Probably there is no reason to ever set this value to greater than one. 130 * 131 * mdi_path_discovery_postboot 132 * Number of times path discovery will be attempted after early boot. 133 * Set it to a minimum of two to allow for discovery of iscsi paths which 134 * may happen very late during booting. 135 * 136 * mdi_path_discovery_interval 137 * Minimum number of seconds MDI will wait between successive discovery 138 * of all paths. Set it to -1 to disable discovery of all paths. 139 */ 140 static int mdi_path_discovery_boot = 1; 141 static int mdi_path_discovery_postboot = 2; 142 static int mdi_path_discovery_interval = 10; 143 144 /* 145 * number of seconds the asynchronous configuration thread will sleep idle 146 * before exiting. 
147 */ 148 static int mdi_async_config_idle_time = 600; 149 150 static int mdi_bus_config_cache_hash_size = 256; 151 152 /* turns off multithreaded configuration for certain operations */ 153 static int mdi_mtc_off = 0; 154 155 /* 156 * MDI component property name/value string definitions 157 */ 158 const char *mdi_component_prop = "mpxio-component"; 159 const char *mdi_component_prop_vhci = "vhci"; 160 const char *mdi_component_prop_phci = "phci"; 161 const char *mdi_component_prop_client = "client"; 162 163 /* 164 * MDI client global unique identifier property name 165 */ 166 const char *mdi_client_guid_prop = "client-guid"; 167 168 /* 169 * MDI client load balancing property name/value string definitions 170 */ 171 const char *mdi_load_balance = "load-balance"; 172 const char *mdi_load_balance_none = "none"; 173 const char *mdi_load_balance_rr = "round-robin"; 174 const char *mdi_load_balance_lba = "logical-block"; 175 176 /* 177 * Obsolete vHCI class definition; to be removed after Leadville update 178 */ 179 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 180 181 static char vhci_greeting[] = 182 "\tThere already exists one vHCI driver for class %s\n" 183 "\tOnly one vHCI driver for each class is allowed\n"; 184 185 /* 186 * Static function prototypes 187 */ 188 static int i_mdi_phci_offline(dev_info_t *, uint_t); 189 static int i_mdi_client_offline(dev_info_t *, uint_t); 190 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 191 static void i_mdi_phci_post_detach(dev_info_t *, 192 ddi_detach_cmd_t, int); 193 static int i_mdi_client_pre_detach(dev_info_t *, 194 ddi_detach_cmd_t); 195 static void i_mdi_client_post_detach(dev_info_t *, 196 ddi_detach_cmd_t, int); 197 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 198 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 199 static int i_mdi_lba_lb(mdi_client_t *ct, 200 mdi_pathinfo_t **ret_pip, struct buf *buf); 201 static void i_mdi_pm_hold_client(mdi_client_t *, int); 202 static void 
i_mdi_pm_rele_client(mdi_client_t *, int); 203 static void i_mdi_pm_reset_client(mdi_client_t *); 204 static void i_mdi_pm_hold_all_phci(mdi_client_t *); 205 static int i_mdi_power_all_phci(mdi_client_t *); 206 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 207 208 209 /* 210 * Internal mdi_pathinfo node functions 211 */ 212 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 213 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 214 215 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 216 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 217 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 218 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 219 static void i_mdi_phci_get_client_lock(mdi_phci_t *, 220 mdi_client_t *); 221 static void i_mdi_phci_unlock(mdi_phci_t *); 222 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 223 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 224 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 225 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 226 mdi_client_t *); 227 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 228 static void i_mdi_client_remove_path(mdi_client_t *, 229 mdi_pathinfo_t *); 230 231 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 232 mdi_pathinfo_state_t, int); 233 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 234 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 235 char **, int); 236 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 237 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 238 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 239 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 240 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 241 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 242 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, 
char *, char *); 243 static void i_mdi_client_update_state(mdi_client_t *); 244 static int i_mdi_client_compute_state(mdi_client_t *, 245 mdi_phci_t *); 246 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 247 static void i_mdi_client_unlock(mdi_client_t *); 248 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 249 static mdi_client_t *i_devi_get_client(dev_info_t *); 250 /* 251 * NOTE: this will be removed once the NWS files are changed to use the new 252 * mdi_{enable,disable}_path interfaces 253 */ 254 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 255 int, int); 256 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 257 mdi_vhci_t *vh, int flags, int op); 258 /* 259 * Failover related function prototypes 260 */ 261 static int i_mdi_failover(void *); 262 263 /* 264 * misc internal functions 265 */ 266 static int i_mdi_get_hash_key(char *); 267 static int i_map_nvlist_error_to_mdi(int); 268 static void i_mdi_report_path_state(mdi_client_t *, 269 mdi_pathinfo_t *); 270 271 static void setup_vhci_cache(mdi_vhci_t *); 272 static int destroy_vhci_cache(mdi_vhci_t *); 273 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 274 static boolean_t stop_vhcache_flush_thread(void *, int); 275 static void free_string_array(char **, int); 276 static void free_vhcache_phci(mdi_vhcache_phci_t *); 277 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 278 static void free_vhcache_client(mdi_vhcache_client_t *); 279 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 280 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 281 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 282 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 283 static void vhcache_pi_add(mdi_vhci_config_t *, 284 struct mdi_pathinfo *); 285 static void vhcache_pi_remove(mdi_vhci_config_t *, 286 struct mdi_pathinfo *); 287 static void free_phclient_path_list(mdi_phys_path_t *); 288 
/*
 * Forward declarations for the vHCI cache maintenance helpers defined later
 * in this file, plus the nvlist file I/O routines provided elsewhere.
 */
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);

/*
 * i_mdi_init():
 *		One-time framework initialization, called when the first vHCI
 *		registers with mdi (from mdi_vhci_register()).  Initializes
 *		the global mdi_mutex and creates the shared MDI taskq.
 * Note:
 *		The "initialized" latch is not protected by any lock (the
 *		global mutex is only created here); presumably the first vHCI
 *		attach cannot race with another -- NOTE(review): confirm.
 */
static void
i_mdi_init()
{
	static int initialized = 0;

	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
	/*
	 * Create our taskq resources
	 */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
}

/*
 * mdi_get_component_type():
 *		Return mpxio component type
 * Return Values:
 *		MDI_COMPONENT_NONE
 *		MDI_COMPONENT_VHCI
 *		MDI_COMPONENT_PHCI
 *		MDI_COMPONENT_CLIENT
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed when clients migrate mdi_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	return (DEVI(dip)->devi_mdi_component);
}

/*
 * mdi_vhci_register():
 *		Register a vHCI module with the mpxio framework
 *		mdi_vhci_register() is called by vHCI drivers to register the
 *		'class_driver' vHCI driver and its MDI entrypoints with the
 *		mpxio framework.  The vHCI driver must call this interface as
 *		part of its attach(9e) handler.
 *		Competing threads may try to attach mdi_vhci_register() as
 *		the vHCI drivers are loaded and attached as a result of pHCI
 *		driver instance registration (mdi_phci_register()) with the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);

	/* The first registration initializes the framework itself */
	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created.  Check for valid
			 * vHCI ops registration.  We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/*
		 * Round-robin is the default load balancing policy; it may
		 * be overridden by the vHCI's "load-balance" property.
		 */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* Append the new vHCI at the tail of the global list */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_vhci_unregister():
 *		Unregister a vHCI module from mpxio framework
 *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *		of a vhci to unregister it from the framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	mutex_enter(&mdi_mutex);

	/*
	 * Scan the list of registered vHCIs for a match
	 */
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
483 */ 484 if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) { 485 mutex_exit(&mdi_mutex); 486 return (MDI_FAILURE); 487 } 488 489 /* 490 * Remove the vHCI from the global list 491 */ 492 if (vh == mdi_vhci_head) { 493 mdi_vhci_head = vh->vh_next; 494 } else { 495 prev->vh_next = vh->vh_next; 496 } 497 if (vh == mdi_vhci_tail) { 498 mdi_vhci_tail = prev; 499 } 500 501 mdi_vhci_count--; 502 mutex_exit(&mdi_mutex); 503 504 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 505 /* add vhci to the global list */ 506 mutex_enter(&mdi_mutex); 507 if (mdi_vhci_head == NULL) 508 mdi_vhci_head = vh; 509 else 510 mdi_vhci_tail->vh_next = vh; 511 mdi_vhci_tail = vh; 512 mdi_vhci_count++; 513 mutex_exit(&mdi_mutex); 514 return (MDI_FAILURE); 515 } 516 517 vh->vh_ops = NULL; 518 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 519 DEVI(vdip)->devi_mdi_xhci = NULL; 520 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 521 kmem_free(vh->vh_client_table, 522 mdi_client_table_size * sizeof (struct client_hash)); 523 524 kmem_free(vh, sizeof (mdi_vhci_t)); 525 return (MDI_SUCCESS); 526 } 527 528 /* 529 * i_mdi_vhci_class2vhci(): 530 * Look for a matching vHCI module given a vHCI class name 531 * Return Values: 532 * Handle to a vHCI component 533 * NULL 534 */ 535 static mdi_vhci_t * 536 i_mdi_vhci_class2vhci(char *class) 537 { 538 mdi_vhci_t *vh = NULL; 539 540 ASSERT(!MUTEX_HELD(&mdi_mutex)); 541 542 mutex_enter(&mdi_mutex); 543 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 544 if (strcmp(vh->vh_class, class) == 0) { 545 break; 546 } 547 } 548 mutex_exit(&mdi_mutex); 549 return (vh); 550 } 551 552 /* 553 * i_devi_get_vhci(): 554 * Utility function to get the handle to a vHCI component 555 * Return Values: 556 * Handle to a vHCI component 557 * NULL 558 */ 559 mdi_vhci_t * 560 i_devi_get_vhci(dev_info_t *vdip) 561 { 562 mdi_vhci_t *vh = NULL; 563 if (MDI_VHCI(vdip)) { 564 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 565 } 566 return (vh); 567 } 568 569 /* 
 * mdi_phci_register():
 *		Register a pHCI module with mpxio framework
 *		mdi_phci_register() is called by pHCI drivers to register with
 *		the mpxio framework and a specific 'class_driver' vHCI.  The
 *		pHCI driver must call this interface as part of its attach(9e)
 *		handler.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;
	char			*pathname;

	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(pdip, pathname);

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (CE_CONT, pdip,
			    "?%s (%s%d) multipath capabilities "
			    "disabled via %s.conf.\n", pathname,
			    ddi_driver_name(pdip), ddi_get_instance(pdip),
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			kmem_free(pathname, MAXPATHLEN);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	kmem_free(pathname, MAXPATHLEN);

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	/* Allocate and initialize the pHCI extension structure */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);

	MDI_PHCI_SET_POWER_UP(ph);
	/* Claim the devfs node and install the back reference */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

vhcache_phci_add(vh->vh_config, ph); 638 639 mutex_enter(&mdi_mutex); 640 if (vh->vh_phci_head == NULL) { 641 vh->vh_phci_head = ph; 642 } 643 if (vh->vh_phci_tail) { 644 vh->vh_phci_tail->ph_next = ph; 645 } 646 vh->vh_phci_tail = ph; 647 vh->vh_phci_count++; 648 mutex_exit(&mdi_mutex); 649 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 650 return (MDI_SUCCESS); 651 } 652 653 /* 654 * mdi_phci_unregister(): 655 * Unregister a pHCI module from mpxio framework 656 * mdi_phci_unregister() is called by the pHCI drivers from their 657 * detach(9E) handler to unregister their instances from the 658 * framework. 659 * Return Values: 660 * MDI_SUCCESS 661 * MDI_FAILURE 662 */ 663 664 /*ARGSUSED*/ 665 int 666 mdi_phci_unregister(dev_info_t *pdip, int flags) 667 { 668 mdi_vhci_t *vh; 669 mdi_phci_t *ph; 670 mdi_phci_t *tmp; 671 mdi_phci_t *prev = NULL; 672 673 ph = i_devi_get_phci(pdip); 674 if (ph == NULL) { 675 MDI_DEBUG(1, (CE_WARN, pdip, 676 "!pHCI unregister: Not a valid pHCI")); 677 return (MDI_FAILURE); 678 } 679 680 vh = ph->ph_vhci; 681 ASSERT(vh != NULL); 682 if (vh == NULL) { 683 MDI_DEBUG(1, (CE_WARN, pdip, 684 "!pHCI unregister: Not a valid vHCI")); 685 return (MDI_FAILURE); 686 } 687 688 mutex_enter(&mdi_mutex); 689 tmp = vh->vh_phci_head; 690 while (tmp) { 691 if (tmp == ph) { 692 break; 693 } 694 prev = tmp; 695 tmp = tmp->ph_next; 696 } 697 698 if (ph == vh->vh_phci_head) { 699 vh->vh_phci_head = ph->ph_next; 700 } else { 701 prev->ph_next = ph->ph_next; 702 } 703 704 if (ph == vh->vh_phci_tail) { 705 vh->vh_phci_tail = prev; 706 } 707 708 vh->vh_phci_count--; 709 710 mutex_exit(&mdi_mutex); 711 712 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 713 ESC_DDI_INITIATOR_UNREGISTER); 714 vhcache_phci_remove(vh->vh_config, ph); 715 cv_destroy(&ph->ph_unstable_cv); 716 cv_destroy(&ph->ph_powerchange_cv); 717 mutex_destroy(&ph->ph_mutex); 718 kmem_free(ph, sizeof (mdi_phci_t)); 719 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 720 
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *		Utility function to return the phci extensions.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;
	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * mdi_phci_path2devinfo():
 *		Utility function to search for a valid phci device given
 *		the devfs pathname.
 */

dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	mutex_enter(&mdi_mutex);
	/*
	 * Walk the vHCI's pHCI list comparing each pHCI's devfs path
	 * against the requested pathname.
	 */
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	if (ph == NULL) {
		pdip = NULL;
	}
	mutex_exit(&mdi_mutex);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *		get number of path information nodes associated with a given
 *		pHCI device.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and
 *		lock mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			/*
			 * tryenter failed. Try to grab again
			 * after a small delay
			 */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_get_client_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and
 *		lock mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
{
	if (ct) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			/*
			 * tryenter failed.
			 * Try to grab again
			 * after a small delay
			 */
			MDI_CLIENT_UNLOCK(ct);
			delay(1);
			MDI_CLIENT_LOCK(ct);
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *		create client device's devinfo node
 * Return Values:
 *		dev_info
 *		NULL
 * Notes:
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
	char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MUTEX_HELD(&mdi_mutex));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client dip %p already exists",
		    (void *)cdip);
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* Undo any partial node creation before returning failure */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *		Find a matching devinfo node for given client node name
 *		and its guid.
 * Return Values:
 *		Handle to a dev_info node or NULL
 */

static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char			*data;
	dev_info_t		*cdip = NULL;
	dev_info_t		*ndip = NULL;
	int			circular;

	/* Walk the vHCI's children looking for a node name + GUID match */
	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *		Remove a client device node
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;
	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
		if (rv != NDI_SUCCESS) {
			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
			    " failed. cdip = %p\n", cdip));
		}
		/*
		 * Convert to MDI error code
		 */
		switch (rv) {
		case NDI_SUCCESS:
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	}
	return (rv);
}

/*
 * i_devi_get_client()
 *		Utility function to get mpxio component extensions
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
	mdi_client_t	*ct = NULL;
	if (MDI_CLIENT(cdip)) {
		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
	}
	return (ct);
}

/*
 * i_mdi_is_child_present():
 *		Search for the presence of client device dev_info node
 */

static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
	int		rv = MDI_FAILURE;
	struct dev_info	*dip;
	int		circular;

	ndi_devi_enter(vdip, &circular);
	dip = DEVI(vdip)->devi_child;
	while (dip) {
		if (dip == DEVI(cdip)) {
			rv = MDI_SUCCESS;
			break;
		}
		dip = dip->devi_sibling;
	}
	ndi_devi_exit(vdip, circular);
	return (rv);
}


/*
 * i_mdi_client_lock():
 *		Grab client component lock
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and
 *		lock mechanism depending on the requested parameter option.
 */

static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			/*
			 * tryenter failed.
			 * Try to grab again
			 * after a small delay
			 */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_CLIENT_LOCK(ct);
	}
}

/*
 * i_mdi_client_unlock():
 *		Unlock a client component
 */

static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_client_alloc():
 *		Allocate and initialize a client structure.  Caller should
 *		hold the global mdi_mutex.
 * Return Values:
 *		Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MUTEX_HELD(&mdi_mutex));

	/*
	 * Allocate and initialize a component structure.
	 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_dip = NULL;
	ct->ct_vhci = vh;
	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;
	ct->ct_flags = 0;
	/*
	 * A new client starts out failed/offline/detached; it transitions
	 * to an operational state as paths are added and brought online.
	 */
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	ct->ct_unstable = 0;
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
	/* The client inherits its vHCI's load balancing policy by default */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);
	return (ct);
}

/*
 * i_mdi_client_enlist_table():
 *		Attach the client device to the client hash table. Caller
 *		should hold the mdi_mutex
 */

static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	struct client_hash	*head;

	ASSERT(MUTEX_HELD(&mdi_mutex));
	/* Hash on the client GUID; insert at the head of the chain */
	index = i_mdi_get_hash_key(ct->ct_guid);
	head = &vh->vh_client_table[index];
	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
	head->ct_hash_head = ct;
	head->ct_hash_count++;
	vh->vh_client_count++;
}

/*
 * i_mdi_client_delist_table():
 *		Detach the client device from the client hash table.
 *		Caller should hold the mdi_mutex
 */

static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	char			*guid;
	struct client_hash	*head;
	mdi_client_t		*next;
	mdi_client_t		*last;

	ASSERT(MUTEX_HELD(&mdi_mutex));
	guid = ct->ct_guid;
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* Find ct on its hash chain, tracking the previous element */
	last = NULL;
	next = (mdi_client_t *)head->ct_hash_head;
	while (next != NULL) {
		if (next == ct) {
			break;
		}
		last = next;
		next = next->ct_hnext;
	}

	/* Unlink only if actually found; a missing entry is left alone */
	if (next) {
		head->ct_hash_count--;
		if (last == NULL) {
			head->ct_hash_head = ct->ct_hnext;
		} else {
			last->ct_hnext = ct->ct_hnext;
		}
		ct->ct_hnext = NULL;
		vh->vh_client_count--;
	}
}


/*
 * i_mdi_client_free():
 *		Free a client component
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip
	ASSERT(MUTEX_HELD(&mdi_mutex));
	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = NULL;

	/*
	 * Clear out back ref. to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	if (cdip != NULL) {
		/*
		 * NOTE(review): mdi_mutex is dropped across the devinfo node
		 * removal, presumably to avoid holding it over devinfo tree
		 * operations -- confirm against the framework's lock ordering.
		 */
		mutex_exit(&mdi_mutex);
		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
		mutex_enter(&mdi_mutex);
	}
	return (rv);
}

/*
 * i_mdi_client_find():
 * 		Find the client structure corresponding to a given guid
 *		Caller should hold the mdi_mutex
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
	int			index;
	struct client_hash	*head;
	mdi_client_t		*ct;

	ASSERT(MUTEX_HELD(&mdi_mutex));
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* A NULL cname matches any driver name carrying the given guid */
	ct = head->ct_hash_head;
	while (ct != NULL) {
		if (strcmp(ct->ct_guid, guid) == 0 &&
		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
			break;
		}
		ct = ct->ct_hnext;
	}
	return (ct);
}



/*
 * i_mdi_client_update_state():
 *		Compute and update client device state
 * Notes:
 *		A client device can be in any of three possible states:
 *
 *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *		than one online/standby paths. Can tolerate failures.
 *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *		no alternate paths available as standby. A failure on the online
 *		would result in loss of access to device data.
 *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
 *		no paths available to access the device.
 */
static void
i_mdi_client_update_state(mdi_client_t *ct)
{
	int state;
	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	state = i_mdi_client_compute_state(ct, NULL);
	MDI_CLIENT_SET_STATE(ct, state);
}

/*
 * i_mdi_client_compute_state():
 *		Compute client device state
 *
 *		mdi_phci_t *	Pointer to pHCI structure which should be
 *				skipped while computing the new value.  Used by
 *				i_mdi_phci_offline() to find the new
 *				client state after DR of a pHCI.
 */
static int
i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
{
	int		state;
	int		online_count = 0;
	int		standby_count = 0;
	mdi_pathinfo_t	*pip, *next;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	/*
	 * Walk the client's path list counting ONLINE and STANDBY paths,
	 * skipping any path that goes through the excluded pHCI (ph).
	 */
	pip = ct->ct_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		if (MDI_PI(pip)->pi_phci == ph) {
			MDI_PI_UNLOCK(pip);
			pip = next;
			continue;
		}
		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_ONLINE)
			online_count++;
		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_STANDBY)
			standby_count++;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	if (online_count == 0) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_FAILED;
			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
			    " ct = %p\n", ct));
		} else if (standby_count == 1) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else if (online_count == 1) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else {
		state = MDI_CLIENT_STATE_OPTIMAL;
	}
	return (state);
}

/*
 * i_mdi_client2devinfo():
 *		Utility function
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	return (ct->ct_dip);
}

/*
 * mdi_client_path2devinfo():
 * 		Given the parent devinfo and child devfs pathname, search for
 *		a valid devfs node handle.
 */
dev_info_t *
mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
{
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	char		*temp_pathname;
	int		circular;

	/*
	 * Allocate temp buffer
	 */
	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	/*
	 * Lock parent against changes
	 */
	ndi_devi_enter(vdip, &circular);
	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		/* Compare each child's full devfs path against pathname */
		*temp_pathname = '\0';
		(void) ddi_pathname(cdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
	}
	/*
	 * Release devinfo lock
	 */
	ndi_devi_exit(vdip, circular);

	/*
	 * Free the temp buffer
	 */
	kmem_free(temp_pathname, MAXPATHLEN);
	return (cdip);
}


/*
 * mdi_client_get_path_count():
 * 		Utility function to get number of path information nodes
 *		associated with a given client device.
 */
int
mdi_client_get_path_count(dev_info_t *cdip)
{
	mdi_client_t	*ct;
	int		count = 0;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		count = ct->ct_path_count;
	}
	return (count);
}


/*
 * i_mdi_get_hash_key():
 * 		Create a hash using strings as keys
 *
 *		Simple additive hash: sums the characters of the guid string.
 */
static int
i_mdi_get_hash_key(char *str)
{
	uint32_t	g, hash = 0;
	char		*p;

	for (p = str; *p != '\0'; p++) {
		g = *p;
		hash += g;
	}
	/*
	 * NOTE(review): modulus is CLIENT_HASH_TABLE_SIZE - 1, so indices
	 * range 0 .. SIZE-2 and the table's last bucket is never used --
	 * confirm this is intentional.
	 */
	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
}

/*
 * mdi_get_lb_policy():
 * 		Get current load balancing policy for a given client device
 */
client_lb_t
mdi_get_lb_policy(dev_info_t *cdip)
{
	client_lb_t	lb = LOAD_BALANCE_NONE;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		lb = ct->ct_lb;
	}
	return (lb);
}

/*
 * mdi_set_lb_region_size():
 * 		Set current region size for the load-balance
 */
int
mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
{
	mdi_client_t	*ct;
	int		rv = MDI_FAILURE;

	ct = i_devi_get_client(cdip);
	if (ct != NULL && ct->ct_lb_args != NULL) {
		ct->ct_lb_args->region_size = region_size;
		rv = MDI_SUCCESS;
	}
	return (rv);
}

/*
 * mdi_set_lb_policy():
 * 		Set current load balancing policy for a given client device
 */
int
mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
{
	mdi_client_t	*ct;
	int		rv = MDI_FAILURE;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		ct->ct_lb = lb;
		rv = MDI_SUCCESS;
	}
	return (rv);
}

/*
 * mdi_failover():
 *		failover function called by the vHCI drivers to initiate
 *		a failover operation.  This is typically due to non-availability
 *		of online paths to route I/O requests.  Failover can be
 *		triggered through user application also.
 *
 *		The vHCI driver calls mdi_failover() to initiate a failover
 *		operation. mdi_failover() calls back into the vHCI driver's
 *		vo_failover() entry point to perform the actual failover
 *		operation. The reason for requiring the vHCI driver to
 *		initiate failover by calling mdi_failover(), instead of directly
 *		executing vo_failover() itself, is to ensure that the mdi
 *		framework can keep track of the client state properly.
 *		Additionally, mdi_failover() provides as a convenience the
 *		option of performing the failover operation synchronously or
 *		asynchronously
 *
 *		Upon successful completion of the failover operation, the
 *		paths that were previously ONLINE will be in the STANDBY state,
 *		and the newly activated paths will be in the ONLINE state.
 *
 *		The flags modifier determines whether the activation is done
 *		synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int		rv;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* Synchronous caller: block until the client settles */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again.
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(ct->ct_dip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode.  Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *		internal failover function. Invokes vHCI drivers failover
 *		callback function and process the failover status
 * Return Values:
 *		Status returned by the vHCI driver's vo_failover() callback
 *		(MDI_SUCCESS when no callback is registered).
 *
 * Note: A client device in failover state can not be detached or freed.
1646 */ 1647 static int 1648 i_mdi_failover(void *arg) 1649 { 1650 int rv = MDI_SUCCESS; 1651 mdi_client_t *ct = (mdi_client_t *)arg; 1652 mdi_vhci_t *vh = ct->ct_vhci; 1653 1654 ASSERT(!MUTEX_HELD(&ct->ct_mutex)); 1655 1656 if (vh->vh_ops->vo_failover != NULL) { 1657 /* 1658 * Call vHCI drivers callback routine 1659 */ 1660 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1661 ct->ct_failover_flags); 1662 } 1663 1664 MDI_CLIENT_LOCK(ct); 1665 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1666 1667 /* 1668 * Save the failover return status 1669 */ 1670 ct->ct_failover_status = rv; 1671 1672 /* 1673 * As a result of failover, client status would have been changed. 1674 * Update the client state and wake up anyone waiting on this client 1675 * device. 1676 */ 1677 i_mdi_client_update_state(ct); 1678 1679 cv_broadcast(&ct->ct_failover_cv); 1680 MDI_CLIENT_UNLOCK(ct); 1681 return (rv); 1682 } 1683 1684 /* 1685 * Load balancing is logical block. 1686 * IOs within the range described by region_size 1687 * would go on the same path. This would improve the 1688 * performance by cache-hit on some of the RAID devices. 1689 * Search only for online paths(At some point we 1690 * may want to balance across target ports). 1691 * If no paths are found then default to round-robin. 
1692 */ 1693 static int 1694 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1695 { 1696 int path_index = -1; 1697 int online_path_count = 0; 1698 int online_nonpref_path_count = 0; 1699 int region_size = ct->ct_lb_args->region_size; 1700 mdi_pathinfo_t *pip; 1701 mdi_pathinfo_t *next; 1702 int preferred, path_cnt; 1703 1704 pip = ct->ct_path_head; 1705 while (pip) { 1706 MDI_PI_LOCK(pip); 1707 if (MDI_PI(pip)->pi_state == 1708 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1709 online_path_count++; 1710 } else if (MDI_PI(pip)->pi_state == 1711 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1712 online_nonpref_path_count++; 1713 } 1714 next = (mdi_pathinfo_t *) 1715 MDI_PI(pip)->pi_client_link; 1716 MDI_PI_UNLOCK(pip); 1717 pip = next; 1718 } 1719 /* if found any online/preferred then use this type */ 1720 if (online_path_count > 0) { 1721 path_cnt = online_path_count; 1722 preferred = 1; 1723 } else if (online_nonpref_path_count > 0) { 1724 path_cnt = online_nonpref_path_count; 1725 preferred = 0; 1726 } else { 1727 path_cnt = 0; 1728 } 1729 if (path_cnt) { 1730 path_index = (bp->b_blkno >> region_size) % path_cnt; 1731 pip = ct->ct_path_head; 1732 while (pip && path_index != -1) { 1733 MDI_PI_LOCK(pip); 1734 if (path_index == 0 && 1735 (MDI_PI(pip)->pi_state == 1736 MDI_PATHINFO_STATE_ONLINE) && 1737 MDI_PI(pip)->pi_preferred == preferred) { 1738 MDI_PI_HOLD(pip); 1739 MDI_PI_UNLOCK(pip); 1740 *ret_pip = pip; 1741 return (MDI_SUCCESS); 1742 } 1743 path_index --; 1744 next = (mdi_pathinfo_t *) 1745 MDI_PI(pip)->pi_client_link; 1746 MDI_PI_UNLOCK(pip); 1747 pip = next; 1748 } 1749 if (pip == NULL) { 1750 MDI_DEBUG(4, (CE_NOTE, NULL, 1751 "!lba %p, no pip !!\n", 1752 bp->b_blkno)); 1753 } else { 1754 MDI_DEBUG(4, (CE_NOTE, NULL, 1755 "!lba %p, no pip for path_index, " 1756 "pip %p\n", pip)); 1757 } 1758 } 1759 return (MDI_FAILURE); 1760 } 1761 1762 /* 1763 * mdi_select_path(): 1764 * select a path to access a client 
device. 1765 * 1766 * mdi_select_path() function is called by the vHCI drivers to 1767 * select a path to route the I/O request to. The caller passes 1768 * the block I/O data transfer structure ("buf") as one of the 1769 * parameters. The mpxio framework uses the buf structure 1770 * contents to maintain per path statistics (total I/O size / 1771 * count pending). If more than one online paths are available to 1772 * select, the framework automatically selects a suitable path 1773 * for routing I/O request. If a failover operation is active for 1774 * this client device the call shall be failed with MDI_BUSY error 1775 * code. 1776 * 1777 * By default this function returns a suitable path in online 1778 * state based on the current load balancing policy. Currently 1779 * we support LOAD_BALANCE_NONE (Previously selected online path 1780 * will continue to be used till the path is usable) and 1781 * LOAD_BALANCE_RR (Online paths will be selected in a round 1782 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 1783 * based on the logical block). The load balancing 1784 * through vHCI drivers configuration file (driver.conf). 1785 * 1786 * vHCI drivers may override this default behavior by specifying 1787 * appropriate flags. If start_pip is specified (non NULL) is 1788 * used as start point to walk and find the next appropriate path. 1789 * The following values are currently defined: 1790 * MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or 1791 * MDI_SELECT_STANDBY_PATH (to select an STANDBY path). 1792 * 1793 * The non-standard behavior is used by the scsi_vhci driver, 1794 * whenever it has to use a STANDBY/FAULTED path. Eg. during 1795 * attach of client devices (to avoid an unnecessary failover 1796 * when the STANDBY path comes up first), during failover 1797 * (to activate a STANDBY path as ONLINE). 1798 * 1799 * The selected path in returned in a held state (ref_cnt). 1800 * Caller should release the hold by calling mdi_rele_path(). 
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY	- Client device is busy failing over
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/* First wrap: retry with non-preferred; second: stop */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
		    (ct->ct_lb_args->region_size) && bp &&
		    (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
			    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
				    MDI_PI(pip)->pi_preferred ==
				    preferred) ? 1 : 0);
			} else {
				/*
				 * Non-standard behavior: the flags determine
				 * which path states are acceptable.
				 */
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
2144 */ 2145 if ((start_pip != NULL) && (start_pip == pip) && 2146 (MDI_PI(start_pip)->pi_preferred 2147 != preferred)) { 2148 /* 2149 * try again after going past the start 2150 * pip 2151 */ 2152 MDI_PI_LOCK(pip); 2153 goto do_again; 2154 } 2155 } else { 2156 /* 2157 * Standard behavior case 2158 */ 2159 if (start == pip && preferred) { 2160 /* look for nonpreferred paths */ 2161 preferred = 0; 2162 } else if (start == pip && !preferred) { 2163 /* 2164 * Exit condition 2165 */ 2166 cont = 0; 2167 } 2168 } 2169 } while (cont); 2170 break; 2171 } 2172 2173 MDI_CLIENT_UNLOCK(ct); 2174 if (retry == 1) { 2175 return (MDI_BUSY); 2176 } else { 2177 return (MDI_NOPATH); 2178 } 2179 } 2180 2181 /* 2182 * For a client, return the next available path to any phci 2183 * 2184 * Note: 2185 * Caller should hold the branch's devinfo node to get a consistent 2186 * snap shot of the mdi_pathinfo nodes. 2187 * 2188 * Please note that even the list is stable the mdi_pathinfo 2189 * node state and properties are volatile. The caller should lock 2190 * and unlock the nodes by calling mdi_pi_lock() and 2191 * mdi_pi_unlock() functions to get a stable properties. 2192 * 2193 * If there is a need to use the nodes beyond the hold of the 2194 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2195 * need to be held against unexpected removal by calling 2196 * mdi_hold_path() and should be released by calling 2197 * mdi_rele_path() on completion. 
2198 */ 2199 mdi_pathinfo_t * 2200 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2201 { 2202 mdi_client_t *ct; 2203 2204 if (!MDI_CLIENT(ct_dip)) 2205 return (NULL); 2206 2207 /* 2208 * Walk through client link 2209 */ 2210 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2211 ASSERT(ct != NULL); 2212 2213 if (pip == NULL) 2214 return ((mdi_pathinfo_t *)ct->ct_path_head); 2215 2216 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2217 } 2218 2219 /* 2220 * For a phci, return the next available path to any client 2221 * Note: ditto mdi_get_next_phci_path() 2222 */ 2223 mdi_pathinfo_t * 2224 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2225 { 2226 mdi_phci_t *ph; 2227 2228 if (!MDI_PHCI(ph_dip)) 2229 return (NULL); 2230 2231 /* 2232 * Walk through pHCI link 2233 */ 2234 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2235 ASSERT(ph != NULL); 2236 2237 if (pip == NULL) 2238 return ((mdi_pathinfo_t *)ph->ph_path_head); 2239 2240 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2241 } 2242 2243 /* 2244 * mdi_get_nextpath(): 2245 * mdi_pathinfo node walker function. Get the next node from the 2246 * client or pHCI device list. 2247 * 2248 * XXX This is wrapper function for compatibility purposes only. 2249 * 2250 * It doesn't work under Multi-level MPxIO, where a dip 2251 * is both client and phci (which link should next_path follow?). 2252 * Once Leadville is modified to call mdi_get_next_phci/client_path, 2253 * this interface should be removed. 2254 */ 2255 void 2256 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip, 2257 mdi_pathinfo_t **ret_pip) 2258 { 2259 if (MDI_CLIENT(dip)) { 2260 *ret_pip = mdi_get_next_phci_path(dip, pip); 2261 } else if (MDI_PHCI(dip)) { 2262 *ret_pip = mdi_get_next_client_path(dip, pip); 2263 } else { 2264 *ret_pip = NULL; 2265 } 2266 } 2267 2268 /* 2269 * mdi_hold_path(): 2270 * Hold the mdi_pathinfo node against unwanted unexpected free. 
2271 * Return Values: 2272 * None 2273 */ 2274 void 2275 mdi_hold_path(mdi_pathinfo_t *pip) 2276 { 2277 if (pip) { 2278 MDI_PI_LOCK(pip); 2279 MDI_PI_HOLD(pip); 2280 MDI_PI_UNLOCK(pip); 2281 } 2282 } 2283 2284 2285 /* 2286 * mdi_rele_path(): 2287 * Release the mdi_pathinfo node which was selected 2288 * through mdi_select_path() mechanism or manually held by 2289 * calling mdi_hold_path(). 2290 * Return Values: 2291 * None 2292 */ 2293 void 2294 mdi_rele_path(mdi_pathinfo_t *pip) 2295 { 2296 if (pip) { 2297 MDI_PI_LOCK(pip); 2298 MDI_PI_RELE(pip); 2299 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2300 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2301 } 2302 MDI_PI_UNLOCK(pip); 2303 } 2304 } 2305 2306 2307 /* 2308 * mdi_pi_lock(): 2309 * Lock the mdi_pathinfo node. 2310 * Note: 2311 * The caller should release the lock by calling mdi_pi_unlock() 2312 */ 2313 void 2314 mdi_pi_lock(mdi_pathinfo_t *pip) 2315 { 2316 ASSERT(pip != NULL); 2317 if (pip) { 2318 MDI_PI_LOCK(pip); 2319 } 2320 } 2321 2322 2323 /* 2324 * mdi_pi_unlock(): 2325 * Unlock the mdi_pathinfo node. 2326 * Note: 2327 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2328 */ 2329 void 2330 mdi_pi_unlock(mdi_pathinfo_t *pip) 2331 { 2332 ASSERT(pip != NULL); 2333 if (pip) { 2334 MDI_PI_UNLOCK(pip); 2335 } 2336 } 2337 2338 /* 2339 * mdi_pi_find(): 2340 * Search the list of mdi_pathinfo nodes attached to the 2341 * pHCI/Client device node whose path address matches "paddr". 2342 * Returns a pointer to the mdi_pathinfo node if a matching node is 2343 * found. 2344 * Return Values: 2345 * mdi_pathinfo node handle 2346 * NULL 2347 * Notes: 2348 * Caller need not hold any locks to call this function. 
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t *ph;
	mdi_vhci_t *vh;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;

	/* A pHCI dip and a path unit address are mandatory */
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, NULL,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 * NOTE(review): message text says "phci" but this is the
		 * vHCI check; left unchanged as it is a runtime string.
		 */
		MDI_DEBUG(2, (CE_WARN, NULL,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	/*
	 * Look for client device identified by caddr (guid)
	 */
	if (caddr == NULL) {
		/*
		 * No client guid supplied: find a mdi_pathinfo node under
		 * the pHCI list for a matching unit address.
		 */
		mutex_enter(&ph->ph_mutex);
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		mutex_exit(&ph->ph_mutex);
		/* pip is NULL here if no unit address matched */
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr).  Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	mutex_enter(&mdi_mutex);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		mutex_exit(&mdi_mutex);
		return (pip);	/* pip is still NULL here */
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed.  Note: We always
	 * respect the locking order while acquiring.
	 * (see LOCK_ORDER notes at the top of this file: mdi_mutex before
	 * mdi_client::ct_mutex)
	 */
	mutex_exit(&mdi_mutex);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address; the path must also belong to
		 * the pHCI identified by pdip.
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (pip);
}

/*
 * mdi_pi_alloc():
 *	Allocate and initialize a new instance of a mdi_pathinfo node.
 *	The mdi_pathinfo node returned by this function identifies a
 *	unique device path is capable of having properties attached
 *	and passed to mdi_pi_online() to fully attach and online the
 *	path and client device node.
 *	The mdi_pathinfo node returned by this function must be
 *	destroyed using mdi_pi_free() if the path is no longer
 *	operational or if the caller fails to attach a client device
 *	node when calling mdi_pi_online().  The framework will not free
 *	the resources allocated.
 *	This function can be called from both interrupt and kernel
 *	contexts.  DDI_NOSLEEP flag should be used while calling
 *	from interrupt contexts.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t *vh;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;
	dev_info_t *cdip;
	int rv = MDI_NOMEM;	/* returned via 'fail:' if devinfo alloc fails */
	int path_allocated = 0;	/* set when a brand-new pathinfo is created */

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;
	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/* Mark the pHCI unstable for the duration of this allocation */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	mutex_enter(&mdi_mutex);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* rv is still MDI_NOMEM here */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/*
	 * Scan the client's existing paths for one with the same pHCI
	 * and unit address; reuse it rather than allocating a duplicate.
	 */
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	if (pip == NULL) {
		/*
		 * This is a new path for this client device.  Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	mutex_exit(&mdi_mutex);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;		/* NULL on the failure path */

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *	Convenience wrapper around mdi_pi_alloc_compatible() with no
 *	"compatible" property list.  See that function for semantics.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *	Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *	mdi_pathinfo
 * Notes:
 *	NOTE(review): allocations below use KM_SLEEP unconditionally even
 *	though the public header comment mentions DDI_NOSLEEP for interrupt
 *	context — confirm callers never reach here from interrupt context.
 */

/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t *pip;
	int ct_circular;
	int ph_circular;
	int se_flag;
	int kmem_flag;

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	/* New paths start in INIT and are TRANSIENT until fully set up */
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit any disable conditions currently set on the pHCI */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	/* the snapshot cache is now stale; force regeneration */
	i_ddi_di_cache_invalidate(kmem_flag);

	return (pip);
}

/*
 * i_mdi_phci_add_path():
 *	Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *	Caller should hold the per-pHCI mutex.
 */

static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Append to the tail of the singly-linked pHCI path list */
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
}

/*
 * i_mdi_client_add_path():
 *	Add mdi_pathinfo node to client list
 */

static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Append to the tail of the singly-linked client path list */
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
}

/*
 * mdi_pi_free():
 *	Free the mdi_pathinfo node and also client device node if this
 *	is the last path to the device
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_BUSY
 */

/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int rv = MDI_SUCCESS;
	mdi_vhci_t *vh;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	int (*f)();
	int client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 * Wait up to 60 seconds for the reference count to drop
		 * to zero (mdi_rele_path() broadcasts pi_ref_cv).
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till mdi_mutex is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop and reacquire the client lock around mdi_mutex to respect
	 * the lock order (mdi_mutex before mdi_client::ct_mutex).
	 */
	MDI_CLIENT_UNLOCK(ct);
	mutex_enter(&mdi_mutex);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			mutex_exit(&mdi_mutex);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	mutex_exit(&mdi_mutex);

	/* vo_pi_uninit failed: restore the cache entry removed above */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *	Free the mdi_pathinfo node: unlink it from both the client and
 *	pHCI path lists, destroy its synchronization objects and release
 *	its memory.
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int ct_circular;
	int ph_circular;
	int se_flag;
	int kmem_flag;

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* Lock both dev_info nodes against parallel changes */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	/* the snapshot cache is now stale; force regeneration */
	i_ddi_di_cache_invalidate(kmem_flag);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *	Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *	Caller should hold per-pHCI mutex
 */

static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t *prev = NULL;
	mdi_pathinfo_t *path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Locate pip (and its predecessor) on the singly-linked list */
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
}

/*
 * i_mdi_client_remove_path():
 *	Remove a mdi_pathinfo node from client path list.
 */

static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t *prev = NULL;
	mdi_pathinfo_t *path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Locate pip (and its predecessor) on the singly-linked list */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* don't leave the round-robin cursor dangling */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *	Transition a mdi_pathinfo node to 'state' (online, standby,
 *	fault or offline), driving the vHCI's vo_pi_state_change()
 *	callback and updating the client/devinfo state to match.
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int rv = MDI_SUCCESS;
	mdi_vhci_t *vh;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	int (*f)();
	dev_info_t *cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* vo_pi_init() must be called without the pip lock held */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Mark the pHCI unstable for the duration of the transition */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* Notify the vHCI driver of the requested transition */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (MDI_PI_IS_TRANSIENT(pip)) {
		/* commit the new state on success, roll back on failure */
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *	Place the path_info node in the online state.  The path is
 *	now available to be selected by mdi_select_path() for
 *	transporting I/O requests to client devices.
3329 * Return Values: 3330 * MDI_SUCCESS 3331 * MDI_FAILURE 3332 */ 3333 int 3334 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3335 { 3336 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3337 dev_info_t *cdip; 3338 int client_held = 0; 3339 int rv; 3340 3341 ASSERT(ct != NULL); 3342 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3343 if (rv != MDI_SUCCESS) 3344 return (rv); 3345 3346 MDI_PI_LOCK(pip); 3347 if (MDI_PI(pip)->pi_pm_held == 0) { 3348 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3349 "i_mdi_pm_hold_pip\n")); 3350 i_mdi_pm_hold_pip(pip); 3351 client_held = 1; 3352 } 3353 MDI_PI_UNLOCK(pip); 3354 3355 if (client_held) { 3356 MDI_CLIENT_LOCK(ct); 3357 if (ct->ct_power_cnt == 0) { 3358 rv = i_mdi_power_all_phci(ct); 3359 } 3360 3361 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3362 "i_mdi_pm_hold_client\n")); 3363 i_mdi_pm_hold_client(ct, 1); 3364 MDI_CLIENT_UNLOCK(ct); 3365 } 3366 3367 /* 3368 * Create the per-path (pathinfo) IO and error kstats which 3369 * are reported via iostat(1m). 3370 * 3371 * Defer creating the per-path kstats if device is not yet 3372 * attached; the names of the kstats are constructed in part 3373 * using the devices instance number which is assigned during 3374 * process of attaching the client device. 3375 * 3376 * The framework post_attach handler, mdi_post_attach(), is 3377 * is responsible for initializing the client's pathinfo list 3378 * once successfully attached. 
3379 */ 3380 cdip = ct->ct_dip; 3381 ASSERT(cdip); 3382 if (cdip == NULL || !i_ddi_devi_attached(cdip)) 3383 return (rv); 3384 3385 MDI_CLIENT_LOCK(ct); 3386 rv = i_mdi_pi_kstat_create(pip); 3387 MDI_CLIENT_UNLOCK(ct); 3388 return (rv); 3389 } 3390 3391 /* 3392 * mdi_pi_standby(): 3393 * Place the mdi_pathinfo node in standby state 3394 * 3395 * Return Values: 3396 * MDI_SUCCESS 3397 * MDI_FAILURE 3398 */ 3399 int 3400 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3401 { 3402 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3403 } 3404 3405 /* 3406 * mdi_pi_fault(): 3407 * Place the mdi_pathinfo node in fault'ed state 3408 * Return Values: 3409 * MDI_SUCCESS 3410 * MDI_FAILURE 3411 */ 3412 int 3413 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3414 { 3415 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3416 } 3417 3418 /* 3419 * mdi_pi_offline(): 3420 * Offline a mdi_pathinfo node. 3421 * Return Values: 3422 * MDI_SUCCESS 3423 * MDI_FAILURE 3424 */ 3425 int 3426 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3427 { 3428 int ret, client_held = 0; 3429 mdi_client_t *ct; 3430 3431 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3432 3433 if (ret == MDI_SUCCESS) { 3434 MDI_PI_LOCK(pip); 3435 if (MDI_PI(pip)->pi_pm_held) { 3436 client_held = 1; 3437 } 3438 MDI_PI_UNLOCK(pip); 3439 3440 if (client_held) { 3441 ct = MDI_PI(pip)->pi_client; 3442 MDI_CLIENT_LOCK(ct); 3443 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3444 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3445 i_mdi_pm_rele_client(ct, 1); 3446 MDI_CLIENT_UNLOCK(ct); 3447 } 3448 } 3449 3450 return (ret); 3451 } 3452 3453 /* 3454 * i_mdi_pi_offline(): 3455 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3456 */ 3457 static int 3458 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3459 { 3460 dev_info_t *vdip = NULL; 3461 mdi_vhci_t *vh = NULL; 3462 mdi_client_t *ct = NULL; 3463 int (*f)(); 3464 int rv; 3465 3466 MDI_PI_LOCK(pip); 3467 ct = 
MDI_PI(pip)->pi_client; 3468 ASSERT(ct != NULL); 3469 3470 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3471 /* 3472 * Give a chance for pending I/Os to complete. 3473 */ 3474 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3475 "%d cmds still pending on path: %p\n", 3476 MDI_PI(pip)->pi_ref_cnt, pip)); 3477 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3478 &MDI_PI(pip)->pi_mutex, 3479 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3480 /* 3481 * The timeout time reached without ref_cnt being zero 3482 * being signaled. 3483 */ 3484 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3485 "Timeout reached on path %p without the cond\n", 3486 pip)); 3487 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3488 "%d cmds still pending on path: %p\n", 3489 MDI_PI(pip)->pi_ref_cnt, pip)); 3490 } 3491 } 3492 vh = ct->ct_vhci; 3493 vdip = vh->vh_dip; 3494 3495 /* 3496 * Notify vHCI that has registered this event 3497 */ 3498 ASSERT(vh->vh_ops); 3499 f = vh->vh_ops->vo_pi_state_change; 3500 3501 if (f != NULL) { 3502 MDI_PI_UNLOCK(pip); 3503 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3504 flags)) != MDI_SUCCESS) { 3505 MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed " 3506 "vdip 0x%x, pip 0x%x", vdip, pip)); 3507 } 3508 MDI_PI_LOCK(pip); 3509 } 3510 3511 /* 3512 * Set the mdi_pathinfo node state and clear the transient condition 3513 */ 3514 MDI_PI_SET_OFFLINE(pip); 3515 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3516 MDI_PI_UNLOCK(pip); 3517 3518 MDI_CLIENT_LOCK(ct); 3519 if (rv == MDI_SUCCESS) { 3520 if (ct->ct_unstable == 0) { 3521 dev_info_t *cdip = ct->ct_dip; 3522 3523 /* 3524 * Onlining the mdi_pathinfo node will impact the 3525 * client state Update the client and dev_info node 3526 * state accordingly 3527 */ 3528 i_mdi_client_update_state(ct); 3529 rv = NDI_SUCCESS; 3530 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3531 if (cdip && 3532 (i_ddi_node_state(cdip) >= 3533 DS_INITIALIZED)) { 3534 MDI_CLIENT_UNLOCK(ct); 3535 rv = 
ndi_devi_offline(cdip, 0); 3536 MDI_CLIENT_LOCK(ct); 3537 if (rv != NDI_SUCCESS) { 3538 /* 3539 * ndi_devi_offline failed. 3540 * Reset client flags to 3541 * online. 3542 */ 3543 MDI_DEBUG(4, (CE_WARN, cdip, 3544 "!ndi_devi_offline: failed " 3545 " Error: %x", rv)); 3546 MDI_CLIENT_SET_ONLINE(ct); 3547 } 3548 } 3549 } 3550 /* 3551 * Convert to MDI error code 3552 */ 3553 switch (rv) { 3554 case NDI_SUCCESS: 3555 rv = MDI_SUCCESS; 3556 break; 3557 case NDI_BUSY: 3558 rv = MDI_BUSY; 3559 break; 3560 default: 3561 rv = MDI_FAILURE; 3562 break; 3563 } 3564 } 3565 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3566 i_mdi_report_path_state(ct, pip); 3567 } 3568 3569 MDI_CLIENT_UNLOCK(ct); 3570 3571 /* 3572 * Change in the mdi_pathinfo node state will impact the client state 3573 */ 3574 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3575 ct, pip)); 3576 return (rv); 3577 } 3578 3579 3580 /* 3581 * mdi_pi_get_addr(): 3582 * Get the unit address associated with a mdi_pathinfo node 3583 * 3584 * Return Values: 3585 * char * 3586 */ 3587 char * 3588 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3589 { 3590 if (pip == NULL) 3591 return (NULL); 3592 3593 return (MDI_PI(pip)->pi_addr); 3594 } 3595 3596 /* 3597 * mdi_pi_get_client(): 3598 * Get the client devinfo associated with a mdi_pathinfo node 3599 * 3600 * Return Values: 3601 * Handle to client device dev_info node 3602 */ 3603 dev_info_t * 3604 mdi_pi_get_client(mdi_pathinfo_t *pip) 3605 { 3606 dev_info_t *dip = NULL; 3607 if (pip) { 3608 dip = MDI_PI(pip)->pi_client->ct_dip; 3609 } 3610 return (dip); 3611 } 3612 3613 /* 3614 * mdi_pi_get_phci(): 3615 * Get the pHCI devinfo associated with the mdi_pathinfo node 3616 * Return Values: 3617 * Handle to dev_info node 3618 */ 3619 dev_info_t * 3620 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3621 { 3622 dev_info_t *dip = NULL; 3623 if (pip) { 3624 dip = MDI_PI(pip)->pi_phci->ph_dip; 3625 } 3626 return (dip); 3627 } 3628 3629 /* 3630 * mdi_pi_get_client_private(): 3631 * Get the 
client private information associated with the 3632 * mdi_pathinfo node 3633 */ 3634 void * 3635 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3636 { 3637 void *cprivate = NULL; 3638 if (pip) { 3639 cprivate = MDI_PI(pip)->pi_cprivate; 3640 } 3641 return (cprivate); 3642 } 3643 3644 /* 3645 * mdi_pi_set_client_private(): 3646 * Set the client private information in the mdi_pathinfo node 3647 */ 3648 void 3649 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3650 { 3651 if (pip) { 3652 MDI_PI(pip)->pi_cprivate = priv; 3653 } 3654 } 3655 3656 /* 3657 * mdi_pi_get_phci_private(): 3658 * Get the pHCI private information associated with the 3659 * mdi_pathinfo node 3660 */ 3661 caddr_t 3662 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3663 { 3664 caddr_t pprivate = NULL; 3665 if (pip) { 3666 pprivate = MDI_PI(pip)->pi_pprivate; 3667 } 3668 return (pprivate); 3669 } 3670 3671 /* 3672 * mdi_pi_set_phci_private(): 3673 * Set the pHCI private information in the mdi_pathinfo node 3674 */ 3675 void 3676 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3677 { 3678 if (pip) { 3679 MDI_PI(pip)->pi_pprivate = priv; 3680 } 3681 } 3682 3683 /* 3684 * mdi_pi_get_state(): 3685 * Get the mdi_pathinfo node state. Transient states are internal 3686 * and not provided to the users 3687 */ 3688 mdi_pathinfo_state_t 3689 mdi_pi_get_state(mdi_pathinfo_t *pip) 3690 { 3691 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3692 3693 if (pip) { 3694 if (MDI_PI_IS_TRANSIENT(pip)) { 3695 /* 3696 * mdi_pathinfo is in state transition. Return the 3697 * last good state. 3698 */ 3699 state = MDI_PI_OLD_STATE(pip); 3700 } else { 3701 state = MDI_PI_STATE(pip); 3702 } 3703 } 3704 return (state); 3705 } 3706 3707 /* 3708 * Note that the following function needs to be the new interface for 3709 * mdi_pi_get_state when mpxio gets integrated to ON. 
3710 */ 3711 int 3712 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3713 uint32_t *ext_state) 3714 { 3715 *state = MDI_PATHINFO_STATE_INIT; 3716 3717 if (pip) { 3718 if (MDI_PI_IS_TRANSIENT(pip)) { 3719 /* 3720 * mdi_pathinfo is in state transition. Return the 3721 * last good state. 3722 */ 3723 *state = MDI_PI_OLD_STATE(pip); 3724 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3725 } else { 3726 *state = MDI_PI_STATE(pip); 3727 *ext_state = MDI_PI_EXT_STATE(pip); 3728 } 3729 } 3730 return (MDI_SUCCESS); 3731 } 3732 3733 /* 3734 * mdi_pi_get_preferred: 3735 * Get the preferred path flag 3736 */ 3737 int 3738 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3739 { 3740 if (pip) { 3741 return (MDI_PI(pip)->pi_preferred); 3742 } 3743 return (0); 3744 } 3745 3746 /* 3747 * mdi_pi_set_preferred: 3748 * Set the preferred path flag 3749 */ 3750 void 3751 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3752 { 3753 if (pip) { 3754 MDI_PI(pip)->pi_preferred = preferred; 3755 } 3756 } 3757 3758 3759 /* 3760 * mdi_pi_set_state(): 3761 * Set the mdi_pathinfo node state 3762 */ 3763 void 3764 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3765 { 3766 uint32_t ext_state; 3767 3768 if (pip) { 3769 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3770 MDI_PI(pip)->pi_state = state; 3771 MDI_PI(pip)->pi_state |= ext_state; 3772 } 3773 } 3774 3775 /* 3776 * Property functions: 3777 */ 3778 3779 int 3780 i_map_nvlist_error_to_mdi(int val) 3781 { 3782 int rv; 3783 3784 switch (val) { 3785 case 0: 3786 rv = DDI_PROP_SUCCESS; 3787 break; 3788 case EINVAL: 3789 case ENOTSUP: 3790 rv = DDI_PROP_INVAL_ARG; 3791 break; 3792 case ENOMEM: 3793 rv = DDI_PROP_NO_MEMORY; 3794 break; 3795 default: 3796 rv = DDI_PROP_NOT_FOUND; 3797 break; 3798 } 3799 return (rv); 3800 } 3801 3802 /* 3803 * mdi_pi_get_next_prop(): 3804 * Property walk function. 
The caller should hold mdi_pi_lock()
 *		and release by calling mdi_pi_unlock() at the end of walk to
 *		get a consistent value.
 */

nvpair_t *
mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
{
	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (NULL);
	}
	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
}

/*
 * mdi_prop_remove():
 *		Remove the named property from the named list.
 *		A NULL name removes every property on the list.
 */

int
mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
{
	if (pip == NULL) {
		return (DDI_PROP_NOT_FOUND);
	}
	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	if (name) {
		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
	} else {
		char		nvp_name[MAXNAMELEN];
		nvpair_t	*nvp;
		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
		while (nvp) {
			nvpair_t	*next;
			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
			/*
			 * The name is copied to a local buffer before the
			 * removal -- presumably because nvlist_remove_all()
			 * frees the pair whose name string would otherwise be
			 * passed in.  NOTE(review): names longer than
			 * MAXNAMELEN-1 are truncated by this snprintf and
			 * would then fail to match -- confirm pathinfo
			 * property names are bounded by MAXNAMELEN.
			 */
			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
			    nvpair_name(nvp));
			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
			    nvp_name);
			nvp = next;
		}
	}
	MDI_PI_UNLOCK(pip);
	return (DDI_PROP_SUCCESS);
}

/*
 * mdi_prop_size():
 *		Get buffer size needed to pack the property data.
 *		Caller should hold the mdi_pathinfo_t lock to get a consistent
 *		buffer size.
3861 */ 3862 3863 int 3864 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3865 { 3866 int rv; 3867 size_t bufsize; 3868 3869 *buflenp = 0; 3870 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3871 return (DDI_PROP_NOT_FOUND); 3872 } 3873 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3874 rv = nvlist_size(MDI_PI(pip)->pi_prop, 3875 &bufsize, NV_ENCODE_NATIVE); 3876 *buflenp = bufsize; 3877 return (i_map_nvlist_error_to_mdi(rv)); 3878 } 3879 3880 /* 3881 * mdi_prop_pack(): 3882 * pack the property list. The caller should hold the 3883 * mdi_pathinfo_t node to get a consistent data 3884 */ 3885 3886 int 3887 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 3888 { 3889 int rv; 3890 size_t bufsize; 3891 3892 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 3893 return (DDI_PROP_NOT_FOUND); 3894 } 3895 3896 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3897 3898 bufsize = buflen; 3899 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 3900 NV_ENCODE_NATIVE, KM_SLEEP); 3901 3902 return (i_map_nvlist_error_to_mdi(rv)); 3903 } 3904 3905 /* 3906 * mdi_prop_update_byte(): 3907 * Create/Update a byte property 3908 */ 3909 int 3910 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 3911 { 3912 int rv; 3913 3914 if (pip == NULL) { 3915 return (DDI_PROP_INVAL_ARG); 3916 } 3917 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3918 MDI_PI_LOCK(pip); 3919 if (MDI_PI(pip)->pi_prop == NULL) { 3920 MDI_PI_UNLOCK(pip); 3921 return (DDI_PROP_NOT_FOUND); 3922 } 3923 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 3924 MDI_PI_UNLOCK(pip); 3925 return (i_map_nvlist_error_to_mdi(rv)); 3926 } 3927 3928 /* 3929 * mdi_prop_update_byte_array(): 3930 * Create/Update a byte array property 3931 */ 3932 int 3933 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 3934 uint_t nelements) 3935 { 3936 int rv; 3937 3938 if (pip == NULL) { 3939 return (DDI_PROP_INVAL_ARG); 3940 } 3941 
ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3942 MDI_PI_LOCK(pip); 3943 if (MDI_PI(pip)->pi_prop == NULL) { 3944 MDI_PI_UNLOCK(pip); 3945 return (DDI_PROP_NOT_FOUND); 3946 } 3947 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 3948 MDI_PI_UNLOCK(pip); 3949 return (i_map_nvlist_error_to_mdi(rv)); 3950 } 3951 3952 /* 3953 * mdi_prop_update_int(): 3954 * Create/Update a 32 bit integer property 3955 */ 3956 int 3957 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 3958 { 3959 int rv; 3960 3961 if (pip == NULL) { 3962 return (DDI_PROP_INVAL_ARG); 3963 } 3964 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3965 MDI_PI_LOCK(pip); 3966 if (MDI_PI(pip)->pi_prop == NULL) { 3967 MDI_PI_UNLOCK(pip); 3968 return (DDI_PROP_NOT_FOUND); 3969 } 3970 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 3971 MDI_PI_UNLOCK(pip); 3972 return (i_map_nvlist_error_to_mdi(rv)); 3973 } 3974 3975 /* 3976 * mdi_prop_update_int64(): 3977 * Create/Update a 64 bit integer property 3978 */ 3979 int 3980 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 3981 { 3982 int rv; 3983 3984 if (pip == NULL) { 3985 return (DDI_PROP_INVAL_ARG); 3986 } 3987 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3988 MDI_PI_LOCK(pip); 3989 if (MDI_PI(pip)->pi_prop == NULL) { 3990 MDI_PI_UNLOCK(pip); 3991 return (DDI_PROP_NOT_FOUND); 3992 } 3993 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 3994 MDI_PI_UNLOCK(pip); 3995 return (i_map_nvlist_error_to_mdi(rv)); 3996 } 3997 3998 /* 3999 * mdi_prop_update_int_array(): 4000 * Create/Update a int array property 4001 */ 4002 int 4003 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4004 uint_t nelements) 4005 { 4006 int rv; 4007 4008 if (pip == NULL) { 4009 return (DDI_PROP_INVAL_ARG); 4010 } 4011 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4012 MDI_PI_LOCK(pip); 4013 if (MDI_PI(pip)->pi_prop == NULL) { 4014 MDI_PI_UNLOCK(pip); 4015 return (DDI_PROP_NOT_FOUND); 4016 } 4017 rv 
= nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4018 nelements); 4019 MDI_PI_UNLOCK(pip); 4020 return (i_map_nvlist_error_to_mdi(rv)); 4021 } 4022 4023 /* 4024 * mdi_prop_update_string(): 4025 * Create/Update a string property 4026 */ 4027 int 4028 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4029 { 4030 int rv; 4031 4032 if (pip == NULL) { 4033 return (DDI_PROP_INVAL_ARG); 4034 } 4035 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4036 MDI_PI_LOCK(pip); 4037 if (MDI_PI(pip)->pi_prop == NULL) { 4038 MDI_PI_UNLOCK(pip); 4039 return (DDI_PROP_NOT_FOUND); 4040 } 4041 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4042 MDI_PI_UNLOCK(pip); 4043 return (i_map_nvlist_error_to_mdi(rv)); 4044 } 4045 4046 /* 4047 * mdi_prop_update_string_array(): 4048 * Create/Update a string array property 4049 */ 4050 int 4051 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4052 uint_t nelements) 4053 { 4054 int rv; 4055 4056 if (pip == NULL) { 4057 return (DDI_PROP_INVAL_ARG); 4058 } 4059 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4060 MDI_PI_LOCK(pip); 4061 if (MDI_PI(pip)->pi_prop == NULL) { 4062 MDI_PI_UNLOCK(pip); 4063 return (DDI_PROP_NOT_FOUND); 4064 } 4065 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4066 nelements); 4067 MDI_PI_UNLOCK(pip); 4068 return (i_map_nvlist_error_to_mdi(rv)); 4069 } 4070 4071 /* 4072 * mdi_prop_lookup_byte(): 4073 * Look for byte property identified by name. The data returned 4074 * is the actual property and valid as long as mdi_pathinfo_t node 4075 * is alive. 
4076 */ 4077 int 4078 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4079 { 4080 int rv; 4081 4082 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4083 return (DDI_PROP_NOT_FOUND); 4084 } 4085 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4086 return (i_map_nvlist_error_to_mdi(rv)); 4087 } 4088 4089 4090 /* 4091 * mdi_prop_lookup_byte_array(): 4092 * Look for byte array property identified by name. The data 4093 * returned is the actual property and valid as long as 4094 * mdi_pathinfo_t node is alive. 4095 */ 4096 int 4097 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4098 uint_t *nelements) 4099 { 4100 int rv; 4101 4102 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4103 return (DDI_PROP_NOT_FOUND); 4104 } 4105 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4106 nelements); 4107 return (i_map_nvlist_error_to_mdi(rv)); 4108 } 4109 4110 /* 4111 * mdi_prop_lookup_int(): 4112 * Look for int property identified by name. The data returned 4113 * is the actual property and valid as long as mdi_pathinfo_t 4114 * node is alive. 4115 */ 4116 int 4117 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4118 { 4119 int rv; 4120 4121 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4122 return (DDI_PROP_NOT_FOUND); 4123 } 4124 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4125 return (i_map_nvlist_error_to_mdi(rv)); 4126 } 4127 4128 /* 4129 * mdi_prop_lookup_int64(): 4130 * Look for int64 property identified by name. The data returned 4131 * is the actual property and valid as long as mdi_pathinfo_t node 4132 * is alive. 
4133 */ 4134 int 4135 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4136 { 4137 int rv; 4138 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4139 return (DDI_PROP_NOT_FOUND); 4140 } 4141 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4142 return (i_map_nvlist_error_to_mdi(rv)); 4143 } 4144 4145 /* 4146 * mdi_prop_lookup_int_array(): 4147 * Look for int array property identified by name. The data 4148 * returned is the actual property and valid as long as 4149 * mdi_pathinfo_t node is alive. 4150 */ 4151 int 4152 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4153 uint_t *nelements) 4154 { 4155 int rv; 4156 4157 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4158 return (DDI_PROP_NOT_FOUND); 4159 } 4160 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4161 (int32_t **)data, nelements); 4162 return (i_map_nvlist_error_to_mdi(rv)); 4163 } 4164 4165 /* 4166 * mdi_prop_lookup_string(): 4167 * Look for string property identified by name. The data 4168 * returned is the actual property and valid as long as 4169 * mdi_pathinfo_t node is alive. 4170 */ 4171 int 4172 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4173 { 4174 int rv; 4175 4176 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4177 return (DDI_PROP_NOT_FOUND); 4178 } 4179 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4180 return (i_map_nvlist_error_to_mdi(rv)); 4181 } 4182 4183 /* 4184 * mdi_prop_lookup_string_array(): 4185 * Look for string array property identified by name. The data 4186 * returned is the actual property and valid as long as 4187 * mdi_pathinfo_t node is alive. 
 */

int
mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
    uint_t *nelements)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_free():
 *		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
 *		functions return the pointer to actual property data and not a
 *		copy of it.  So the data returned is valid as long as
 *		mdi_pathinfo_t node is valid.  Hence this is a no-op.
 */

/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	return (DDI_PROP_SUCCESS);
}

/*
 * i_mdi_report_path_state():
 *		Emit a console/log summary of the client and path state plus
 *		the active load-balancing policy.  Caller holds ct_mutex.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char		*phci_path, *ct_path;
	char		*ct_status;
	char		*status;
	dev_info_t	*dip = ct->ct_dip;
	char		lb_buf[64];

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	/* nothing to report until the client is bound and flagged */
	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	if (ct->ct_lb == LOAD_BALANCE_LBA) {
		/*
		 * NOTE(review): ct_lb_args is dereferenced without a NULL
		 * check -- confirm lb args are always set when ct_lb is
		 * LOAD_BALANCE_LBA.
		 */
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s, region-size: %d", mdi_load_balance_lba,
		    ct->ct_lb_args->region_size);
	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s", mdi_load_balance_none);
	} else {
		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
		    mdi_load_balance_rr);
	}

	if (dip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
		    "path %s (%s%d) to target address: %s is %s"
		    " Load balancing: %s\n",
		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
		    ddi_get_instance(dip), ct_status,
		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
		    MDI_PI(pip)->pi_addr, status, lb_buf);
		kmem_free(phci_path, MAXPATHLEN);
		kmem_free(ct_path, MAXPATHLEN);
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef DEBUG
/*
 * i_mdi_log():
 *		Utility function for error message management.  Honors the
 *		cmn_err(9F) destination prefixes ('!' log only, '?' boot
 *		message buffer, '^' console only) found in the formatted
 *		message and forwards them to cmn_err().
 */

/*VARARGS3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		buf[MAXNAMELEN];
	char		name[MAXNAMELEN];
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	if (dip) {
		/* higher-severity messages get a trailing newline in name */
		if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) {
			(void) snprintf(name, MAXNAMELEN, "%s%d:\n",
			    ddi_node_name(dip), ddi_get_instance(dip));
		} else {
			(void) snprintf(name, MAXNAMELEN, "%s%d:",
			    ddi_node_name(dip), ddi_get_instance(dip));
		}
	} else {
		name[0] = '\0';
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
	va_end(ap);

	/* classify by the destination-prefix character, if any */
	switch (buf[0]) {
	case '!':
		log_only = 1;
		break;
	case '?':
		boot_only = 1;
		break;
	case '^':
		console_only = 1;
		break;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
	case CE_WARN:
	case CE_PANIC:
		/* re-apply the prefix ahead of the name, skip it in buf */
		if (boot_only) {
			cmn_err(level, "?%s\t%s", name, &buf[1]);
		} else if (console_only) {
			cmn_err(level, "^%s\t%s", name, &buf[1]);
		} else if (log_only) {
			cmn_err(level, "!%s\t%s", name, &buf[1]);
		} else {
			cmn_err(level, "%s\t%s", name, buf);
		}
		break;
	default:
		cmn_err(level, "%s\t%s", name, buf);
		break;
	}
}
#endif	/* DEBUG */

void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification.
Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	/* power up the pHCIs if no power hold is in place yet */
	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
	    "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *		Online notification from NDI framework on pHCI/client
 *		device online.  A node may be both pHCI and client;
 *		both notifications are delivered in that case.
 * Return Values:
 *		NDI_SUCCESS
 *		MDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *		Offline notification from NDI framework on pHCI/Client device
 *		offline.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);
		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* pHCI offline failed: set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
	    dip));

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Pass 1: verify each path's client can tolerate losing this pHCI;
	 * offline any client for which this is the last good path.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			/*
			 * NOTE(review): ph_mutex is dropped around
			 * ndi_devi_offline(); `next` could go stale if the
			 * path list changes meanwhile -- confirm DR
			 * serialization prevents concurrent list updates.
			 */
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Roll back: restore clients already processed (before the
		 * failing path) to a state matching their client state.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	/* Pass 2: actually offline every path; back out if any path resists */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
	    dip));
	ct = i_devi_get_client(dip);
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!One or more paths to this device is "
			    "in transient state. This device can not "
			    "be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!Client device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}

/*
 * mdi_pre_attach():
 *		Pre attach() notification handler
 */

/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* don't support old DDI_PM_RESUME */
	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
	    (cmd == DDI_PM_RESUME))
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * mdi_post_attach():
 *		Post attach() notification handler
 */

/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_attach: called %p\n", ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_attach: failed error=%d\n",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_resume: called %p\n", ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_resume: failed error=%d\n",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", ct));
			if (error != DDI_SUCCESS) {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_attach: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (CE_WARN, dip,
				    "mdi_post_attach i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached.
			 * Create kstats for any pathinfo structures
			 * initially associated with this client.
			 */
			for (pip = ct->ct_path_head; pip != NULL;
			    pip = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link) {
				(void) i_mdi_pi_kstat_create(pip);
				i_mdi_report_path_state(ct, pip);
			}
			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			/* debug string below says post_attach; resume path */
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_resume: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}

/*
 * mdi_pre_detach():
 *		Pre detach notification handler
 */

/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_detach: called %p\n", ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Resume every client suspended before the failing
			 * one (all paths up to, not including, failed_pip)
			 * to restore the previous state.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_detach: called %p\n", ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_suspend: called %p\n", ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *		Post detach notification handler
 */

/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed. Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed.
Update our state 5028 * too 5029 */ 5030 ph = i_devi_get_phci(dip); 5031 if (ph == NULL) { 5032 return; 5033 } 5034 5035 MDI_PHCI_LOCK(ph); 5036 /* 5037 * Detach of pHCI failed. Restore back converse 5038 * state 5039 */ 5040 switch (cmd) { 5041 case DDI_DETACH: 5042 MDI_DEBUG(2, (CE_NOTE, dip, 5043 "!pHCI post_detach: called %p\n", ph)); 5044 if (error != DDI_SUCCESS) 5045 MDI_PHCI_SET_ATTACH(ph); 5046 break; 5047 5048 case DDI_SUSPEND: 5049 MDI_DEBUG(2, (CE_NOTE, dip, 5050 "!pHCI post_suspend: called %p\n", ph)); 5051 if (error != DDI_SUCCESS) 5052 MDI_PHCI_SET_RESUME(ph); 5053 break; 5054 } 5055 MDI_PHCI_UNLOCK(ph); 5056 } 5057 5058 /*ARGSUSED*/ 5059 static void 5060 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5061 { 5062 mdi_client_t *ct; 5063 5064 ct = i_devi_get_client(dip); 5065 if (ct == NULL) { 5066 return; 5067 } 5068 MDI_CLIENT_LOCK(ct); 5069 /* 5070 * Detach of Client failed. Restore back converse 5071 * state 5072 */ 5073 switch (cmd) { 5074 case DDI_DETACH: 5075 MDI_DEBUG(2, (CE_NOTE, dip, 5076 "!Client post_detach: called %p\n", ct)); 5077 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5078 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5079 "i_mdi_pm_rele_client\n")); 5080 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5081 } else { 5082 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5083 "i_mdi_pm_reset_client\n")); 5084 i_mdi_pm_reset_client(ct); 5085 } 5086 if (error != DDI_SUCCESS) 5087 MDI_CLIENT_SET_ATTACH(ct); 5088 break; 5089 5090 case DDI_SUSPEND: 5091 MDI_DEBUG(2, (CE_NOTE, dip, 5092 "!Client post_suspend: called %p\n", ct)); 5093 if (error != DDI_SUCCESS) 5094 MDI_CLIENT_SET_RESUME(ct); 5095 break; 5096 } 5097 MDI_CLIENT_UNLOCK(ct); 5098 } 5099 5100 /* 5101 * create and install per-path (client - pHCI) statistics 5102 * I/O stats supported: nread, nwritten, reads, and writes 5103 * Error stats - hard errors, soft errors, & transport errors 5104 */ 5105 static int 5106 
i_mdi_pi_kstat_create(mdi_pathinfo_t *pip) 5107 { 5108 5109 dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip; 5110 dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip; 5111 char ksname[KSTAT_STRLEN]; 5112 mdi_pathinfo_t *cpip; 5113 const char *err_postfix = ",err"; 5114 kstat_t *kiosp, *kerrsp; 5115 struct pi_errs *nsp; 5116 struct mdi_pi_kstats *mdi_statp; 5117 5118 ASSERT(client != NULL && ppath != NULL); 5119 5120 ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex))); 5121 5122 if (MDI_PI(pip)->pi_kstats != NULL) 5123 return (MDI_SUCCESS); 5124 5125 for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL; 5126 cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) { 5127 if (cpip == pip) 5128 continue; 5129 /* 5130 * We have found a different path with same parent 5131 * kstats for a given client-pHCI are common 5132 */ 5133 if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) && 5134 (MDI_PI(cpip)->pi_kstats != NULL)) { 5135 MDI_PI(cpip)->pi_kstats->pi_kstat_ref++; 5136 MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats; 5137 return (MDI_SUCCESS); 5138 } 5139 } 5140 5141 /* 5142 * stats are named as follows: TGTx.HBAy, e.g. 
"ssd0.fp0" 5143 * clamp length of name against max length of error kstat name 5144 */ 5145 if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d", 5146 ddi_driver_name(client), ddi_get_instance(client), 5147 ddi_driver_name(ppath), ddi_get_instance(ppath)) > 5148 (KSTAT_STRLEN - strlen(err_postfix))) { 5149 return (MDI_FAILURE); 5150 } 5151 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5152 KSTAT_TYPE_IO, 1, 0)) == NULL) { 5153 return (MDI_FAILURE); 5154 } 5155 5156 (void) strcat(ksname, err_postfix); 5157 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5158 KSTAT_TYPE_NAMED, 5159 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5160 5161 if (kerrsp == NULL) { 5162 kstat_delete(kiosp); 5163 return (MDI_FAILURE); 5164 } 5165 5166 nsp = (struct pi_errs *)kerrsp->ks_data; 5167 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5168 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5169 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5170 KSTAT_DATA_UINT32); 5171 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5172 KSTAT_DATA_UINT32); 5173 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5174 KSTAT_DATA_UINT32); 5175 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5176 KSTAT_DATA_UINT32); 5177 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5178 KSTAT_DATA_UINT32); 5179 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5180 KSTAT_DATA_UINT32); 5181 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5182 KSTAT_DATA_UINT32); 5183 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5184 5185 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5186 mdi_statp->pi_kstat_ref = 1; 5187 mdi_statp->pi_kstat_iostats = kiosp; 5188 mdi_statp->pi_kstat_errstats = kerrsp; 5189 kstat_install(kiosp); 5190 kstat_install(kerrsp); 5191 MDI_PI(pip)->pi_kstats = mdi_statp; 5192 return (MDI_SUCCESS); 5193 } 5194 5195 /* 5196 * destroy per-path 
properties 5197 */ 5198 static void 5199 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5200 { 5201 5202 struct mdi_pi_kstats *mdi_statp; 5203 5204 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5205 return; 5206 5207 MDI_PI(pip)->pi_kstats = NULL; 5208 5209 /* 5210 * the kstat may be shared between multiple pathinfo nodes 5211 * decrement this pathinfo's usage, removing the kstats 5212 * themselves when the last pathinfo reference is removed. 5213 */ 5214 ASSERT(mdi_statp->pi_kstat_ref > 0); 5215 if (--mdi_statp->pi_kstat_ref != 0) 5216 return; 5217 5218 kstat_delete(mdi_statp->pi_kstat_iostats); 5219 kstat_delete(mdi_statp->pi_kstat_errstats); 5220 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5221 } 5222 5223 /* 5224 * update I/O paths KSTATS 5225 */ 5226 void 5227 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5228 { 5229 kstat_t *iostatp; 5230 size_t xfer_cnt; 5231 5232 ASSERT(pip != NULL); 5233 5234 /* 5235 * I/O can be driven across a path prior to having path 5236 * statistics available, i.e. probe(9e). 5237 */ 5238 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5239 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5240 xfer_cnt = bp->b_bcount - bp->b_resid; 5241 if (bp->b_flags & B_READ) { 5242 KSTAT_IO_PTR(iostatp)->reads++; 5243 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5244 } else { 5245 KSTAT_IO_PTR(iostatp)->writes++; 5246 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5247 } 5248 } 5249 } 5250 5251 /* 5252 * Enable the path(specific client/target/initiator) 5253 * Enabling a path means that MPxIO may select the enabled path for routing 5254 * future I/O requests, subject to other path state constraints. 5255 */ 5256 int 5257 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5258 { 5259 mdi_phci_t *ph; 5260 5261 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5262 if (ph == NULL) { 5263 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5264 " failed. 
pip: %p ph = NULL\n", pip)); 5265 return (MDI_FAILURE); 5266 } 5267 5268 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5269 MDI_ENABLE_OP); 5270 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5271 " Returning success pip = %p. ph = %p\n", pip, ph)); 5272 return (MDI_SUCCESS); 5273 5274 } 5275 5276 /* 5277 * Disable the path (specific client/target/initiator) 5278 * Disabling a path means that MPxIO will not select the disabled path for 5279 * routing any new I/O requests. 5280 */ 5281 int 5282 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5283 { 5284 mdi_phci_t *ph; 5285 5286 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5287 if (ph == NULL) { 5288 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5289 " failed. pip: %p ph = NULL\n", pip)); 5290 return (MDI_FAILURE); 5291 } 5292 5293 (void) i_mdi_enable_disable_path(pip, 5294 ph->ph_vhci, flags, MDI_DISABLE_OP); 5295 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5296 "Returning success pip = %p. ph = %p", pip, ph)); 5297 return (MDI_SUCCESS); 5298 } 5299 5300 /* 5301 * disable the path to a particular pHCI (pHCI specified in the phci_path 5302 * argument) for a particular client (specified in the client_path argument). 5303 * Disabling a path means that MPxIO will not select the disabled path for 5304 * routing any new I/O requests. 5305 * NOTE: this will be removed once the NWS files are changed to use the new 5306 * mdi_{enable,disable}_path interfaces 5307 */ 5308 int 5309 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5310 { 5311 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5312 } 5313 5314 /* 5315 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5316 * argument) for a particular client (specified in the client_path argument). 5317 * Enabling a path means that MPxIO may select the enabled path for routing 5318 * future I/O requests, subject to other path state constraints. 
5319 * NOTE: this will be removed once the NWS files are changed to use the new 5320 * mdi_{enable,disable}_path interfaces 5321 */ 5322 5323 int 5324 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5325 { 5326 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5327 } 5328 5329 /* 5330 * Common routine for doing enable/disable. 5331 */ 5332 static mdi_pathinfo_t * 5333 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5334 int op) 5335 { 5336 int sync_flag = 0; 5337 int rv; 5338 mdi_pathinfo_t *next; 5339 int (*f)() = NULL; 5340 5341 f = vh->vh_ops->vo_pi_state_change; 5342 5343 sync_flag = (flags << 8) & 0xf00; 5344 5345 /* 5346 * Do a callback into the mdi consumer to let it 5347 * know that path is about to get enabled/disabled. 5348 */ 5349 if (f != NULL) { 5350 rv = (*f)(vh->vh_dip, pip, 0, 5351 MDI_PI_EXT_STATE(pip), 5352 MDI_EXT_STATE_CHANGE | sync_flag | 5353 op | MDI_BEFORE_STATE_CHANGE); 5354 if (rv != MDI_SUCCESS) { 5355 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5356 "!vo_pi_state_change: failed rv = %x", rv)); 5357 } 5358 } 5359 MDI_PI_LOCK(pip); 5360 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5361 5362 switch (flags) { 5363 case USER_DISABLE: 5364 if (op == MDI_DISABLE_OP) 5365 MDI_PI_SET_USER_DISABLE(pip); 5366 else 5367 MDI_PI_SET_USER_ENABLE(pip); 5368 break; 5369 case DRIVER_DISABLE: 5370 if (op == MDI_DISABLE_OP) 5371 MDI_PI_SET_DRV_DISABLE(pip); 5372 else 5373 MDI_PI_SET_DRV_ENABLE(pip); 5374 break; 5375 case DRIVER_DISABLE_TRANSIENT: 5376 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5377 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5378 else 5379 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5380 break; 5381 } 5382 MDI_PI_UNLOCK(pip); 5383 /* 5384 * Do a callback into the mdi consumer to let it 5385 * know that path is now enabled/disabled. 
5386 */ 5387 if (f != NULL) { 5388 rv = (*f)(vh->vh_dip, pip, 0, 5389 MDI_PI_EXT_STATE(pip), 5390 MDI_EXT_STATE_CHANGE | sync_flag | 5391 op | MDI_AFTER_STATE_CHANGE); 5392 if (rv != MDI_SUCCESS) { 5393 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5394 "!vo_pi_state_change: failed rv = %x", rv)); 5395 } 5396 } 5397 return (next); 5398 } 5399 5400 /* 5401 * Common routine for doing enable/disable. 5402 * NOTE: this will be removed once the NWS files are changed to use the new 5403 * mdi_{enable,disable}_path has been putback 5404 */ 5405 int 5406 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5407 { 5408 5409 mdi_phci_t *ph; 5410 mdi_vhci_t *vh = NULL; 5411 mdi_client_t *ct; 5412 mdi_pathinfo_t *next, *pip; 5413 int found_it; 5414 5415 ph = i_devi_get_phci(pdip); 5416 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5417 " Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip)); 5418 if (ph == NULL) { 5419 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5420 " failed. ph = NULL operation = %d\n", op)); 5421 return (MDI_FAILURE); 5422 } 5423 5424 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5425 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5426 " Invalid operation = %d\n", op)); 5427 return (MDI_FAILURE); 5428 } 5429 5430 vh = ph->ph_vhci; 5431 5432 if (cdip == NULL) { 5433 /* 5434 * Need to mark the Phci as enabled/disabled. 
5435 */ 5436 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5437 "Operation %d for the phci\n", op)); 5438 MDI_PHCI_LOCK(ph); 5439 switch (flags) { 5440 case USER_DISABLE: 5441 if (op == MDI_DISABLE_OP) 5442 MDI_PHCI_SET_USER_DISABLE(ph); 5443 else 5444 MDI_PHCI_SET_USER_ENABLE(ph); 5445 break; 5446 case DRIVER_DISABLE: 5447 if (op == MDI_DISABLE_OP) 5448 MDI_PHCI_SET_DRV_DISABLE(ph); 5449 else 5450 MDI_PHCI_SET_DRV_ENABLE(ph); 5451 break; 5452 case DRIVER_DISABLE_TRANSIENT: 5453 if (op == MDI_DISABLE_OP) 5454 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5455 else 5456 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5457 break; 5458 default: 5459 MDI_PHCI_UNLOCK(ph); 5460 MDI_DEBUG(1, (CE_NOTE, NULL, 5461 "!i_mdi_pi_enable_disable:" 5462 " Invalid flag argument= %d\n", flags)); 5463 } 5464 5465 /* 5466 * Phci has been disabled. Now try to enable/disable 5467 * path info's to each client. 5468 */ 5469 pip = ph->ph_path_head; 5470 while (pip != NULL) { 5471 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5472 } 5473 MDI_PHCI_UNLOCK(ph); 5474 } else { 5475 5476 /* 5477 * Disable a specific client. 5478 */ 5479 ct = i_devi_get_client(cdip); 5480 if (ct == NULL) { 5481 MDI_DEBUG(1, (CE_NOTE, NULL, 5482 "!i_mdi_pi_enable_disable:" 5483 " failed. ct = NULL operation = %d\n", op)); 5484 return (MDI_FAILURE); 5485 } 5486 5487 MDI_CLIENT_LOCK(ct); 5488 pip = ct->ct_path_head; 5489 found_it = 0; 5490 while (pip != NULL) { 5491 MDI_PI_LOCK(pip); 5492 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5493 if (MDI_PI(pip)->pi_phci == ph) { 5494 MDI_PI_UNLOCK(pip); 5495 found_it = 1; 5496 break; 5497 } 5498 MDI_PI_UNLOCK(pip); 5499 pip = next; 5500 } 5501 5502 5503 MDI_CLIENT_UNLOCK(ct); 5504 if (found_it == 0) { 5505 MDI_DEBUG(1, (CE_NOTE, NULL, 5506 "!i_mdi_pi_enable_disable:" 5507 " failed. 
Could not find corresponding pip\n")); 5508 return (MDI_FAILURE); 5509 } 5510 5511 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5512 } 5513 5514 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5515 " Returning success op: %x pdip = %p cdip = %p\n", op, 5516 pdip, cdip)); 5517 return (MDI_SUCCESS); 5518 } 5519 5520 /*ARGSUSED3*/ 5521 int 5522 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp, 5523 int flags, clock_t timeout) 5524 { 5525 mdi_pathinfo_t *pip; 5526 dev_info_t *dip; 5527 clock_t interval = drv_usectohz(100000); /* 0.1 sec */ 5528 char *paddr; 5529 5530 MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm)); 5531 5532 if (!MDI_PHCI(pdip)) 5533 return (MDI_FAILURE); 5534 5535 paddr = strchr(devnm, '@'); 5536 if (paddr == NULL) 5537 return (MDI_FAILURE); 5538 5539 paddr++; /* skip '@' */ 5540 pip = mdi_pi_find(pdip, NULL, paddr); 5541 while (pip == NULL && timeout > 0) { 5542 if (interval > timeout) 5543 interval = timeout; 5544 if (flags & NDI_DEVI_DEBUG) { 5545 cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n", 5546 ddi_driver_name(pdip), ddi_get_instance(pdip), 5547 paddr, interval, timeout); 5548 } 5549 delay(interval); 5550 timeout -= interval; 5551 interval += interval; 5552 pip = mdi_pi_find(pdip, NULL, paddr); 5553 } 5554 5555 if (pip == NULL) 5556 return (MDI_FAILURE); 5557 dip = mdi_pi_get_client(pip); 5558 if (ndi_devi_online(dip, flags) != NDI_SUCCESS) 5559 return (MDI_FAILURE); 5560 *cdipp = dip; 5561 5562 /* TODO: holding should happen inside search functions */ 5563 ndi_hold_devi(dip); 5564 return (MDI_SUCCESS); 5565 } 5566 5567 /* 5568 * Ensure phci powered up 5569 */ 5570 static void 5571 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5572 { 5573 dev_info_t *ph_dip; 5574 5575 ASSERT(pip != NULL); 5576 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5577 5578 if (MDI_PI(pip)->pi_pm_held) { 5579 return; 5580 } 5581 5582 ph_dip = mdi_pi_get_phci(pip); 5583 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for 
%s%d\n", 5584 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5585 if (ph_dip == NULL) { 5586 return; 5587 } 5588 5589 MDI_PI_UNLOCK(pip); 5590 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5591 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5592 pm_hold_power(ph_dip); 5593 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5594 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5595 MDI_PI_LOCK(pip); 5596 5597 MDI_PI(pip)->pi_pm_held = 1; 5598 } 5599 5600 /* 5601 * Allow phci powered down 5602 */ 5603 static void 5604 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5605 { 5606 dev_info_t *ph_dip = NULL; 5607 5608 ASSERT(pip != NULL); 5609 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5610 5611 if (MDI_PI(pip)->pi_pm_held == 0) { 5612 return; 5613 } 5614 5615 ph_dip = mdi_pi_get_phci(pip); 5616 ASSERT(ph_dip != NULL); 5617 5618 MDI_PI_UNLOCK(pip); 5619 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n", 5620 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5621 5622 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5623 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5624 pm_rele_power(ph_dip); 5625 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5626 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5627 5628 MDI_PI_LOCK(pip); 5629 MDI_PI(pip)->pi_pm_held = 0; 5630 } 5631 5632 static void 5633 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5634 { 5635 ASSERT(ct); 5636 5637 ct->ct_power_cnt += incr; 5638 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client " 5639 "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr)); 5640 ASSERT(ct->ct_power_cnt >= 0); 5641 } 5642 5643 static void 5644 i_mdi_rele_all_phci(mdi_client_t *ct) 5645 { 5646 mdi_pathinfo_t *pip; 5647 5648 ASSERT(mutex_owned(&ct->ct_mutex)); 5649 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5650 while (pip != NULL) { 5651 mdi_hold_path(pip); 5652 MDI_PI_LOCK(pip); 5653 i_mdi_pm_rele_pip(pip); 5654 MDI_PI_UNLOCK(pip); 5655 mdi_rele_path(pip); 5656 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5657 } 5658 } 5659 5660 static void 
5661 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5662 { 5663 ASSERT(ct); 5664 5665 if (i_ddi_devi_attached(ct->ct_dip)) { 5666 ct->ct_power_cnt -= decr; 5667 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client " 5668 "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr)); 5669 } 5670 5671 ASSERT(ct->ct_power_cnt >= 0); 5672 if (ct->ct_power_cnt == 0) { 5673 i_mdi_rele_all_phci(ct); 5674 return; 5675 } 5676 } 5677 5678 static void 5679 i_mdi_pm_reset_client(mdi_client_t *ct) 5680 { 5681 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client " 5682 "ct_power_cnt = %d\n", ct->ct_power_cnt)); 5683 ct->ct_power_cnt = 0; 5684 i_mdi_rele_all_phci(ct); 5685 ct->ct_powercnt_config = 0; 5686 ct->ct_powercnt_unconfig = 0; 5687 ct->ct_powercnt_reset = 1; 5688 } 5689 5690 static void 5691 i_mdi_pm_hold_all_phci(mdi_client_t *ct) 5692 { 5693 mdi_pathinfo_t *pip; 5694 ASSERT(mutex_owned(&ct->ct_mutex)); 5695 5696 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5697 while (pip != NULL) { 5698 mdi_hold_path(pip); 5699 MDI_PI_LOCK(pip); 5700 i_mdi_pm_hold_pip(pip); 5701 MDI_PI_UNLOCK(pip); 5702 mdi_rele_path(pip); 5703 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5704 } 5705 } 5706 5707 static int 5708 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5709 { 5710 int ret; 5711 dev_info_t *ph_dip; 5712 5713 MDI_PI_LOCK(pip); 5714 i_mdi_pm_hold_pip(pip); 5715 5716 ph_dip = mdi_pi_get_phci(pip); 5717 MDI_PI_UNLOCK(pip); 5718 5719 /* bring all components of phci to full power */ 5720 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5721 "pm_powerup for %s%d\n", ddi_get_name(ph_dip), 5722 ddi_get_instance(ph_dip))); 5723 5724 ret = pm_powerup(ph_dip); 5725 5726 if (ret == DDI_FAILURE) { 5727 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5728 "pm_powerup FAILED for %s%d\n", 5729 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5730 5731 MDI_PI_LOCK(pip); 5732 i_mdi_pm_rele_pip(pip); 5733 MDI_PI_UNLOCK(pip); 5734 return (MDI_FAILURE); 5735 } 5736 5737 return 
(MDI_SUCCESS); 5738 } 5739 5740 static int 5741 i_mdi_power_all_phci(mdi_client_t *ct) 5742 { 5743 mdi_pathinfo_t *pip; 5744 int succeeded = 0; 5745 5746 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5747 while (pip != NULL) { 5748 mdi_hold_path(pip); 5749 MDI_CLIENT_UNLOCK(ct); 5750 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5751 succeeded = 1; 5752 5753 ASSERT(ct == MDI_PI(pip)->pi_client); 5754 MDI_CLIENT_LOCK(ct); 5755 mdi_rele_path(pip); 5756 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5757 } 5758 5759 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5760 } 5761 5762 /* 5763 * mdi_bus_power(): 5764 * 1. Place the phci(s) into powered up state so that 5765 * client can do power management 5766 * 2. Ensure phci powered up as client power managing 5767 * Return Values: 5768 * MDI_SUCCESS 5769 * MDI_FAILURE 5770 */ 5771 int 5772 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5773 void *arg, void *result) 5774 { 5775 int ret = MDI_SUCCESS; 5776 pm_bp_child_pwrchg_t *bpc; 5777 mdi_client_t *ct; 5778 dev_info_t *cdip; 5779 pm_bp_has_changed_t *bphc; 5780 5781 /* 5782 * BUS_POWER_NOINVOL not supported 5783 */ 5784 if (op == BUS_POWER_NOINVOL) 5785 return (MDI_FAILURE); 5786 5787 /* 5788 * ignore other OPs. 
5789 * return quickly to save cou cycles on the ct processing 5790 */ 5791 switch (op) { 5792 case BUS_POWER_PRE_NOTIFICATION: 5793 case BUS_POWER_POST_NOTIFICATION: 5794 bpc = (pm_bp_child_pwrchg_t *)arg; 5795 cdip = bpc->bpc_dip; 5796 break; 5797 case BUS_POWER_HAS_CHANGED: 5798 bphc = (pm_bp_has_changed_t *)arg; 5799 cdip = bphc->bphc_dip; 5800 break; 5801 default: 5802 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5803 } 5804 5805 ASSERT(MDI_CLIENT(cdip)); 5806 5807 ct = i_devi_get_client(cdip); 5808 if (ct == NULL) 5809 return (MDI_FAILURE); 5810 5811 /* 5812 * wait till the mdi_pathinfo node state change are processed 5813 */ 5814 MDI_CLIENT_LOCK(ct); 5815 switch (op) { 5816 case BUS_POWER_PRE_NOTIFICATION: 5817 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5818 "BUS_POWER_PRE_NOTIFICATION:" 5819 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5820 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5821 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5822 5823 /* serialize power level change per client */ 5824 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5825 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5826 5827 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5828 5829 if (ct->ct_power_cnt == 0) { 5830 ret = i_mdi_power_all_phci(ct); 5831 } 5832 5833 /* 5834 * if new_level > 0: 5835 * - hold phci(s) 5836 * - power up phci(s) if not already 5837 * ignore power down 5838 */ 5839 if (bpc->bpc_nlevel > 0) { 5840 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5841 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5842 "mdi_bus_power i_mdi_pm_hold_client\n")); 5843 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5844 } 5845 } 5846 break; 5847 case BUS_POWER_POST_NOTIFICATION: 5848 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5849 "BUS_POWER_POST_NOTIFICATION:" 5850 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5851 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5852 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5853 *(int *)result)); 5854 5855 if (*(int *)result 
== DDI_SUCCESS) { 5856 if (bpc->bpc_nlevel > 0) { 5857 MDI_CLIENT_SET_POWER_UP(ct); 5858 } else { 5859 MDI_CLIENT_SET_POWER_DOWN(ct); 5860 } 5861 } 5862 5863 /* release the hold we did in pre-notification */ 5864 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5865 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5866 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5867 "mdi_bus_power i_mdi_pm_rele_client\n")); 5868 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5869 } 5870 5871 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5872 /* another thread might started attaching */ 5873 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5874 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5875 "mdi_bus_power i_mdi_pm_rele_client\n")); 5876 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5877 /* detaching has been taken care in pm_post_unconfig */ 5878 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5879 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5880 "mdi_bus_power i_mdi_pm_reset_client\n")); 5881 i_mdi_pm_reset_client(ct); 5882 } 5883 } 5884 5885 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5886 cv_broadcast(&ct->ct_powerchange_cv); 5887 5888 break; 5889 5890 /* need to do more */ 5891 case BUS_POWER_HAS_CHANGED: 5892 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 5893 "BUS_POWER_HAS_CHANGED:" 5894 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5895 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 5896 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 5897 5898 if (bphc->bphc_nlevel > 0 && 5899 bphc->bphc_nlevel > bphc->bphc_olevel) { 5900 if (ct->ct_power_cnt == 0) { 5901 ret = i_mdi_power_all_phci(ct); 5902 } 5903 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5904 "mdi_bus_power i_mdi_pm_hold_client\n")); 5905 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5906 } 5907 5908 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 5909 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5910 "mdi_bus_power i_mdi_pm_rele_client\n")); 5911 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5912 } 5913 break; 5914 } 5915 5916 
MDI_CLIENT_UNLOCK(ct); 5917 return (ret); 5918 } 5919 5920 static int 5921 i_mdi_pm_pre_config_one(dev_info_t *child) 5922 { 5923 int ret = MDI_SUCCESS; 5924 mdi_client_t *ct; 5925 5926 ct = i_devi_get_client(child); 5927 if (ct == NULL) 5928 return (MDI_FAILURE); 5929 5930 MDI_CLIENT_LOCK(ct); 5931 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5932 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5933 5934 if (!MDI_CLIENT_IS_FAILED(ct)) { 5935 MDI_CLIENT_UNLOCK(ct); 5936 MDI_DEBUG(4, (CE_NOTE, child, 5937 "i_mdi_pm_pre_config_one already configured\n")); 5938 return (MDI_SUCCESS); 5939 } 5940 5941 if (ct->ct_powercnt_config) { 5942 MDI_CLIENT_UNLOCK(ct); 5943 MDI_DEBUG(4, (CE_NOTE, child, 5944 "i_mdi_pm_pre_config_one ALREADY held\n")); 5945 return (MDI_SUCCESS); 5946 } 5947 5948 if (ct->ct_power_cnt == 0) { 5949 ret = i_mdi_power_all_phci(ct); 5950 } 5951 MDI_DEBUG(4, (CE_NOTE, child, 5952 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 5953 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5954 ct->ct_powercnt_config = 1; 5955 ct->ct_powercnt_reset = 0; 5956 MDI_CLIENT_UNLOCK(ct); 5957 return (ret); 5958 } 5959 5960 static int 5961 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child) 5962 { 5963 int ret = MDI_SUCCESS; 5964 dev_info_t *cdip; 5965 int circ; 5966 5967 ASSERT(MDI_VHCI(parent)); 5968 5969 /* ndi_devi_config_one */ 5970 if (child) { 5971 return (i_mdi_pm_pre_config_one(child)); 5972 } 5973 5974 /* devi_config_common */ 5975 ndi_devi_enter(parent, &circ); 5976 cdip = ddi_get_child(parent); 5977 while (cdip) { 5978 dev_info_t *next = ddi_get_next_sibling(cdip); 5979 5980 ret = i_mdi_pm_pre_config_one(cdip); 5981 if (ret != MDI_SUCCESS) 5982 break; 5983 cdip = next; 5984 } 5985 ndi_devi_exit(parent, circ); 5986 return (ret); 5987 } 5988 5989 static int 5990 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 5991 { 5992 int ret = MDI_SUCCESS; 5993 mdi_client_t *ct; 5994 5995 ct = i_devi_get_client(child); 5996 if (ct == NULL) 5997 return 
(MDI_FAILURE); 5998 5999 MDI_CLIENT_LOCK(ct); 6000 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6001 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6002 6003 if (!i_ddi_devi_attached(ct->ct_dip)) { 6004 MDI_DEBUG(4, (CE_NOTE, child, 6005 "i_mdi_pm_pre_unconfig node detached already\n")); 6006 MDI_CLIENT_UNLOCK(ct); 6007 return (MDI_SUCCESS); 6008 } 6009 6010 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6011 (flags & NDI_AUTODETACH)) { 6012 MDI_DEBUG(4, (CE_NOTE, child, 6013 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6014 MDI_CLIENT_UNLOCK(ct); 6015 return (MDI_FAILURE); 6016 } 6017 6018 if (ct->ct_powercnt_unconfig) { 6019 MDI_DEBUG(4, (CE_NOTE, child, 6020 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6021 MDI_CLIENT_UNLOCK(ct); 6022 *held = 1; 6023 return (MDI_SUCCESS); 6024 } 6025 6026 if (ct->ct_power_cnt == 0) { 6027 ret = i_mdi_power_all_phci(ct); 6028 } 6029 MDI_DEBUG(4, (CE_NOTE, child, 6030 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6031 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6032 ct->ct_powercnt_unconfig = 1; 6033 ct->ct_powercnt_reset = 0; 6034 MDI_CLIENT_UNLOCK(ct); 6035 if (ret == MDI_SUCCESS) 6036 *held = 1; 6037 return (ret); 6038 } 6039 6040 static int 6041 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held, 6042 int flags) 6043 { 6044 int ret = MDI_SUCCESS; 6045 dev_info_t *cdip; 6046 int circ; 6047 6048 ASSERT(MDI_VHCI(parent)); 6049 *held = 0; 6050 6051 /* ndi_devi_unconfig_one */ 6052 if (child) { 6053 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6054 } 6055 6056 /* devi_unconfig_common */ 6057 ndi_devi_enter(parent, &circ); 6058 cdip = ddi_get_child(parent); 6059 while (cdip) { 6060 dev_info_t *next = ddi_get_next_sibling(cdip); 6061 6062 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6063 cdip = next; 6064 } 6065 ndi_devi_exit(parent, circ); 6066 6067 if (*held) 6068 ret = MDI_SUCCESS; 6069 6070 return (ret); 6071 } 6072 6073 static void 6074 i_mdi_pm_post_config_one(dev_info_t *child) 6075 { 6076 
mdi_client_t *ct; 6077 6078 ct = i_devi_get_client(child); 6079 if (ct == NULL) 6080 return; 6081 6082 MDI_CLIENT_LOCK(ct); 6083 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6084 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6085 6086 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6087 MDI_DEBUG(4, (CE_NOTE, child, 6088 "i_mdi_pm_post_config_one NOT configured\n")); 6089 MDI_CLIENT_UNLOCK(ct); 6090 return; 6091 } 6092 6093 /* client has not been updated */ 6094 if (MDI_CLIENT_IS_FAILED(ct)) { 6095 MDI_DEBUG(4, (CE_NOTE, child, 6096 "i_mdi_pm_post_config_one NOT configured\n")); 6097 MDI_CLIENT_UNLOCK(ct); 6098 return; 6099 } 6100 6101 /* another thread might have powered it down or detached it */ 6102 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6103 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6104 (!i_ddi_devi_attached(ct->ct_dip) && 6105 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6106 MDI_DEBUG(4, (CE_NOTE, child, 6107 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6108 i_mdi_pm_reset_client(ct); 6109 } else { 6110 mdi_pathinfo_t *pip, *next; 6111 int valid_path_count = 0; 6112 6113 MDI_DEBUG(4, (CE_NOTE, child, 6114 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6115 pip = ct->ct_path_head; 6116 while (pip != NULL) { 6117 MDI_PI_LOCK(pip); 6118 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6119 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6120 valid_path_count ++; 6121 MDI_PI_UNLOCK(pip); 6122 pip = next; 6123 } 6124 i_mdi_pm_rele_client(ct, valid_path_count); 6125 } 6126 ct->ct_powercnt_config = 0; 6127 MDI_CLIENT_UNLOCK(ct); 6128 } 6129 6130 static void 6131 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child) 6132 { 6133 int circ; 6134 dev_info_t *cdip; 6135 ASSERT(MDI_VHCI(parent)); 6136 6137 /* ndi_devi_config_one */ 6138 if (child) { 6139 i_mdi_pm_post_config_one(child); 6140 return; 6141 } 6142 6143 /* devi_config_common */ 6144 ndi_devi_enter(parent, &circ); 6145 cdip = ddi_get_child(parent); 6146 while (cdip) { 6147 dev_info_t *next = 
		    ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
}

/*
 * i_mdi_pm_post_unconfig_one():
 *		Balance the unconfig-time power hold for one client after
 *		the unconfig attempt has completed (whether it succeeded
 *		or not).
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power transition before inspecting state */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no unconfig-time hold to balance (or it was already reset) */
	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
		/* release once per ONLINE/STANDBY path of this client */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_pm_post_unconfig():
 *		Post-unconfig power handling for one named child or all
 *		children of the vHCI.  'held' is the value reported by
 *		i_mdi_pm_pre_unconfig(); nothing to balance when it is 0.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(parent));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, parent,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
}

/*
 * mdi_power():
 *		Power management entry point for the vHCI framework.
 *		Dispatches the requested mdi_pm_op_t to the matching
 *		i_mdi_pm_* helper.  For PRE/POST_UNCONFIG, 'args' points
 *		to the caller's 'held' int; for HOLD/RELE_POWER, 'args'
 *		is the client dev_info_t.
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int		circ, ret = MDI_SUCCESS;
	dev_info_t	*client_dip = NULL;
	mdi_client_t	*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
		ndi_devi_exit(vdip, circ);
	}

	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);

		break;
	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);

		break;
	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);

		break;
	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);

		break;
	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		ASSERT(args);

		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			/* only take holds the first time power is needed */
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;
	default:
		break;
	}

	return (ret);
}

/*
 * mdi_component_is_vhci():
 *		Return MDI_SUCCESS if dip is a vHCI node; optionally
 *		return its mdi class name via *mdi_class.
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_component_is_phci():
 *		Return MDI_SUCCESS if dip is a pHCI node; optionally
 *		return the class of its parent vHCI via *mdi_class.
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t *phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_component_is_client():
 *		Return MDI_SUCCESS if dip is an mdi client node;
 *		optionally return the class of its vHCI via *mdi_class.
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t *client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class = client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_client_get_vhci_private():
 *		Get the vhci private data stored on the client node;
 *		NULL if dip is not an mdi client.
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * mdi_client_set_vhci_private():
 *		Set the vhci private data on the client node; a no-op if
 *		dip is not an mdi client.
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}
/*
 * mdi_pi_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_pathinfo node; NULL-safe.
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *		Set the vhci private information in the mdi_pathinfo node;
 *		NULL-safe.
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node; NULL if dip is not a pHCI.
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node;
 *		a no-op if dip is not a pHCI.
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function; the caller frees it (length is strlen(filename) + 1).
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/* the -1 below is to account for "%s" in the format string */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			/* take ownership of the boot-time nvlist, if any */
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			/* corruption is non-fatal; cache will be rebuilt */
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate\n",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* flush the cache to disk before the filesystems go away at uadmin */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_next;

	/* async threads must be stopped before tearing down their state */
	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	/* free the phci list */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	/* free every client along with its pathinfo list */
	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/* poll until the flush thread and all acc threads have exited */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	/* do a final synchronous flush if there are unwritten changes */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}

/*
 * Stop
 * vhci cache flush thread (uadmin pre-VFS callback; see callb_add() in
 * setup_vhci_cache()).
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	/* poll until the flush thread notices MDI_VHC_EXIT and goes away */
	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	/* final forced flush of any unwritten cache changes */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}

/*
 * Enqueue the vhcache phci (cphci) at the tail of the list
 */
static void
enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
{
	cphci->cphci_next = NULL;
	if (vhcache->vhcache_phci_head == NULL)
		vhcache->vhcache_phci_head = cphci;
	else
		vhcache->vhcache_phci_tail->cphci_next = cphci;
	vhcache->vhcache_phci_tail = cphci;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
 */
static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *cpi)
{
	cpi->cpi_next = NULL;
	if (cct->cct_cpi_head == NULL)
		cct->cct_cpi_head = cpi;
	else
		cct->cct_cpi_tail->cpi_next = cpi;
	cct->cct_cpi_tail = cpi;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *newcpi)
{
	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;

	if (cct->cct_cpi_head == NULL ||
	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
		enqueue_tail_vhcache_pathinfo(cct, newcpi);
	else {
		/*
		 * Skip past the existing cpis without the
		 * PATH_DOES_NOT_EXIST hint; newcpi goes in front of the
		 * first cpi that has it (or at the tail if none does).
		 */
		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
		    prev_cpi = cpi, cpi = cpi->cpi_next)
			;

		if (prev_cpi == NULL)
			cct->cct_cpi_head = newcpi;
		else
			prev_cpi->cpi_next = newcpi;

		newcpi->cpi_next = cpi;

		if (cpi == NULL)
			cct->cct_cpi_tail = newcpi;
	}
}

/*
 * Enqueue the vhcache client (cct) at the tail of the list
 */
static void
enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
    mdi_vhcache_client_t *cct)
{
	cct->cct_next = NULL;
	if (vhcache->vhcache_client_head == NULL)
		vhcache->vhcache_client_head = cct;
	else
		vhcache->vhcache_client_tail->cct_next = cct;
	vhcache->vhcache_client_tail = cct;
}

/*
 * Free a kmem-allocated array of nelem kmem-allocated NUL-terminated
 * strings; NULL entries and a NULL array are tolerated.
 */
static void
free_string_array(char **str, int nelem)
{
	int i;

	if (str) {
		for (i = 0; i < nelem; i++) {
			if (str[i])
				kmem_free(str[i], strlen(str[i]) + 1);
		}
		kmem_free(str, sizeof (char *) * nelem);
	}
}

/*
 * Free a vhcache phci structure along with its path string.
 */
static void
free_vhcache_phci(mdi_vhcache_phci_t *cphci)
{
	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
	kmem_free(cphci, sizeof (*cphci));
}

/*
 * Free a vhcache pathinfo structure along with its address string.
 */
static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
{
	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
	kmem_free(cpi, sizeof (*cpi));
}

/*
 * Free a vhcache client structure along with its name@addr string.
 */
static void
free_vhcache_client(mdi_vhcache_client_t *cct)
{
	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
	kmem_free(cct, sizeof (*cct));
}

/*
 * Build the "<name>@<addr>" string used as the vhcache client key.
 * The string is kmem-allocated; if ret_len is non-NULL it is set to the
 * allocation length (strlen + 1) so the caller can kmem_free() it.
 */
static char *
vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
{
	char *name_addr;
	int len;

	/* +2: one for '@', one for the terminating NUL */
	len = strlen(ct_name) + strlen(ct_addr) + 2;
	name_addr = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);

	if (ret_len)
		*ret_len = len;
	return (name_addr);
}

/*
 * Copy the contents of paddrnvl to vhci cache.
 * paddrnvl nvlist contains path information for a vhci client.
 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
 */
static void
paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
    mdi_vhcache_client_t *cct)
{
	nvpair_t *nvp = NULL;
	mdi_vhcache_pathinfo_t *cpi;
	uint_t nelem;
	uint32_t *val;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
		ASSERT(nelem == 2);
		/* val[0] is the phci index, val[1] the cpi flags */
		cpi->cpi_cphci = cphci_list[val[0]];
		cpi->cpi_flags = val[1];
		enqueue_tail_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Copy the contents of caddrmapnvl to vhci cache.
 * caddrmapnvl nvlist contains vhci client address to phci client address
 * mappings. See the comment in mainnvl_to_vhcache() for the format of
 * this nvlist.
6838 */ 6839 static void 6840 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6841 mdi_vhcache_phci_t *cphci_list[]) 6842 { 6843 nvpair_t *nvp = NULL; 6844 nvlist_t *paddrnvl; 6845 mdi_vhcache_client_t *cct; 6846 6847 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6848 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6849 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6850 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6851 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6852 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6853 /* the client must contain at least one path */ 6854 ASSERT(cct->cct_cpi_head != NULL); 6855 6856 enqueue_vhcache_client(vhcache, cct); 6857 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6858 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6859 } 6860 } 6861 6862 /* 6863 * Copy the contents of the main nvlist to vhci cache. 6864 * 6865 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 6866 * The nvlist contains the mappings between the vhci client addresses and 6867 * their corresponding phci client addresses. 6868 * 6869 * The structure of the nvlist is as follows: 6870 * 6871 * Main nvlist: 6872 * NAME TYPE DATA 6873 * version int32 version number 6874 * phcis string array array of phci paths 6875 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 6876 * 6877 * structure of c2paddrs_nvl: 6878 * NAME TYPE DATA 6879 * caddr1 nvlist_t paddrs_nvl1 6880 * caddr2 nvlist_t paddrs_nvl2 6881 * ... 6882 * where caddr1, caddr2, ... are vhci client name and addresses in the 6883 * form of "<clientname>@<clientaddress>". 6884 * (for example: "ssd@2000002037cd9f72"); 6885 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 6886 * 6887 * structure of paddrs_nvl: 6888 * NAME TYPE DATA 6889 * pi_addr1 uint32_array (phci-id, cpi_flags) 6890 * pi_addr2 uint32_array (phci-id, cpi_flags) 6891 * ... 6892 * where pi_addr1, pi_addr2, ... 
 * are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify PHCIs to which the
 * the bus specific address belongs to. These integers are used as an index
 * into to the phcis string array in the main nvlist to get the PHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char **phcis, **phci_namep;
	uint_t nphcis;
	mdi_vhcache_phci_t *cphci, **cphci_list;
	nvlist_t *caddrmapnvl;
	int32_t ver;
	int i;
	size_t cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* a missing or mismatched version means the cache is unusable */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* no phci list means an empty (but valid) cache */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/*
	 * Build a temporary index->cphci table so the client map can
	 * resolve its phci-id references.
	 */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnnvl.
 * Returns 0 on success, errno on failure.
6943 */ 6944 static int 6945 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 6946 nvlist_t *caddrmapnvl) 6947 { 6948 mdi_vhcache_pathinfo_t *cpi; 6949 nvlist_t *nvl; 6950 int err; 6951 uint32_t val[2]; 6952 6953 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 6954 6955 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 6956 return (err); 6957 6958 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 6959 val[0] = cpi->cpi_cphci->cphci_id; 6960 val[1] = cpi->cpi_flags; 6961 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 6962 != 0) 6963 goto out; 6964 } 6965 6966 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 6967 out: 6968 nvlist_free(nvl); 6969 return (err); 6970 } 6971 6972 /* 6973 * Build caddrmapnvl using the information in the vhci cache 6974 * and add it to the mainnvl. 6975 * Returns 0 on success, errno on failure. 6976 */ 6977 static int 6978 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 6979 { 6980 mdi_vhcache_client_t *cct; 6981 nvlist_t *nvl; 6982 int err; 6983 6984 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 6985 6986 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 6987 return (err); 6988 6989 for (cct = vhcache->vhcache_client_head; cct != NULL; 6990 cct = cct->cct_next) { 6991 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 6992 goto out; 6993 } 6994 6995 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 6996 out: 6997 nvlist_free(nvl); 6998 return (err); 6999 } 7000 7001 /* 7002 * Build nvlist using the information in the vhci cache. 7003 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7004 * Returns nvl on success, NULL on failure. 
7005 */ 7006 static nvlist_t * 7007 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7008 { 7009 mdi_vhcache_phci_t *cphci; 7010 uint_t phci_count; 7011 char **phcis; 7012 nvlist_t *nvl; 7013 int err, i; 7014 7015 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7016 nvl = NULL; 7017 goto out; 7018 } 7019 7020 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7021 MDI_VHCI_CACHE_VERSION)) != 0) 7022 goto out; 7023 7024 rw_enter(&vhcache->vhcache_lock, RW_READER); 7025 if (vhcache->vhcache_phci_head == NULL) { 7026 rw_exit(&vhcache->vhcache_lock); 7027 return (nvl); 7028 } 7029 7030 phci_count = 0; 7031 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7032 cphci = cphci->cphci_next) 7033 cphci->cphci_id = phci_count++; 7034 7035 /* build phci pathname list */ 7036 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7037 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7038 cphci = cphci->cphci_next, i++) 7039 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7040 7041 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7042 phci_count); 7043 free_string_array(phcis, phci_count); 7044 7045 if (err == 0 && 7046 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7047 rw_exit(&vhcache->vhcache_lock); 7048 return (nvl); 7049 } 7050 7051 rw_exit(&vhcache->vhcache_lock); 7052 out: 7053 if (nvl) 7054 nvlist_free(nvl); 7055 return (NULL); 7056 } 7057 7058 /* 7059 * Lookup vhcache phci structure for the specified phci path. 7060 */ 7061 static mdi_vhcache_phci_t * 7062 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7063 { 7064 mdi_vhcache_phci_t *cphci; 7065 7066 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7067 7068 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7069 cphci = cphci->cphci_next) { 7070 if (strcmp(cphci->cphci_path, phci_path) == 0) 7071 return (cphci); 7072 } 7073 7074 return (NULL); 7075 } 7076 7077 /* 7078 * Lookup vhcache phci structure for the specified phci. 
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
{
	mdi_vhcache_phci_t *cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (cphci->cphci_phci == ph)
			return (cphci);
	}

	return (NULL);
}

/*
 * Add the specified phci to the vhci cache if not already present.
 */
static void
vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;
	char *pathname;
	int cache_updated;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(ph->ph_dip, pathname);
	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
	    != NULL) {
		/* already cached by path; just (re)bind the live phci */
		cphci->cphci_phci = ph;
		cache_updated = 0;
	} else {
		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
		cphci->cphci_phci = ph;
		enqueue_vhcache_phci(vhcache, cphci);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * Since a new phci has been added, reset
	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
	 * during next vhcache_discover_paths().
	 */
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_path_discovery_cutoff_time = 0;
	mutex_exit(&vhc->vhc_lock);

	kmem_free(pathname, MAXPATHLEN);
	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified phci from the vhci cache.
7141 */ 7142 static void 7143 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7144 { 7145 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7146 mdi_vhcache_phci_t *cphci; 7147 7148 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7149 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7150 /* do not remove the actual mdi_vhcache_phci structure */ 7151 cphci->cphci_phci = NULL; 7152 } 7153 rw_exit(&vhcache->vhcache_lock); 7154 } 7155 7156 static void 7157 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7158 mdi_vhcache_lookup_token_t *src) 7159 { 7160 if (src == NULL) { 7161 dst->lt_cct = NULL; 7162 dst->lt_cct_lookup_time = 0; 7163 } else { 7164 dst->lt_cct = src->lt_cct; 7165 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7166 } 7167 } 7168 7169 /* 7170 * Look up vhcache client for the specified client. 7171 */ 7172 static mdi_vhcache_client_t * 7173 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7174 mdi_vhcache_lookup_token_t *token) 7175 { 7176 mod_hash_val_t hv; 7177 char *name_addr; 7178 int len; 7179 7180 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7181 7182 /* 7183 * If no vhcache clean occurred since the last lookup, we can 7184 * simply return the cct from the last lookup operation. 7185 * It works because ccts are never freed except during the vhcache 7186 * cleanup operation. 
7187 */ 7188 if (token != NULL && 7189 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7190 return (token->lt_cct); 7191 7192 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7193 if (mod_hash_find(vhcache->vhcache_client_hash, 7194 (mod_hash_key_t)name_addr, &hv) == 0) { 7195 if (token) { 7196 token->lt_cct = (mdi_vhcache_client_t *)hv; 7197 token->lt_cct_lookup_time = lbolt64; 7198 } 7199 } else { 7200 if (token) { 7201 token->lt_cct = NULL; 7202 token->lt_cct_lookup_time = 0; 7203 } 7204 hv = NULL; 7205 } 7206 kmem_free(name_addr, len); 7207 return ((mdi_vhcache_client_t *)hv); 7208 } 7209 7210 /* 7211 * Add the specified path to the vhci cache if not already present. 7212 * Also add the vhcache client for the client corresponding to this path 7213 * if it doesn't already exist. 7214 */ 7215 static void 7216 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7217 { 7218 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7219 mdi_vhcache_client_t *cct; 7220 mdi_vhcache_pathinfo_t *cpi; 7221 mdi_phci_t *ph = pip->pi_phci; 7222 mdi_client_t *ct = pip->pi_client; 7223 int cache_updated = 0; 7224 7225 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7226 7227 /* if vhcache client for this pip doesn't already exist, add it */ 7228 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7229 NULL)) == NULL) { 7230 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7231 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7232 ct->ct_guid, NULL); 7233 enqueue_vhcache_client(vhcache, cct); 7234 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7235 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7236 cache_updated = 1; 7237 } 7238 7239 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7240 if (cpi->cpi_cphci->cphci_phci == ph && 7241 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7242 cpi->cpi_pip = pip; 7243 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7244 cpi->cpi_flags &= 7245 
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				/* the path exists again: restore ordering */
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	/* no existing entry matched; add a new cpi for this path */
	if (cpi == NULL) {
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_client_t *ct = pip->pi_client;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) != NULL) {
		for (cpi = cct->cct_cpi_head; cpi != NULL;
		    cpi = cpi->cpi_next) {
			if (cpi->cpi_pip == pip) {
				/* only drop the live-pip binding */
				cpi->cpi_pip = NULL;
				break;
			}
		}
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
7310 */ 7311 if (force_flag == 0 && !i_ddi_io_initialized()) 7312 return (MDI_FAILURE); 7313 7314 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7315 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7316 nvlist_free(nvl); 7317 } else 7318 err = EFAULT; 7319 7320 rv = MDI_SUCCESS; 7321 mutex_enter(&vhc->vhc_lock); 7322 if (err != 0) { 7323 if (err == EROFS) { 7324 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7325 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7326 MDI_VHC_VHCACHE_DIRTY); 7327 } else { 7328 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7329 cmn_err(CE_CONT, "%s: update failed\n", 7330 vhc->vhc_vhcache_filename); 7331 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7332 } 7333 rv = MDI_FAILURE; 7334 } 7335 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7336 cmn_err(CE_CONT, 7337 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7338 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7339 } 7340 mutex_exit(&vhc->vhc_lock); 7341 7342 return (rv); 7343 } 7344 7345 /* 7346 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7347 * Exits itself if left idle for the idle timeout period. 
7348 */ 7349 static void 7350 vhcache_flush_thread(void *arg) 7351 { 7352 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7353 clock_t idle_time, quit_at_ticks; 7354 callb_cpr_t cprinfo; 7355 7356 /* number of seconds to sleep idle before exiting */ 7357 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7358 7359 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7360 "mdi_vhcache_flush"); 7361 mutex_enter(&vhc->vhc_lock); 7362 for (; ; ) { 7363 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7364 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7365 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7366 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7367 (void) cv_timedwait(&vhc->vhc_cv, 7368 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7369 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7370 } else { 7371 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7372 mutex_exit(&vhc->vhc_lock); 7373 7374 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7375 vhcache_dirty(vhc); 7376 7377 mutex_enter(&vhc->vhc_lock); 7378 } 7379 } 7380 7381 quit_at_ticks = ddi_get_lbolt() + idle_time; 7382 7383 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7384 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7385 ddi_get_lbolt() < quit_at_ticks) { 7386 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7387 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7388 quit_at_ticks); 7389 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7390 } 7391 7392 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7393 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7394 goto out; 7395 } 7396 7397 out: 7398 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7399 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7400 CALLB_CPR_EXIT(&cprinfo); 7401 } 7402 7403 /* 7404 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7405 */ 7406 static void 7407 vhcache_dirty(mdi_vhci_config_t *vhc) 7408 { 7409 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7410 int create_thread; 7411 7412 rw_enter(&vhcache->vhcache_lock, RW_READER); 7413 /* do not flush cache until the cache is fully built */ 7414 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7415 rw_exit(&vhcache->vhcache_lock); 7416 return; 7417 } 7418 rw_exit(&vhcache->vhcache_lock); 7419 7420 mutex_enter(&vhc->vhc_lock); 7421 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7422 mutex_exit(&vhc->vhc_lock); 7423 return; 7424 } 7425 7426 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7427 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7428 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7429 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7430 cv_broadcast(&vhc->vhc_cv); 7431 create_thread = 0; 7432 } else { 7433 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7434 create_thread = 1; 7435 } 7436 mutex_exit(&vhc->vhc_lock); 7437 7438 if (create_thread) 7439 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7440 0, &p0, TS_RUN, minclsyspri); 7441 } 7442 7443 /* 7444 * phci bus config structure - one for for each phci bus config operation that 7445 * we initiate on behalf of a vhci. 
7446 */ 7447 typedef struct mdi_phci_bus_config_s { 7448 char *phbc_phci_path; 7449 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7450 struct mdi_phci_bus_config_s *phbc_next; 7451 } mdi_phci_bus_config_t; 7452 7453 /* vhci bus config structure - one for each vhci bus config operation */ 7454 typedef struct mdi_vhci_bus_config_s { 7455 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7456 major_t vhbc_op_major; /* bus config op major */ 7457 uint_t vhbc_op_flags; /* bus config op flags */ 7458 kmutex_t vhbc_lock; 7459 kcondvar_t vhbc_cv; 7460 int vhbc_thr_count; 7461 } mdi_vhci_bus_config_t; 7462 7463 /* 7464 * bus config the specified phci 7465 */ 7466 static void 7467 bus_config_phci(void *arg) 7468 { 7469 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7470 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7471 dev_info_t *ph_dip; 7472 7473 /* 7474 * first configure all path components upto phci and then configure 7475 * the phci children. 7476 */ 7477 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7478 != NULL) { 7479 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7480 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7481 (void) ndi_devi_config_driver(ph_dip, 7482 vhbc->vhbc_op_flags, 7483 vhbc->vhbc_op_major); 7484 } else 7485 (void) ndi_devi_config(ph_dip, 7486 vhbc->vhbc_op_flags); 7487 7488 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7489 ndi_rele_devi(ph_dip); 7490 } 7491 7492 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7493 kmem_free(phbc, sizeof (*phbc)); 7494 7495 mutex_enter(&vhbc->vhbc_lock); 7496 vhbc->vhbc_thr_count--; 7497 if (vhbc->vhbc_thr_count == 0) 7498 cv_broadcast(&vhbc->vhbc_cv); 7499 mutex_exit(&vhbc->vhbc_lock); 7500 } 7501 7502 /* 7503 * Bus config all phcis associated with the vhci in parallel. 7504 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7505 */ 7506 static void 7507 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7508 ddi_bus_config_op_t op, major_t maj) 7509 { 7510 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7511 mdi_vhci_bus_config_t *vhbc; 7512 mdi_vhcache_phci_t *cphci; 7513 7514 rw_enter(&vhcache->vhcache_lock, RW_READER); 7515 if (vhcache->vhcache_phci_head == NULL) { 7516 rw_exit(&vhcache->vhcache_lock); 7517 return; 7518 } 7519 7520 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7521 7522 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7523 cphci = cphci->cphci_next) { 7524 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7525 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7526 KM_SLEEP); 7527 phbc->phbc_vhbusconfig = vhbc; 7528 phbc->phbc_next = phbc_head; 7529 phbc_head = phbc; 7530 vhbc->vhbc_thr_count++; 7531 } 7532 rw_exit(&vhcache->vhcache_lock); 7533 7534 vhbc->vhbc_op = op; 7535 vhbc->vhbc_op_major = maj; 7536 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7537 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7538 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7539 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7540 7541 /* now create threads to initiate bus config on all phcis in parallel */ 7542 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7543 phbc_next = phbc->phbc_next; 7544 if (mdi_mtc_off) 7545 bus_config_phci((void *)phbc); 7546 else 7547 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7548 0, &p0, TS_RUN, minclsyspri); 7549 } 7550 7551 mutex_enter(&vhbc->vhbc_lock); 7552 /* wait until all threads exit */ 7553 while (vhbc->vhbc_thr_count > 0) 7554 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7555 mutex_exit(&vhbc->vhbc_lock); 7556 7557 mutex_destroy(&vhbc->vhbc_lock); 7558 cv_destroy(&vhbc->vhbc_cv); 7559 kmem_free(vhbc, sizeof (*vhbc)); 7560 } 7561 7562 /* 7563 * Single threaded version of bus_config_all_phcis() 7564 */ 7565 static void 7566 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7567 
ddi_bus_config_op_t op, major_t maj) 7568 { 7569 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7570 7571 single_threaded_vhconfig_enter(vhc); 7572 bus_config_all_phcis(vhcache, flags, op, maj); 7573 single_threaded_vhconfig_exit(vhc); 7574 } 7575 7576 /* 7577 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7578 * The path includes the child component in addition to the phci path. 7579 */ 7580 static int 7581 bus_config_one_phci_child(char *path) 7582 { 7583 dev_info_t *ph_dip, *child; 7584 char *devnm; 7585 int rv = MDI_FAILURE; 7586 7587 /* extract the child component of the phci */ 7588 devnm = strrchr(path, '/'); 7589 *devnm++ = '\0'; 7590 7591 /* 7592 * first configure all path components upto phci and then 7593 * configure the phci child. 7594 */ 7595 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7596 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7597 NDI_SUCCESS) { 7598 /* 7599 * release the hold that ndi_devi_config_one() placed 7600 */ 7601 ndi_rele_devi(child); 7602 rv = MDI_SUCCESS; 7603 } 7604 7605 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7606 ndi_rele_devi(ph_dip); 7607 } 7608 7609 devnm--; 7610 *devnm = '/'; 7611 return (rv); 7612 } 7613 7614 /* 7615 * Build a list of phci client paths for the specified vhci client. 7616 * The list includes only those phci client paths which aren't configured yet. 7617 */ 7618 static mdi_phys_path_t * 7619 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7620 { 7621 mdi_vhcache_pathinfo_t *cpi; 7622 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7623 int config_path, len; 7624 7625 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7626 /* 7627 * include only those paths that aren't configured. 
7628 */ 7629 config_path = 0; 7630 if (cpi->cpi_pip == NULL) 7631 config_path = 1; 7632 else { 7633 MDI_PI_LOCK(cpi->cpi_pip); 7634 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7635 config_path = 1; 7636 MDI_PI_UNLOCK(cpi->cpi_pip); 7637 } 7638 7639 if (config_path) { 7640 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7641 len = strlen(cpi->cpi_cphci->cphci_path) + 7642 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7643 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7644 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7645 cpi->cpi_cphci->cphci_path, ct_name, 7646 cpi->cpi_addr); 7647 pp->phys_path_next = NULL; 7648 7649 if (pp_head == NULL) 7650 pp_head = pp; 7651 else 7652 pp_tail->phys_path_next = pp; 7653 pp_tail = pp; 7654 } 7655 } 7656 7657 return (pp_head); 7658 } 7659 7660 /* 7661 * Free the memory allocated for phci client path list. 7662 */ 7663 static void 7664 free_phclient_path_list(mdi_phys_path_t *pp_head) 7665 { 7666 mdi_phys_path_t *pp, *pp_next; 7667 7668 for (pp = pp_head; pp != NULL; pp = pp_next) { 7669 pp_next = pp->phys_path_next; 7670 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7671 kmem_free(pp, sizeof (*pp)); 7672 } 7673 } 7674 7675 /* 7676 * Allocated async client structure and initialize with the specified values. 7677 */ 7678 static mdi_async_client_config_t * 7679 alloc_async_client_config(char *ct_name, char *ct_addr, 7680 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7681 { 7682 mdi_async_client_config_t *acc; 7683 7684 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7685 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7686 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7687 acc->acc_phclient_path_list_head = pp_head; 7688 init_vhcache_lookup_token(&acc->acc_token, tok); 7689 acc->acc_next = NULL; 7690 return (acc); 7691 } 7692 7693 /* 7694 * Free the memory allocated for the async client structure and their members. 
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	if (acc->acc_phclient_path_list_head)
		free_phclient_path_list(acc->acc_phclient_path_list_head);
	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 *
 * Implemented by detaching the whole list and re-enqueueing each element;
 * enqueue_vhcache_pathinfo() is what actually places an element according
 * to its hint flag. Caller must hold vhcache_lock as WRITER.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;

	cpi_head = cct->cct_cpi_head;
	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
		cpi_next = cpi->cpi_next;
		enqueue_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 * An update is needed when a cpi's hint flag disagrees with whether
	 * the path actually exists (cpi_pip != NULL).
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * Upgrade to WRITER. If the upgrade cannot be done atomically the
	 * lock is dropped and re-acquired, so the client must be looked up
	 * again (it may have been removed in the window).
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	/* hint flags are persisted on disk; schedule a flush */
	vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t *pp;

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
		(void) bus_config_one_phci_child(pp->phys_path);
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
7805 */ 7806 static void 7807 config_client_paths_thread(void *arg) 7808 { 7809 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7810 mdi_async_client_config_t *acc; 7811 clock_t quit_at_ticks; 7812 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 7813 callb_cpr_t cprinfo; 7814 7815 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7816 "mdi_config_client_paths"); 7817 7818 for (; ; ) { 7819 quit_at_ticks = ddi_get_lbolt() + idle_time; 7820 7821 mutex_enter(&vhc->vhc_lock); 7822 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7823 vhc->vhc_acc_list_head == NULL && 7824 ddi_get_lbolt() < quit_at_ticks) { 7825 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7826 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7827 quit_at_ticks); 7828 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7829 } 7830 7831 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7832 vhc->vhc_acc_list_head == NULL) 7833 goto out; 7834 7835 acc = vhc->vhc_acc_list_head; 7836 vhc->vhc_acc_list_head = acc->acc_next; 7837 if (vhc->vhc_acc_list_head == NULL) 7838 vhc->vhc_acc_list_tail = NULL; 7839 vhc->vhc_acc_count--; 7840 mutex_exit(&vhc->vhc_lock); 7841 7842 config_client_paths_sync(vhc, acc->acc_ct_name, 7843 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 7844 &acc->acc_token); 7845 7846 free_async_client_config(acc); 7847 } 7848 7849 out: 7850 vhc->vhc_acc_thrcount--; 7851 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7852 CALLB_CPR_EXIT(&cprinfo); 7853 } 7854 7855 /* 7856 * Arrange for all the phci client paths (pp_head) for the specified client 7857 * to be bus configured asynchronously by a thread. 
7858 */ 7859 static void 7860 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7861 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7862 { 7863 mdi_async_client_config_t *acc, *newacc; 7864 int create_thread; 7865 7866 if (pp_head == NULL) 7867 return; 7868 7869 if (mdi_mtc_off) { 7870 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7871 free_phclient_path_list(pp_head); 7872 return; 7873 } 7874 7875 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7876 ASSERT(newacc); 7877 7878 mutex_enter(&vhc->vhc_lock); 7879 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7880 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 7881 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 7882 free_async_client_config(newacc); 7883 mutex_exit(&vhc->vhc_lock); 7884 return; 7885 } 7886 } 7887 7888 if (vhc->vhc_acc_list_head == NULL) 7889 vhc->vhc_acc_list_head = newacc; 7890 else 7891 vhc->vhc_acc_list_tail->acc_next = newacc; 7892 vhc->vhc_acc_list_tail = newacc; 7893 vhc->vhc_acc_count++; 7894 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 7895 cv_broadcast(&vhc->vhc_cv); 7896 create_thread = 0; 7897 } else { 7898 vhc->vhc_acc_thrcount++; 7899 create_thread = 1; 7900 } 7901 mutex_exit(&vhc->vhc_lock); 7902 7903 if (create_thread) 7904 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 7905 0, &p0, TS_RUN, minclsyspri); 7906 } 7907 7908 /* 7909 * Return number of online paths for the specified client. 
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * NOTE: entered with vhcache_lock held (asserted below); this function
 * releases it on every code path. Callers must not touch the lock after
 * this returns.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * No path is online yet: configure paths synchronously one by one.
	 * As soon as one comes online, hand the remainder of the list off
	 * to the async machinery.
	 */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* client may have vanished while the lock was down */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				/* detach the handed-off tail before freeing */
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Acquire the single-threaded vhci config gate; blocks while another
 * thread holds MDI_VHC_SINGLE_THREADED.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Release the single-threaded vhci config gate and wake any waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
	};

static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };

/*
 * kmem-based realloc: allocate a zeroed buffer of new_size, copy over the
 * old contents (if any) and free the old buffer. new_size is assumed to
 * be >= old_size.
 */
static void *
mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
{
	void *new_ptr;

	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
	if (old_ptr) {
		bcopy(old_ptr, new_ptr, old_size);
		kmem_free(old_ptr, old_size);
	}
	return (new_ptr);
}

/*
 * Append a driver name (copied) and its root-support flag to the two
 * parallel arrays, growing them in chunks of 10 as needed.
 */
static void
add_to_phci_list(char ***driver_list, int **root_support_list,
    int *cur_elements, int *max_elements, char *driver_name, int root_support)
{
	ASSERT(*cur_elements <= *max_elements);
	if (*cur_elements == *max_elements) {
		*max_elements += 10;
		*driver_list = mdi_realloc(*driver_list,
		    sizeof (char *) * (*cur_elements),
		    sizeof (char *) * (*max_elements));
		*root_support_list = mdi_realloc(*root_support_list,
		    sizeof (int) * (*cur_elements),
		    sizeof (int) * (*max_elements));
	}
	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
	(*root_support_list)[*cur_elements] = root_support;
	(*cur_elements)++;
}

/*
 * Build the list of phci driver names (and their root-support flags) for
 * the given vhci class: first from driver.conf-declared properties
 * (DDI_VHCI_CLASS / DDI_NO_ROOT_SUPPORT), then from the built-in tables
 * above, skipping drivers already found via driver.conf.
 * Caller frees the returned arrays (see attach_phci_drivers()).
 */
static void
get_phci_driver_list(char *vhci_class, char ***driver_list,
    int **root_support_list, int *cur_elements, int *max_elements)
{
	mdi_phci_driver_info_t	*st_driver_list, *p;
	int		st_ndrivers, root_support, i, j, driver_conf_count;
	major_t		m;
	struct devnames	*dnp;
	ddi_prop_t	*propp;

	*driver_list = NULL;
	*root_support_list = NULL;
	*cur_elements = 0;
	*max_elements = 0;

	/* add the phci drivers derived from the phci driver.conf files */
	for (m = 0; m < devcnt; m++) {
		dnp = &devnamesp[m];

		if (dnp->dn_flags & DN_PHCI_DRIVER) {
			LOCK_DEV_OPS(&dnp->dn_lock);
			if (dnp->dn_global_prop_ptr != NULL &&
			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
			    strcmp(propp->prop_val, vhci_class) == 0) {

				/* absence of the property means root support */
				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
				    &dnp->dn_global_prop_ptr->prop_list)
				    == NULL) ? 1 : 0;

				add_to_phci_list(driver_list, root_support_list,
				    cur_elements, max_elements, dnp->dn_name,
				    root_support);

				UNLOCK_DEV_OPS(&dnp->dn_lock);
			} else
				UNLOCK_DEV_OPS(&dnp->dn_lock);
		}
	}

	driver_conf_count = *cur_elements;

	/* add the phci drivers specified in the built-in tables */
	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
		st_driver_list = scsi_phci_driver_list;
		st_ndrivers = sizeof (scsi_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
		st_driver_list = ib_phci_driver_list;
		st_ndrivers = sizeof (ib_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else {
		st_driver_list = NULL;
		st_ndrivers = 0;
	}

	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
		/* add this phci driver if not already added before */
		for (j = 0; j < driver_conf_count; j++) {
			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
				break;
		}
		if (j == driver_conf_count) {
			add_to_phci_list(driver_list, root_support_list,
			    cur_elements, max_elements, p->phdriver_name,
			    p->phdriver_root_support);
		}
	}
}

/*
 * Attach the phci driver instances associated with the specified vhci class.
 * If root is mounted attach all phci driver instances.
 * If root is not mounted, attach the instances of only those phci
 * drivers that have the root support.
8144 */ 8145 static void 8146 attach_phci_drivers(char *vhci_class) 8147 { 8148 char **driver_list, **p; 8149 int *root_support_list; 8150 int cur_elements, max_elements, i; 8151 major_t m; 8152 8153 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8154 &cur_elements, &max_elements); 8155 8156 for (i = 0; i < cur_elements; i++) { 8157 if (modrootloaded || root_support_list[i]) { 8158 m = ddi_name_to_major(driver_list[i]); 8159 if (m != (major_t)-1 && ddi_hold_installed_driver(m)) 8160 ddi_rele_driver(m); 8161 } 8162 } 8163 8164 if (driver_list) { 8165 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8166 kmem_free(*p, strlen(*p) + 1); 8167 kmem_free(driver_list, sizeof (char *) * max_elements); 8168 kmem_free(root_support_list, sizeof (int) * max_elements); 8169 } 8170 } 8171 8172 /* 8173 * Build vhci cache: 8174 * 8175 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8176 * the phci driver instances. During this process the cache gets built. 8177 * 8178 * Cache is built fully if the root is mounted. 8179 * If the root is not mounted, phci drivers that do not have root support 8180 * are not attached. As a result the cache is built partially. The entries 8181 * in the cache reflect only those phci drivers that have root support. 
8182 */ 8183 static int 8184 build_vhci_cache(mdi_vhci_t *vh) 8185 { 8186 mdi_vhci_config_t *vhc = vh->vh_config; 8187 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8188 8189 single_threaded_vhconfig_enter(vhc); 8190 8191 rw_enter(&vhcache->vhcache_lock, RW_READER); 8192 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8193 rw_exit(&vhcache->vhcache_lock); 8194 single_threaded_vhconfig_exit(vhc); 8195 return (0); 8196 } 8197 rw_exit(&vhcache->vhcache_lock); 8198 8199 attach_phci_drivers(vh->vh_class); 8200 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8201 BUS_CONFIG_ALL, (major_t)-1); 8202 8203 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8204 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8205 rw_exit(&vhcache->vhcache_lock); 8206 8207 single_threaded_vhconfig_exit(vhc); 8208 vhcache_dirty(vhc); 8209 return (1); 8210 } 8211 8212 /* 8213 * Determine if discovery of paths is needed. 8214 */ 8215 static int 8216 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8217 { 8218 int rv = 1; 8219 8220 mutex_enter(&vhc->vhc_lock); 8221 if (i_ddi_io_initialized() == 0) { 8222 if (vhc->vhc_path_discovery_boot > 0) { 8223 vhc->vhc_path_discovery_boot--; 8224 goto out; 8225 } 8226 } else { 8227 if (vhc->vhc_path_discovery_postboot > 0) { 8228 vhc->vhc_path_discovery_postboot--; 8229 goto out; 8230 } 8231 } 8232 8233 /* 8234 * Do full path discovery at most once per mdi_path_discovery_interval. 8235 * This is to avoid a series of full path discoveries when opening 8236 * stale /dev/[r]dsk links. 8237 */ 8238 if (mdi_path_discovery_interval != -1 && 8239 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8240 goto out; 8241 8242 rv = 0; 8243 out: 8244 mutex_exit(&vhc->vhc_lock); 8245 return (rv); 8246 } 8247 8248 /* 8249 * Discover all paths: 8250 * 8251 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8252 * driver instances. During this process all paths will be discovered. 
8253 */ 8254 static int 8255 vhcache_discover_paths(mdi_vhci_t *vh) 8256 { 8257 mdi_vhci_config_t *vhc = vh->vh_config; 8258 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8259 int rv = 0; 8260 8261 single_threaded_vhconfig_enter(vhc); 8262 8263 if (vhcache_do_discovery(vhc)) { 8264 attach_phci_drivers(vh->vh_class); 8265 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8266 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8267 8268 mutex_enter(&vhc->vhc_lock); 8269 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8270 mdi_path_discovery_interval * TICKS_PER_SECOND; 8271 mutex_exit(&vhc->vhc_lock); 8272 rv = 1; 8273 } 8274 8275 single_threaded_vhconfig_exit(vhc); 8276 return (rv); 8277 } 8278 8279 /* 8280 * Generic vhci bus config implementation: 8281 * 8282 * Parameters 8283 * vdip vhci dip 8284 * flags bus config flags 8285 * op bus config operation 8286 * The remaining parameters are bus config operation specific 8287 * 8288 * for BUS_CONFIG_ONE 8289 * arg pointer to name@addr 8290 * child upon successful return from this function, *child will be 8291 * set to the configured and held devinfo child node of vdip. 8292 * ct_addr pointer to client address (i.e. GUID) 8293 * 8294 * for BUS_CONFIG_DRIVER 8295 * arg major number of the driver 8296 * child and ct_addr parameters are ignored 8297 * 8298 * for BUS_CONFIG_ALL 8299 * arg, child, and ct_addr parameters are ignored 8300 * 8301 * Note that for the rest of the bus config operations, this function simply 8302 * calls the framework provided default bus config routine. 
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * While bus configuring phcis, the phci driver interactions with MDI
	 * cause child nodes to be enumerated under the vhci node for which
	 * they need to ndi_devi_enter the vhci node.
	 *
	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
	 * the bus config operations on phcis to finish while holding the
	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
	 * phcis and call the default framework provided bus config function
	 * if we are called with ndi_devi_enter lock held.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (CE_NOTE, vdip,
		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
		goto default_bus_config;
	}

	/* build the cache first if it hasn't been built yet */
	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		/* rv == 1 means the cache was built by this call */
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/* temporarily split "name@addr" at the '@' */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		/* skip if build_vhci_cache() just did a BUS_CONFIG_ALL */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
 * Returns NULL (after warning) if the file is unreadable or corrupt, in
 * which case the cache will be recreated from scratch.
 */
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
	nvlist_t *nvl;
	int err;
	char *filename;

	filename = vhclass2vhcache_filename(vhci_class);

	if ((err = fread_nvlist(filename, &nvl)) == 0) {
		kmem_free(filename, strlen(filename) + 1);
		return (nvl);
	} else if (err == EIO)
		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
	else if (err == EINVAL)
		cmn_err(CE_WARN,
		    "%s: data file corrupted, will recreate\n", filename);

	kmem_free(filename, strlen(filename) + 1);
	return (NULL);
}

/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 */
void
mdi_read_devices_files(void)
{
	int i;

	for (i = 0; i < N_VHCI_CLASSES; i++)
		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
}

/*
 * Remove all stale entries from vhci cache.
 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/*
	 * Detach the client list from the cache and rebuild it in place,
	 * re-enqueueing only entries that are still live.
	 */
	cct_head = vhcache->vhcache_client_head;
	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
	for (cct = cct_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;

		/*
		 * Same treatment for each client's pathinfo list: keep a
		 * path only if it is backed by an actual pathinfo node
		 * (cpi_pip != NULL); free stale ones.
		 */
		cpi_head = cct->cct_cpi_head;
		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			if (cpi->cpi_pip != NULL) {
				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
				enqueue_tail_vhcache_pathinfo(cct, cpi);
			} else
				free_vhcache_pathinfo(cpi);
		}

		/*
		 * A client with no surviving paths is stale: drop it from
		 * the name_addr hash as well as the list.
		 */
		if (cct->cct_cpi_head != NULL)
			enqueue_vhcache_client(vhcache, cct);
		else {
			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
			    (mod_hash_key_t)cct->cct_name_addr);
			free_vhcache_client(cct);
		}
	}

	/* keep only pHCI entries that still have a pHCI attached */
	cphci_head = vhcache->vhcache_phci_head;
	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		if (cphci->cphci_phci != NULL)
			enqueue_vhcache_phci(vhcache, cphci);
		else
			free_vhcache_phci(cphci);
	}

	/* record clean time and schedule a flush of the on-disk cache */
	vhcache->vhcache_clean_time = lbolt64;
	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}

/*
 * Remove all stale entries from vhci cache.
 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
 */
void
mdi_clean_vhcache(void)
{
	mdi_vhci_t *vh;

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		/*
		 * Hold the vhci via vh_refcnt so it stays on the list while
		 * mdi_mutex is dropped around the cache clean.
		 */
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		clean_vhcache(vh->vh_config);
		mutex_enter(&mdi_mutex);
		vh->vh_refcnt--;
	}
	mutex_exit(&mdi_mutex);
}

/*
 * mdi_vhci_walk_clients():
 *		Walker routine to traverse client dev_info nodes
 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
 * below the client, including nexus devices, which we dont want.
 * So we just traverse the immediate siblings, starting from 1st client.
 *
 * Each client is passed to f() with its MDI client lock held; the walk
 * stops as soon as f() returns anything other than DDI_WALK_CONTINUE.
 */
void
mdi_vhci_walk_clients(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	dev_info_t	*cdip;
	mdi_client_t	*ct;

	mutex_enter(&mdi_mutex);

	cdip = ddi_get_child(vdip);

	while (cdip) {
		ct = i_devi_get_client(cdip);
		MDI_CLIENT_LOCK(ct);

		switch ((*f)(cdip, arg)) {
		case DDI_WALK_CONTINUE:
			/* fetch the next sibling before dropping the lock */
			cdip = ddi_get_next_sibling(cdip);
			MDI_CLIENT_UNLOCK(ct);
			break;

		default:
			MDI_CLIENT_UNLOCK(ct);
			mutex_exit(&mdi_mutex);
			return;
		}
	}

	mutex_exit(&mdi_mutex);
}

/*
 * mdi_vhci_walk_phcis():
 *		Walker routine to traverse phci dev_info nodes
 *
 * Walks the vhci's pHCI list under mdi_mutex, calling f() for each pHCI
 * dip with that pHCI's lock held; stops when f() does not return
 * DDI_WALK_CONTINUE.
 */
void
mdi_vhci_walk_phcis(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = NULL;
	mdi_phci_t	*ph = NULL;

	mutex_enter(&mdi_mutex);

	vh = i_devi_get_vhci(vdip);
	ph = vh->vh_phci_head;

	while (ph) {
		MDI_PHCI_LOCK(ph);

		switch ((*f)(ph->ph_dip, arg)) {
		case DDI_WALK_CONTINUE:
			MDI_PHCI_UNLOCK(ph);
			ph = ph->ph_next;
			break;

		default:
			MDI_PHCI_UNLOCK(ph);
			mutex_exit(&mdi_mutex);
			return;
		}
	}

	mutex_exit(&mdi_mutex);
}


/*
 * mdi_walk_vhcis():
 *		Walker routine to traverse vhci dev_info nodes
 */
void
mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = NULL;

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		/*
		 * Bump vh_refcnt so the vhci cannot go away while
		 * mdi_mutex is dropped around the callback.
		 */
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
			break;
		} else {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
		}
	}

	mutex_exit(&mdi_mutex);
}

/*
 * i_mdi_log_sysevent():
 *		Logs events for pickup by syseventd
 *
 * Builds an nvlist attribute list (driver name, major, instance, device
 * pathname and class) and posts it via ddi_log_sysevent().  Best effort:
 * failures to add attributes silently skip the event; only allocation
 * failure is reported via MDI_DEBUG.
 */
static void
i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
{
	char		*path_name;
	nvlist_t	*attr_list;

	/*
	 * NOTE(review): nvlist_alloc() returns 0 on success; this compare
	 * against DDI_SUCCESS works only because DDI_SUCCESS is 0 —
	 * strictly it should test against 0.
	 */
	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
	    KM_SLEEP) != DDI_SUCCESS) {
		goto alloc_failed;
	}

	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path_name);

	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
	    ddi_driver_name(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_PATHNAME,
	    path_name) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_CLASS,
	    ph_vh_class) != DDI_SUCCESS) {
		goto error;
	}

	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
	    attr_list, NULL, DDI_SLEEP);

	/* success falls through: the cleanup below runs on all paths */
error:
	kmem_free(path_name, MAXPATHLEN);
	nvlist_free(attr_list);
	return;

alloc_failed:
	MDI_DEBUG(1, (CE_WARN, dip,
	    "!i_mdi_log_sysevent: Unable to send sysevent"));
}