1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 30 * detailed discussion of the overall mpxio architecture. 
31 * 32 * Default locking order: 33 * 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) 35 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 71 #ifdef DEBUG 72 #include <sys/debug.h> 73 int mdi_debug = 1; 74 #define MDI_DEBUG(level, stmnt) \ 75 if (mdi_debug >= (level)) i_mdi_log stmnt 76 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 77 #else /* !DEBUG */ 78 #define MDI_DEBUG(level, stmnt) 79 #endif /* DEBUG */ 80 81 extern pri_t minclsyspri; 82 extern int modrootloaded; 83 84 /* 85 * Global mutex: 86 * Protects vHCI list and structure members, pHCI and Client lists. 
87 */ 88 kmutex_t mdi_mutex; 89 90 /* 91 * Registered vHCI class driver lists 92 */ 93 int mdi_vhci_count; 94 mdi_vhci_t *mdi_vhci_head; 95 mdi_vhci_t *mdi_vhci_tail; 96 97 /* 98 * Client Hash Table size 99 */ 100 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 101 102 /* 103 * taskq interface definitions 104 */ 105 #define MDI_TASKQ_N_THREADS 8 106 #define MDI_TASKQ_PRI minclsyspri 107 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 108 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 109 110 taskq_t *mdi_taskq; 111 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 112 113 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 114 115 /* 116 * The data should be "quiet" for this interval (in seconds) before the 117 * vhci cached data is flushed to the disk. 118 */ 119 static int mdi_vhcache_flush_delay = 10; 120 121 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 122 static int mdi_vhcache_flush_daemon_idle_time = 60; 123 124 /* 125 * MDI falls back to discovery of all paths when a bus_config_one fails. 126 * The following parameters can be used to tune this operation. 127 * 128 * mdi_path_discovery_boot 129 * Number of times path discovery will be attempted during early boot. 130 * Probably there is no reason to ever set this value to greater than one. 131 * 132 * mdi_path_discovery_postboot 133 * Number of times path discovery will be attempted after early boot. 134 * Set it to a minimum of two to allow for discovery of iscsi paths which 135 * may happen very late during booting. 136 * 137 * mdi_path_discovery_interval 138 * Minimum number of seconds MDI will wait between successive discovery 139 * of all paths. Set it to -1 to disable discovery of all paths. 
140 */ 141 static int mdi_path_discovery_boot = 1; 142 static int mdi_path_discovery_postboot = 2; 143 static int mdi_path_discovery_interval = 10; 144 145 /* 146 * number of seconds the asynchronous configuration thread will sleep idle 147 * before exiting. 148 */ 149 static int mdi_async_config_idle_time = 600; 150 151 static int mdi_bus_config_cache_hash_size = 256; 152 153 /* turns off multithreaded configuration for certain operations */ 154 static int mdi_mtc_off = 0; 155 156 /* 157 * MDI component property name/value string definitions 158 */ 159 const char *mdi_component_prop = "mpxio-component"; 160 const char *mdi_component_prop_vhci = "vhci"; 161 const char *mdi_component_prop_phci = "phci"; 162 const char *mdi_component_prop_client = "client"; 163 164 /* 165 * MDI client global unique identifier property name 166 */ 167 const char *mdi_client_guid_prop = "client-guid"; 168 169 /* 170 * MDI client load balancing property name/value string definitions 171 */ 172 const char *mdi_load_balance = "load-balance"; 173 const char *mdi_load_balance_none = "none"; 174 const char *mdi_load_balance_rr = "round-robin"; 175 const char *mdi_load_balance_lba = "logical-block"; 176 177 /* 178 * Obsolete vHCI class definition; to be removed after Leadville update 179 */ 180 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 181 182 static char vhci_greeting[] = 183 "\tThere already exists one vHCI driver for class %s\n" 184 "\tOnly one vHCI driver for each class is allowed\n"; 185 186 /* 187 * Static function prototypes 188 */ 189 static int i_mdi_phci_offline(dev_info_t *, uint_t); 190 static int i_mdi_client_offline(dev_info_t *, uint_t); 191 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 192 static void i_mdi_phci_post_detach(dev_info_t *, 193 ddi_detach_cmd_t, int); 194 static int i_mdi_client_pre_detach(dev_info_t *, 195 ddi_detach_cmd_t); 196 static void i_mdi_client_post_detach(dev_info_t *, 197 ddi_detach_cmd_t, int); 198 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 199 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 200 static int i_mdi_lba_lb(mdi_client_t *ct, 201 mdi_pathinfo_t **ret_pip, struct buf *buf); 202 static void i_mdi_pm_hold_client(mdi_client_t *, int); 203 static void i_mdi_pm_rele_client(mdi_client_t *, int); 204 static void i_mdi_pm_reset_client(mdi_client_t *); 205 static void i_mdi_pm_hold_all_phci(mdi_client_t *); 206 static int i_mdi_power_all_phci(mdi_client_t *); 207 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 208 209 210 /* 211 * Internal mdi_pathinfo node functions 212 */ 213 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 214 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 215 216 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 217 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 218 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 219 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 220 static void i_mdi_phci_get_client_lock(mdi_phci_t *, 221 mdi_client_t *); 222 static void i_mdi_phci_unlock(mdi_phci_t *); 223 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 224 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 225 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 226 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 227 mdi_client_t *); 228 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 229 static void i_mdi_client_remove_path(mdi_client_t *, 230 mdi_pathinfo_t *); 231 232 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 233 mdi_pathinfo_state_t, int); 234 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 235 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 236 char **, int); 237 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 238 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 239 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 240 static 
mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 241 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 242 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 243 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 244 static void i_mdi_client_update_state(mdi_client_t *); 245 static int i_mdi_client_compute_state(mdi_client_t *, 246 mdi_phci_t *); 247 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 248 static void i_mdi_client_unlock(mdi_client_t *); 249 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 250 static mdi_client_t *i_devi_get_client(dev_info_t *); 251 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int, 252 int); 253 /* 254 * Failover related function prototypes 255 */ 256 static int i_mdi_failover(void *); 257 258 /* 259 * misc internal functions 260 */ 261 static int i_mdi_get_hash_key(char *); 262 static int i_map_nvlist_error_to_mdi(int); 263 static void i_mdi_report_path_state(mdi_client_t *, 264 mdi_pathinfo_t *); 265 266 static void setup_vhci_cache(mdi_vhci_t *); 267 static int destroy_vhci_cache(mdi_vhci_t *); 268 static void setup_phci_driver_list(mdi_vhci_t *); 269 static void free_phci_driver_list(mdi_vhci_config_t *); 270 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 271 static boolean_t stop_vhcache_flush_thread(void *, int); 272 static void free_string_array(char **, int); 273 static void free_vhcache_phci(mdi_vhcache_phci_t *); 274 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 275 static void free_vhcache_client(mdi_vhcache_client_t *); 276 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 277 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 278 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 279 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 280 static void vhcache_pi_add(mdi_vhci_config_t *, 281 struct mdi_pathinfo *); 282 static void 
vhcache_pi_remove(mdi_vhci_config_t *, 283 struct mdi_pathinfo *); 284 static void free_phclient_path_list(mdi_phys_path_t *); 285 static void sort_vhcache_paths(mdi_vhcache_client_t *); 286 static int flush_vhcache(mdi_vhci_config_t *, int); 287 static void vhcache_dirty(mdi_vhci_config_t *); 288 static void free_async_client_config(mdi_async_client_config_t *); 289 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 290 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 291 static nvlist_t *read_on_disk_vhci_cache(char *); 292 extern int fread_nvlist(char *, nvlist_t **); 293 extern int fwrite_nvlist(char *, nvlist_t *); 294 295 /* called once when first vhci registers with mdi */ 296 static void 297 i_mdi_init() 298 { 299 static int initialized = 0; 300 301 if (initialized) 302 return; 303 initialized = 1; 304 305 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 306 /* 307 * Create our taskq resources 308 */ 309 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 310 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 311 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 312 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 313 } 314 315 /* 316 * mdi_get_component_type(): 317 * Return mpxio component type 318 * Return Values: 319 * MDI_COMPONENT_NONE 320 * MDI_COMPONENT_VHCI 321 * MDI_COMPONENT_PHCI 322 * MDI_COMPONENT_CLIENT 323 * XXX This doesn't work under multi-level MPxIO and should be 324 * removed when clients migrate mdi_is_*() interfaces. 325 */ 326 int 327 mdi_get_component_type(dev_info_t *dip) 328 { 329 return (DEVI(dip)->devi_mdi_component); 330 } 331 332 /* 333 * mdi_vhci_register(): 334 * Register a vHCI module with the mpxio framework 335 * mdi_vhci_register() is called by vHCI drivers to register the 336 * 'class_driver' vHCI driver and its MDI entrypoints with the 337 * mpxio framework. The vHCI driver must call this interface as 338 * part of its attach(9e) handler. 
339 * Competing threads may try to attach mdi_vhci_register() as 340 * the vHCI drivers are loaded and attached as a result of pHCI 341 * driver instance registration (mdi_phci_register()) with the 342 * framework. 343 * Return Values: 344 * MDI_SUCCESS 345 * MDI_FAILURE 346 */ 347 348 /*ARGSUSED*/ 349 int 350 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 351 int flags) 352 { 353 mdi_vhci_t *vh = NULL; 354 355 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 356 357 i_mdi_init(); 358 359 mutex_enter(&mdi_mutex); 360 /* 361 * Scan for already registered vhci 362 */ 363 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 364 if (strcmp(vh->vh_class, class) == 0) { 365 /* 366 * vHCI has already been created. Check for valid 367 * vHCI ops registration. We only support one vHCI 368 * module per class 369 */ 370 if (vh->vh_ops != NULL) { 371 mutex_exit(&mdi_mutex); 372 cmn_err(CE_NOTE, vhci_greeting, class); 373 return (MDI_FAILURE); 374 } 375 break; 376 } 377 } 378 379 /* 380 * if not yet created, create the vHCI component 381 */ 382 if (vh == NULL) { 383 struct client_hash *hash = NULL; 384 char *load_balance; 385 386 /* 387 * Allocate and initialize the mdi extensions 388 */ 389 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 390 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 391 KM_SLEEP); 392 vh->vh_client_table = hash; 393 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 394 (void) strcpy(vh->vh_class, class); 395 vh->vh_lb = LOAD_BALANCE_RR; 396 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 397 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 398 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 399 vh->vh_lb = LOAD_BALANCE_NONE; 400 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 401 == 0) { 402 vh->vh_lb = LOAD_BALANCE_LBA; 403 } 404 ddi_prop_free(load_balance); 405 } 406 407 /* 408 * Store the vHCI ops vectors 409 */ 410 vh->vh_dip = vdip; 411 vh->vh_ops = vops; 412 413 
setup_vhci_cache(vh); 414 415 if (mdi_vhci_head == NULL) { 416 mdi_vhci_head = vh; 417 } 418 if (mdi_vhci_tail) { 419 mdi_vhci_tail->vh_next = vh; 420 } 421 mdi_vhci_tail = vh; 422 mdi_vhci_count++; 423 } 424 425 /* 426 * Claim the devfs node as a vhci component 427 */ 428 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 429 430 /* 431 * Initialize our back reference from dev_info node 432 */ 433 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 434 mutex_exit(&mdi_mutex); 435 return (MDI_SUCCESS); 436 } 437 438 /* 439 * mdi_vhci_unregister(): 440 * Unregister a vHCI module from mpxio framework 441 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 442 * of a vhci to unregister it from the framework. 443 * Return Values: 444 * MDI_SUCCESS 445 * MDI_FAILURE 446 */ 447 448 /*ARGSUSED*/ 449 int 450 mdi_vhci_unregister(dev_info_t *vdip, int flags) 451 { 452 mdi_vhci_t *found, *vh, *prev = NULL; 453 454 /* 455 * Check for invalid VHCI 456 */ 457 if ((vh = i_devi_get_vhci(vdip)) == NULL) 458 return (MDI_FAILURE); 459 460 mutex_enter(&mdi_mutex); 461 462 /* 463 * Scan the list of registered vHCIs for a match 464 */ 465 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 466 if (found == vh) 467 break; 468 prev = found; 469 } 470 471 if (found == NULL) { 472 mutex_exit(&mdi_mutex); 473 return (MDI_FAILURE); 474 } 475 476 /* 477 * Check the vHCI, pHCI and client count. All the pHCIs and clients 478 * should have been unregistered, before a vHCI can be 479 * unregistered. 
480 */ 481 if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) { 482 mutex_exit(&mdi_mutex); 483 return (MDI_FAILURE); 484 } 485 486 /* 487 * Remove the vHCI from the global list 488 */ 489 if (vh == mdi_vhci_head) { 490 mdi_vhci_head = vh->vh_next; 491 } else { 492 prev->vh_next = vh->vh_next; 493 } 494 if (vh == mdi_vhci_tail) { 495 mdi_vhci_tail = prev; 496 } 497 498 mdi_vhci_count--; 499 mutex_exit(&mdi_mutex); 500 501 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 502 /* add vhci to the global list */ 503 mutex_enter(&mdi_mutex); 504 if (mdi_vhci_head == NULL) 505 mdi_vhci_head = vh; 506 else 507 mdi_vhci_tail->vh_next = vh; 508 mdi_vhci_tail = vh; 509 mdi_vhci_count++; 510 mutex_exit(&mdi_mutex); 511 return (MDI_FAILURE); 512 } 513 514 vh->vh_ops = NULL; 515 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 516 DEVI(vdip)->devi_mdi_xhci = NULL; 517 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 518 kmem_free(vh->vh_client_table, 519 mdi_client_table_size * sizeof (struct client_hash)); 520 kmem_free(vh, sizeof (mdi_vhci_t)); 521 return (MDI_SUCCESS); 522 } 523 524 /* 525 * i_mdi_vhci_class2vhci(): 526 * Look for a matching vHCI module given a vHCI class name 527 * Return Values: 528 * Handle to a vHCI component 529 * NULL 530 */ 531 static mdi_vhci_t * 532 i_mdi_vhci_class2vhci(char *class) 533 { 534 mdi_vhci_t *vh = NULL; 535 536 ASSERT(!MUTEX_HELD(&mdi_mutex)); 537 538 mutex_enter(&mdi_mutex); 539 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 540 if (strcmp(vh->vh_class, class) == 0) { 541 break; 542 } 543 } 544 mutex_exit(&mdi_mutex); 545 return (vh); 546 } 547 548 /* 549 * i_devi_get_vhci(): 550 * Utility function to get the handle to a vHCI component 551 * Return Values: 552 * Handle to a vHCI component 553 * NULL 554 */ 555 mdi_vhci_t * 556 i_devi_get_vhci(dev_info_t *vdip) 557 { 558 mdi_vhci_t *vh = NULL; 559 if (MDI_VHCI(vdip)) { 560 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 561 } 562 return (vh); 563 } 564 565 /* 566 * 
mdi_phci_register(): 567 * Register a pHCI module with mpxio framework 568 * mdi_phci_register() is called by pHCI drivers to register with 569 * the mpxio framework and a specific 'class_driver' vHCI. The 570 * pHCI driver must call this interface as part of its attach(9e) 571 * handler. 572 * Return Values: 573 * MDI_SUCCESS 574 * MDI_FAILURE 575 */ 576 577 /*ARGSUSED*/ 578 int 579 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 580 { 581 mdi_phci_t *ph; 582 mdi_vhci_t *vh; 583 char *data; 584 char *pathname; 585 586 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 587 (void) ddi_pathname(pdip, pathname); 588 589 /* 590 * Check for mpxio-disable property. Enable mpxio if the property is 591 * missing or not set to "yes". 592 * If the property is set to "yes" then emit a brief message. 593 */ 594 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 595 &data) == DDI_SUCCESS)) { 596 if (strcmp(data, "yes") == 0) { 597 MDI_DEBUG(1, (CE_CONT, pdip, 598 "?%s (%s%d) multipath capabilities " 599 "disabled via %s.conf.\n", pathname, 600 ddi_driver_name(pdip), ddi_get_instance(pdip), 601 ddi_driver_name(pdip))); 602 ddi_prop_free(data); 603 kmem_free(pathname, MAXPATHLEN); 604 return (MDI_FAILURE); 605 } 606 ddi_prop_free(data); 607 } 608 609 kmem_free(pathname, MAXPATHLEN); 610 611 /* 612 * Search for a matching vHCI 613 */ 614 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 615 if (vh == NULL) { 616 return (MDI_FAILURE); 617 } 618 619 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 620 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 621 ph->ph_dip = pdip; 622 ph->ph_vhci = vh; 623 ph->ph_next = NULL; 624 ph->ph_unstable = 0; 625 ph->ph_vprivate = 0; 626 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 627 cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL); 628 629 MDI_PHCI_SET_POWER_UP(ph); 630 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 631 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 632 633 
vhcache_phci_add(vh->vh_config, ph); 634 635 mutex_enter(&mdi_mutex); 636 if (vh->vh_phci_head == NULL) { 637 vh->vh_phci_head = ph; 638 } 639 if (vh->vh_phci_tail) { 640 vh->vh_phci_tail->ph_next = ph; 641 } 642 vh->vh_phci_tail = ph; 643 vh->vh_phci_count++; 644 mutex_exit(&mdi_mutex); 645 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 646 return (MDI_SUCCESS); 647 } 648 649 /* 650 * mdi_phci_unregister(): 651 * Unregister a pHCI module from mpxio framework 652 * mdi_phci_unregister() is called by the pHCI drivers from their 653 * detach(9E) handler to unregister their instances from the 654 * framework. 655 * Return Values: 656 * MDI_SUCCESS 657 * MDI_FAILURE 658 */ 659 660 /*ARGSUSED*/ 661 int 662 mdi_phci_unregister(dev_info_t *pdip, int flags) 663 { 664 mdi_vhci_t *vh; 665 mdi_phci_t *ph; 666 mdi_phci_t *tmp; 667 mdi_phci_t *prev = NULL; 668 669 ph = i_devi_get_phci(pdip); 670 if (ph == NULL) { 671 MDI_DEBUG(1, (CE_WARN, pdip, 672 "!pHCI unregister: Not a valid pHCI")); 673 return (MDI_FAILURE); 674 } 675 676 vh = ph->ph_vhci; 677 ASSERT(vh != NULL); 678 if (vh == NULL) { 679 MDI_DEBUG(1, (CE_WARN, pdip, 680 "!pHCI unregister: Not a valid vHCI")); 681 return (MDI_FAILURE); 682 } 683 684 mutex_enter(&mdi_mutex); 685 tmp = vh->vh_phci_head; 686 while (tmp) { 687 if (tmp == ph) { 688 break; 689 } 690 prev = tmp; 691 tmp = tmp->ph_next; 692 } 693 694 if (ph == vh->vh_phci_head) { 695 vh->vh_phci_head = ph->ph_next; 696 } else { 697 prev->ph_next = ph->ph_next; 698 } 699 700 if (ph == vh->vh_phci_tail) { 701 vh->vh_phci_tail = prev; 702 } 703 704 vh->vh_phci_count--; 705 706 mutex_exit(&mdi_mutex); 707 708 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 709 ESC_DDI_INITIATOR_UNREGISTER); 710 vhcache_phci_remove(vh->vh_config, ph); 711 cv_destroy(&ph->ph_unstable_cv); 712 cv_destroy(&ph->ph_powerchange_cv); 713 mutex_destroy(&ph->ph_mutex); 714 kmem_free(ph, sizeof (mdi_phci_t)); 715 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 716 
DEVI(pdip)->devi_mdi_xhci = NULL; 717 return (MDI_SUCCESS); 718 } 719 720 /* 721 * i_devi_get_phci(): 722 * Utility function to return the phci extensions. 723 */ 724 static mdi_phci_t * 725 i_devi_get_phci(dev_info_t *pdip) 726 { 727 mdi_phci_t *ph = NULL; 728 if (MDI_PHCI(pdip)) { 729 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 730 } 731 return (ph); 732 } 733 734 /* 735 * mdi_phci_path2devinfo(): 736 * Utility function to search for a valid phci device given 737 * the devfs pathname. 738 */ 739 740 dev_info_t * 741 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 742 { 743 char *temp_pathname; 744 mdi_vhci_t *vh; 745 mdi_phci_t *ph; 746 dev_info_t *pdip = NULL; 747 748 vh = i_devi_get_vhci(vdip); 749 ASSERT(vh != NULL); 750 751 if (vh == NULL) { 752 /* 753 * Invalid vHCI component, return failure 754 */ 755 return (NULL); 756 } 757 758 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 759 mutex_enter(&mdi_mutex); 760 ph = vh->vh_phci_head; 761 while (ph != NULL) { 762 pdip = ph->ph_dip; 763 ASSERT(pdip != NULL); 764 *temp_pathname = '\0'; 765 (void) ddi_pathname(pdip, temp_pathname); 766 if (strcmp(temp_pathname, pathname) == 0) { 767 break; 768 } 769 ph = ph->ph_next; 770 } 771 if (ph == NULL) { 772 pdip = NULL; 773 } 774 mutex_exit(&mdi_mutex); 775 kmem_free(temp_pathname, MAXPATHLEN); 776 return (pdip); 777 } 778 779 /* 780 * mdi_phci_get_path_count(): 781 * get number of path information nodes associated with a given 782 * pHCI device. 
783 */ 784 int 785 mdi_phci_get_path_count(dev_info_t *pdip) 786 { 787 mdi_phci_t *ph; 788 int count = 0; 789 790 ph = i_devi_get_phci(pdip); 791 if (ph != NULL) { 792 count = ph->ph_path_count; 793 } 794 return (count); 795 } 796 797 /* 798 * i_mdi_phci_lock(): 799 * Lock a pHCI device 800 * Return Values: 801 * None 802 * Note: 803 * The default locking order is: 804 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 805 * But there are number of situations where locks need to be 806 * grabbed in reverse order. This routine implements try and lock 807 * mechanism depending on the requested parameter option. 808 */ 809 static void 810 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 811 { 812 if (pip) { 813 /* Reverse locking is requested. */ 814 while (MDI_PHCI_TRYLOCK(ph) == 0) { 815 /* 816 * tryenter failed. Try to grab again 817 * after a small delay 818 */ 819 MDI_PI_HOLD(pip); 820 MDI_PI_UNLOCK(pip); 821 delay(1); 822 MDI_PI_LOCK(pip); 823 MDI_PI_RELE(pip); 824 } 825 } else { 826 MDI_PHCI_LOCK(ph); 827 } 828 } 829 830 /* 831 * i_mdi_phci_get_client_lock(): 832 * Lock a pHCI device 833 * Return Values: 834 * None 835 * Note: 836 * The default locking order is: 837 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 838 * But there are number of situations where locks need to be 839 * grabbed in reverse order. This routine implements try and lock 840 * mechanism depending on the requested parameter option. 841 */ 842 static void 843 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct) 844 { 845 if (ct) { 846 /* Reverse locking is requested. */ 847 while (MDI_PHCI_TRYLOCK(ph) == 0) { 848 /* 849 * tryenter failed. 
Try to grab again 850 * after a small delay 851 */ 852 MDI_CLIENT_UNLOCK(ct); 853 delay(1); 854 MDI_CLIENT_LOCK(ct); 855 } 856 } else { 857 MDI_PHCI_LOCK(ph); 858 } 859 } 860 861 /* 862 * i_mdi_phci_unlock(): 863 * Unlock the pHCI component 864 */ 865 static void 866 i_mdi_phci_unlock(mdi_phci_t *ph) 867 { 868 MDI_PHCI_UNLOCK(ph); 869 } 870 871 /* 872 * i_mdi_devinfo_create(): 873 * create client device's devinfo node 874 * Return Values: 875 * dev_info 876 * NULL 877 * Notes: 878 */ 879 static dev_info_t * 880 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 881 char **compatible, int ncompatible) 882 { 883 dev_info_t *cdip = NULL; 884 885 ASSERT(MUTEX_HELD(&mdi_mutex)); 886 887 /* Verify for duplicate entry */ 888 cdip = i_mdi_devinfo_find(vh, name, guid); 889 ASSERT(cdip == NULL); 890 if (cdip) { 891 cmn_err(CE_WARN, 892 "i_mdi_devinfo_create: client dip %p already exists", 893 (void *)cdip); 894 } 895 896 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 897 if (cdip == NULL) 898 goto fail; 899 900 /* 901 * Create component type and Global unique identifier 902 * properties 903 */ 904 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 905 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 906 goto fail; 907 } 908 909 /* Decorate the node with compatible property */ 910 if (compatible && 911 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 912 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 913 goto fail; 914 } 915 916 return (cdip); 917 918 fail: 919 if (cdip) { 920 (void) ndi_prop_remove_all(cdip); 921 (void) ndi_devi_free(cdip); 922 } 923 return (NULL); 924 } 925 926 /* 927 * i_mdi_devinfo_find(): 928 * Find a matching devinfo node for given client node name 929 * and its guid. 
930 * Return Values: 931 * Handle to a dev_info node or NULL 932 */ 933 934 static dev_info_t * 935 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 936 { 937 char *data; 938 dev_info_t *cdip = NULL; 939 dev_info_t *ndip = NULL; 940 int circular; 941 942 ndi_devi_enter(vh->vh_dip, &circular); 943 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 944 while ((cdip = ndip) != NULL) { 945 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 946 947 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 948 continue; 949 } 950 951 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 952 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 953 &data) != DDI_PROP_SUCCESS) { 954 continue; 955 } 956 957 if (strcmp(data, guid) != 0) { 958 ddi_prop_free(data); 959 continue; 960 } 961 ddi_prop_free(data); 962 break; 963 } 964 ndi_devi_exit(vh->vh_dip, circular); 965 return (cdip); 966 } 967 968 /* 969 * i_mdi_devinfo_remove(): 970 * Remove a client device node 971 */ 972 static int 973 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 974 { 975 int rv = MDI_SUCCESS; 976 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 977 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 978 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 979 if (rv != NDI_SUCCESS) { 980 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 981 " failed. 
cdip = %p\n", cdip)); 982 } 983 /* 984 * Convert to MDI error code 985 */ 986 switch (rv) { 987 case NDI_SUCCESS: 988 rv = MDI_SUCCESS; 989 break; 990 case NDI_BUSY: 991 rv = MDI_BUSY; 992 break; 993 default: 994 rv = MDI_FAILURE; 995 break; 996 } 997 } 998 return (rv); 999 } 1000 1001 /* 1002 * i_devi_get_client() 1003 * Utility function to get mpxio component extensions 1004 */ 1005 static mdi_client_t * 1006 i_devi_get_client(dev_info_t *cdip) 1007 { 1008 mdi_client_t *ct = NULL; 1009 if (MDI_CLIENT(cdip)) { 1010 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1011 } 1012 return (ct); 1013 } 1014 1015 /* 1016 * i_mdi_is_child_present(): 1017 * Search for the presence of client device dev_info node 1018 */ 1019 1020 static int 1021 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1022 { 1023 int rv = MDI_FAILURE; 1024 struct dev_info *dip; 1025 int circular; 1026 1027 ndi_devi_enter(vdip, &circular); 1028 dip = DEVI(vdip)->devi_child; 1029 while (dip) { 1030 if (dip == DEVI(cdip)) { 1031 rv = MDI_SUCCESS; 1032 break; 1033 } 1034 dip = dip->devi_sibling; 1035 } 1036 ndi_devi_exit(vdip, circular); 1037 return (rv); 1038 } 1039 1040 1041 /* 1042 * i_mdi_client_lock(): 1043 * Grab client component lock 1044 * Return Values: 1045 * None 1046 * Note: 1047 * The default locking order is: 1048 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1049 * But there are number of situations where locks need to be 1050 * grabbed in reverse order. This routine implements try and lock 1051 * mechanism depending on the requested parameter option. 1052 */ 1053 1054 static void 1055 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1056 { 1057 if (pip) { 1058 /* 1059 * Reverse locking is requested. 1060 */ 1061 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1062 /* 1063 * tryenter failed. 
Try to grab again 1064 * after a small delay 1065 */ 1066 MDI_PI_HOLD(pip); 1067 MDI_PI_UNLOCK(pip); 1068 delay(1); 1069 MDI_PI_LOCK(pip); 1070 MDI_PI_RELE(pip); 1071 } 1072 } else { 1073 MDI_CLIENT_LOCK(ct); 1074 } 1075 } 1076 1077 /* 1078 * i_mdi_client_unlock(): 1079 * Unlock a client component 1080 */ 1081 1082 static void 1083 i_mdi_client_unlock(mdi_client_t *ct) 1084 { 1085 MDI_CLIENT_UNLOCK(ct); 1086 } 1087 1088 /* 1089 * i_mdi_client_alloc(): 1090 * Allocate and initialize a client structure. Caller should 1091 * hold the global mdi_mutex. 1092 * Return Values: 1093 * Handle to a client component 1094 */ 1095 /*ARGSUSED*/ 1096 static mdi_client_t * 1097 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1098 { 1099 mdi_client_t *ct; 1100 1101 ASSERT(MUTEX_HELD(&mdi_mutex)); 1102 1103 /* 1104 * Allocate and initialize a component structure. 1105 */ 1106 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1107 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1108 ct->ct_hnext = NULL; 1109 ct->ct_hprev = NULL; 1110 ct->ct_dip = NULL; 1111 ct->ct_vhci = vh; 1112 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1113 (void) strcpy(ct->ct_drvname, name); 1114 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1115 (void) strcpy(ct->ct_guid, lguid); 1116 ct->ct_cprivate = NULL; 1117 ct->ct_vprivate = NULL; 1118 ct->ct_flags = 0; 1119 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1120 MDI_CLIENT_SET_OFFLINE(ct); 1121 MDI_CLIENT_SET_DETACH(ct); 1122 MDI_CLIENT_SET_POWER_UP(ct); 1123 ct->ct_failover_flags = 0; 1124 ct->ct_failover_status = 0; 1125 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1126 ct->ct_unstable = 0; 1127 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1128 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1129 ct->ct_lb = vh->vh_lb; 1130 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1131 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1132 ct->ct_path_count = 0; 1133 ct->ct_path_head 
= NULL; 1134 ct->ct_path_tail = NULL; 1135 ct->ct_path_last = NULL; 1136 1137 /* 1138 * Add this client component to our client hash queue 1139 */ 1140 i_mdi_client_enlist_table(vh, ct); 1141 return (ct); 1142 } 1143 1144 /* 1145 * i_mdi_client_enlist_table(): 1146 * Attach the client device to the client hash table. Caller 1147 * should hold the mdi_mutex 1148 */ 1149 1150 static void 1151 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1152 { 1153 int index; 1154 struct client_hash *head; 1155 1156 ASSERT(MUTEX_HELD(&mdi_mutex)); 1157 index = i_mdi_get_hash_key(ct->ct_guid); 1158 head = &vh->vh_client_table[index]; 1159 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1160 head->ct_hash_head = ct; 1161 head->ct_hash_count++; 1162 vh->vh_client_count++; 1163 } 1164 1165 /* 1166 * i_mdi_client_delist_table(): 1167 * Attach the client device to the client hash table. 1168 * Caller should hold the mdi_mutex 1169 */ 1170 1171 static void 1172 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1173 { 1174 int index; 1175 char *guid; 1176 struct client_hash *head; 1177 mdi_client_t *next; 1178 mdi_client_t *last; 1179 1180 ASSERT(MUTEX_HELD(&mdi_mutex)); 1181 guid = ct->ct_guid; 1182 index = i_mdi_get_hash_key(guid); 1183 head = &vh->vh_client_table[index]; 1184 1185 last = NULL; 1186 next = (mdi_client_t *)head->ct_hash_head; 1187 while (next != NULL) { 1188 if (next == ct) { 1189 break; 1190 } 1191 last = next; 1192 next = next->ct_hnext; 1193 } 1194 1195 if (next) { 1196 head->ct_hash_count--; 1197 if (last == NULL) { 1198 head->ct_hash_head = ct->ct_hnext; 1199 } else { 1200 last->ct_hnext = ct->ct_hnext; 1201 } 1202 ct->ct_hnext = NULL; 1203 vh->vh_client_count--; 1204 } 1205 } 1206 1207 1208 /* 1209 * i_mdi_client_free(): 1210 * Free a client component 1211 */ 1212 static int 1213 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1214 { 1215 int rv = MDI_SUCCESS; 1216 int flags = ct->ct_flags; 1217 dev_info_t *cdip; 1218 dev_info_t *vdip; 
1219 1220 ASSERT(MUTEX_HELD(&mdi_mutex)); 1221 vdip = vh->vh_dip; 1222 cdip = ct->ct_dip; 1223 1224 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1225 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1226 DEVI(cdip)->devi_mdi_client = NULL; 1227 1228 /* 1229 * Clear out back ref. to dev_info_t node 1230 */ 1231 ct->ct_dip = NULL; 1232 1233 /* 1234 * Remove this client from our hash queue 1235 */ 1236 i_mdi_client_delist_table(vh, ct); 1237 1238 /* 1239 * Uninitialize and free the component 1240 */ 1241 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1242 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1243 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1244 cv_destroy(&ct->ct_failover_cv); 1245 cv_destroy(&ct->ct_unstable_cv); 1246 cv_destroy(&ct->ct_powerchange_cv); 1247 mutex_destroy(&ct->ct_mutex); 1248 kmem_free(ct, sizeof (*ct)); 1249 1250 if (cdip != NULL) { 1251 mutex_exit(&mdi_mutex); 1252 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1253 mutex_enter(&mdi_mutex); 1254 } 1255 return (rv); 1256 } 1257 1258 /* 1259 * i_mdi_client_find(): 1260 * Find the client structure corresponding to a given guid 1261 * Caller should hold the mdi_mutex 1262 */ 1263 static mdi_client_t * 1264 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1265 { 1266 int index; 1267 struct client_hash *head; 1268 mdi_client_t *ct; 1269 1270 ASSERT(MUTEX_HELD(&mdi_mutex)); 1271 index = i_mdi_get_hash_key(guid); 1272 head = &vh->vh_client_table[index]; 1273 1274 ct = head->ct_hash_head; 1275 while (ct != NULL) { 1276 if (strcmp(ct->ct_guid, guid) == 0 && 1277 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1278 break; 1279 } 1280 ct = ct->ct_hnext; 1281 } 1282 return (ct); 1283 } 1284 1285 1286 1287 /* 1288 * i_mdi_client_update_state(): 1289 * Compute and update client device state 1290 * Notes: 1291 * A client device can be in any of three possible states: 1292 * 1293 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state 
with more 1294 * one online/standby paths. Can tolerate failures. 1295 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1296 * no alternate paths available as standby. A failure on the online 1297 * would result in loss of access to device data. 1298 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1299 * no paths available to access the device. 1300 */ 1301 static void 1302 i_mdi_client_update_state(mdi_client_t *ct) 1303 { 1304 int state; 1305 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1306 state = i_mdi_client_compute_state(ct, NULL); 1307 MDI_CLIENT_SET_STATE(ct, state); 1308 } 1309 1310 /* 1311 * i_mdi_client_compute_state(): 1312 * Compute client device state 1313 * 1314 * mdi_phci_t * Pointer to pHCI structure which should 1315 * while computing the new value. Used by 1316 * i_mdi_phci_offline() to find the new 1317 * client state after DR of a pHCI. 1318 */ 1319 static int 1320 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1321 { 1322 int state; 1323 int online_count = 0; 1324 int standby_count = 0; 1325 mdi_pathinfo_t *pip, *next; 1326 1327 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1328 pip = ct->ct_path_head; 1329 while (pip != NULL) { 1330 MDI_PI_LOCK(pip); 1331 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1332 if (MDI_PI(pip)->pi_phci == ph) { 1333 MDI_PI_UNLOCK(pip); 1334 pip = next; 1335 continue; 1336 } 1337 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1338 == MDI_PATHINFO_STATE_ONLINE) 1339 online_count++; 1340 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1341 == MDI_PATHINFO_STATE_STANDBY) 1342 standby_count++; 1343 MDI_PI_UNLOCK(pip); 1344 pip = next; 1345 } 1346 1347 if (online_count == 0) { 1348 if (standby_count == 0) { 1349 state = MDI_CLIENT_STATE_FAILED; 1350 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1351 " ct = %p\n", ct)); 1352 } else if (standby_count == 1) { 1353 state = MDI_CLIENT_STATE_DEGRADED; 1354 } else { 1355 state = MDI_CLIENT_STATE_OPTIMAL; 1356 } 1357 } 
else if (online_count == 1) { 1358 if (standby_count == 0) { 1359 state = MDI_CLIENT_STATE_DEGRADED; 1360 } else { 1361 state = MDI_CLIENT_STATE_OPTIMAL; 1362 } 1363 } else { 1364 state = MDI_CLIENT_STATE_OPTIMAL; 1365 } 1366 return (state); 1367 } 1368 1369 /* 1370 * i_mdi_client2devinfo(): 1371 * Utility function 1372 */ 1373 dev_info_t * 1374 i_mdi_client2devinfo(mdi_client_t *ct) 1375 { 1376 return (ct->ct_dip); 1377 } 1378 1379 /* 1380 * mdi_client_path2_devinfo(): 1381 * Given the parent devinfo and child devfs pathname, search for 1382 * a valid devfs node handle. 1383 */ 1384 dev_info_t * 1385 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1386 { 1387 dev_info_t *cdip = NULL; 1388 dev_info_t *ndip = NULL; 1389 char *temp_pathname; 1390 int circular; 1391 1392 /* 1393 * Allocate temp buffer 1394 */ 1395 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1396 1397 /* 1398 * Lock parent against changes 1399 */ 1400 ndi_devi_enter(vdip, &circular); 1401 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1402 while ((cdip = ndip) != NULL) { 1403 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1404 1405 *temp_pathname = '\0'; 1406 (void) ddi_pathname(cdip, temp_pathname); 1407 if (strcmp(temp_pathname, pathname) == 0) { 1408 break; 1409 } 1410 } 1411 /* 1412 * Release devinfo lock 1413 */ 1414 ndi_devi_exit(vdip, circular); 1415 1416 /* 1417 * Free the temp buffer 1418 */ 1419 kmem_free(temp_pathname, MAXPATHLEN); 1420 return (cdip); 1421 } 1422 1423 1424 /* 1425 * mdi_client_get_path_count(): 1426 * Utility function to get number of path information nodes 1427 * associated with a given client device. 
1428 */ 1429 int 1430 mdi_client_get_path_count(dev_info_t *cdip) 1431 { 1432 mdi_client_t *ct; 1433 int count = 0; 1434 1435 ct = i_devi_get_client(cdip); 1436 if (ct != NULL) { 1437 count = ct->ct_path_count; 1438 } 1439 return (count); 1440 } 1441 1442 1443 /* 1444 * i_mdi_get_hash_key(): 1445 * Create a hash using strings as keys 1446 * 1447 */ 1448 static int 1449 i_mdi_get_hash_key(char *str) 1450 { 1451 uint32_t g, hash = 0; 1452 char *p; 1453 1454 for (p = str; *p != '\0'; p++) { 1455 g = *p; 1456 hash += g; 1457 } 1458 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1459 } 1460 1461 /* 1462 * mdi_get_lb_policy(): 1463 * Get current load balancing policy for a given client device 1464 */ 1465 client_lb_t 1466 mdi_get_lb_policy(dev_info_t *cdip) 1467 { 1468 client_lb_t lb = LOAD_BALANCE_NONE; 1469 mdi_client_t *ct; 1470 1471 ct = i_devi_get_client(cdip); 1472 if (ct != NULL) { 1473 lb = ct->ct_lb; 1474 } 1475 return (lb); 1476 } 1477 1478 /* 1479 * mdi_set_lb_region_size(): 1480 * Set current region size for the load-balance 1481 */ 1482 int 1483 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1484 { 1485 mdi_client_t *ct; 1486 int rv = MDI_FAILURE; 1487 1488 ct = i_devi_get_client(cdip); 1489 if (ct != NULL && ct->ct_lb_args != NULL) { 1490 ct->ct_lb_args->region_size = region_size; 1491 rv = MDI_SUCCESS; 1492 } 1493 return (rv); 1494 } 1495 1496 /* 1497 * mdi_Set_lb_policy(): 1498 * Set current load balancing policy for a given client device 1499 */ 1500 int 1501 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1502 { 1503 mdi_client_t *ct; 1504 int rv = MDI_FAILURE; 1505 1506 ct = i_devi_get_client(cdip); 1507 if (ct != NULL) { 1508 ct->ct_lb = lb; 1509 rv = MDI_SUCCESS; 1510 } 1511 return (rv); 1512 } 1513 1514 /* 1515 * mdi_failover(): 1516 * failover function called by the vHCI drivers to initiate 1517 * a failover operation. This is typically due to non-availability 1518 * of online paths to route I/O requests. 
Failover can be 1519 * triggered through user application also. 1520 * 1521 * The vHCI driver calls mdi_failover() to initiate a failover 1522 * operation. mdi_failover() calls back into the vHCI driver's 1523 * vo_failover() entry point to perform the actual failover 1524 * operation. The reason for requiring the vHCI driver to 1525 * initiate failover by calling mdi_failover(), instead of directly 1526 * executing vo_failover() itself, is to ensure that the mdi 1527 * framework can keep track of the client state properly. 1528 * Additionally, mdi_failover() provides as a convenience the 1529 * option of performing the failover operation synchronously or 1530 * asynchronously 1531 * 1532 * Upon successful completion of the failover operation, the 1533 * paths that were previously ONLINE will be in the STANDBY state, 1534 * and the newly activated paths will be in the ONLINE state. 1535 * 1536 * The flags modifier determines whether the activation is done 1537 * synchronously: MDI_FAILOVER_SYNC 1538 * Return Values: 1539 * MDI_SUCCESS 1540 * MDI_FAILURE 1541 * MDI_BUSY 1542 */ 1543 /*ARGSUSED*/ 1544 int 1545 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1546 { 1547 int rv; 1548 mdi_client_t *ct; 1549 1550 ct = i_devi_get_client(cdip); 1551 ASSERT(ct != NULL); 1552 if (ct == NULL) { 1553 /* cdip is not a valid client device. Nothing more to do. */ 1554 return (MDI_FAILURE); 1555 } 1556 1557 MDI_CLIENT_LOCK(ct); 1558 1559 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1560 /* A path to the client is being freed */ 1561 MDI_CLIENT_UNLOCK(ct); 1562 return (MDI_BUSY); 1563 } 1564 1565 1566 if (MDI_CLIENT_IS_FAILED(ct)) { 1567 /* 1568 * Client is in failed state. Nothing more to do. 
1569 */ 1570 MDI_CLIENT_UNLOCK(ct); 1571 return (MDI_FAILURE); 1572 } 1573 1574 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1575 /* 1576 * Failover is already in progress; return BUSY 1577 */ 1578 MDI_CLIENT_UNLOCK(ct); 1579 return (MDI_BUSY); 1580 } 1581 /* 1582 * Make sure that mdi_pathinfo node state changes are processed. 1583 * We do not allow failovers to progress while client path state 1584 * changes are in progress 1585 */ 1586 if (ct->ct_unstable) { 1587 if (flags == MDI_FAILOVER_ASYNC) { 1588 MDI_CLIENT_UNLOCK(ct); 1589 return (MDI_BUSY); 1590 } else { 1591 while (ct->ct_unstable) 1592 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1593 } 1594 } 1595 1596 /* 1597 * Client device is in stable state. Before proceeding, perform sanity 1598 * checks again. 1599 */ 1600 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1601 (i_ddi_node_state(ct->ct_dip) < DS_READY)) { 1602 /* 1603 * Client is in failed state. Nothing more to do. 1604 */ 1605 MDI_CLIENT_UNLOCK(ct); 1606 return (MDI_FAILURE); 1607 } 1608 1609 /* 1610 * Set the client state as failover in progress. 1611 */ 1612 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1613 ct->ct_failover_flags = flags; 1614 MDI_CLIENT_UNLOCK(ct); 1615 1616 if (flags == MDI_FAILOVER_ASYNC) { 1617 /* 1618 * Submit the initiate failover request via CPR safe 1619 * taskq threads. 1620 */ 1621 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1622 ct, KM_SLEEP); 1623 return (MDI_ACCEPT); 1624 } else { 1625 /* 1626 * Synchronous failover mode. Typically invoked from the user 1627 * land. 1628 */ 1629 rv = i_mdi_failover(ct); 1630 } 1631 return (rv); 1632 } 1633 1634 /* 1635 * i_mdi_failover(): 1636 * internal failover function. Invokes vHCI drivers failover 1637 * callback function and process the failover status 1638 * Return Values: 1639 * None 1640 * 1641 * Note: A client device in failover state can not be detached or freed. 
1642 */ 1643 static int 1644 i_mdi_failover(void *arg) 1645 { 1646 int rv = MDI_SUCCESS; 1647 mdi_client_t *ct = (mdi_client_t *)arg; 1648 mdi_vhci_t *vh = ct->ct_vhci; 1649 1650 ASSERT(!MUTEX_HELD(&ct->ct_mutex)); 1651 1652 if (vh->vh_ops->vo_failover != NULL) { 1653 /* 1654 * Call vHCI drivers callback routine 1655 */ 1656 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1657 ct->ct_failover_flags); 1658 } 1659 1660 MDI_CLIENT_LOCK(ct); 1661 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1662 1663 /* 1664 * Save the failover return status 1665 */ 1666 ct->ct_failover_status = rv; 1667 1668 /* 1669 * As a result of failover, client status would have been changed. 1670 * Update the client state and wake up anyone waiting on this client 1671 * device. 1672 */ 1673 i_mdi_client_update_state(ct); 1674 1675 cv_broadcast(&ct->ct_failover_cv); 1676 MDI_CLIENT_UNLOCK(ct); 1677 return (rv); 1678 } 1679 1680 /* 1681 * Load balancing is logical block. 1682 * IOs within the range described by region_size 1683 * would go on the same path. This would improve the 1684 * performance by cache-hit on some of the RAID devices. 1685 * Search only for online paths(At some point we 1686 * may want to balance across target ports). 1687 * If no paths are found then default to round-robin. 
1688 */ 1689 static int 1690 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1691 { 1692 int path_index = -1; 1693 int online_path_count = 0; 1694 int online_nonpref_path_count = 0; 1695 int region_size = ct->ct_lb_args->region_size; 1696 mdi_pathinfo_t *pip; 1697 mdi_pathinfo_t *next; 1698 int preferred, path_cnt; 1699 1700 pip = ct->ct_path_head; 1701 while (pip) { 1702 MDI_PI_LOCK(pip); 1703 if (MDI_PI(pip)->pi_state == 1704 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1705 online_path_count++; 1706 } else if (MDI_PI(pip)->pi_state == 1707 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1708 online_nonpref_path_count++; 1709 } 1710 next = (mdi_pathinfo_t *) 1711 MDI_PI(pip)->pi_client_link; 1712 MDI_PI_UNLOCK(pip); 1713 pip = next; 1714 } 1715 /* if found any online/preferred then use this type */ 1716 if (online_path_count > 0) { 1717 path_cnt = online_path_count; 1718 preferred = 1; 1719 } else if (online_nonpref_path_count > 0) { 1720 path_cnt = online_nonpref_path_count; 1721 preferred = 0; 1722 } else { 1723 path_cnt = 0; 1724 } 1725 if (path_cnt) { 1726 path_index = (bp->b_blkno >> region_size) % path_cnt; 1727 pip = ct->ct_path_head; 1728 while (pip && path_index != -1) { 1729 MDI_PI_LOCK(pip); 1730 if (path_index == 0 && 1731 (MDI_PI(pip)->pi_state == 1732 MDI_PATHINFO_STATE_ONLINE) && 1733 MDI_PI(pip)->pi_preferred == preferred) { 1734 MDI_PI_HOLD(pip); 1735 MDI_PI_UNLOCK(pip); 1736 *ret_pip = pip; 1737 return (MDI_SUCCESS); 1738 } 1739 path_index --; 1740 next = (mdi_pathinfo_t *) 1741 MDI_PI(pip)->pi_client_link; 1742 MDI_PI_UNLOCK(pip); 1743 pip = next; 1744 } 1745 if (pip == NULL) { 1746 MDI_DEBUG(4, (CE_NOTE, NULL, 1747 "!lba %p, no pip !!\n", 1748 bp->b_blkno)); 1749 } else { 1750 MDI_DEBUG(4, (CE_NOTE, NULL, 1751 "!lba %p, no pip for path_index, " 1752 "pip %p\n", pip)); 1753 } 1754 } 1755 return (MDI_FAILURE); 1756 } 1757 1758 /* 1759 * mdi_select_path(): 1760 * select a path to access a client 
/*
 * mdi_select_path():
 *		select a path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to. The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters. The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending). If more than one online path is available
 *		to select, the framework automatically selects a suitable
 *		path for routing the I/O request. If a failover operation is
 *		active for this client device the call shall be failed with
 *		MDI_BUSY error code.
 *
 *		By default (flags == 0) this function returns a suitable
 *		path in online state based on the current load balancing
 *		policy. Currently we support LOAD_BALANCE_NONE (previously
 *		selected online path will continue to be used as long as the
 *		path is usable), LOAD_BALANCE_RR (online paths will be
 *		selected in a round robin fashion) and LOAD_BALANCE_LBA
 *		(online paths will be selected based on the logical block;
 *		round robin is used when that selection fails). The load
 *		balancing policy is selected through the vHCI driver's
 *		configuration file (driver.conf). In every policy preferred
 *		paths are tried first and non-preferred paths second.
 *
 *		vHCI drivers may override this default behavior by
 *		specifying appropriate flags. In that case the client state
 *		checks are skipped, the client's load balancing policy is
 *		bypassed in favor of a round robin walk, and start_pip, if
 *		specified (non NULL), is used as the start point to walk and
 *		find the next appropriate path. The following values are
 *		currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select a STANDBY path).
 *		Any other non-zero flags value matches no path.
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path. Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a held state (ref_cnt).
 *		Caller should release the hold by calling mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY	- Client device is busy failing over, or no
 *				  path matched but at least one of the paths
 *				  examined was in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid paths
 *				  are available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if ((MDI_CLIENT_IS_DETACHED(ct)) ||
		    i_ddi_node_state(cdip) < DS_READY) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head. If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path. The list is walked circularly,
		 * once for preferred paths and once more for non-preferred
		 * paths.
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy. Remember it so that MDI_BUSY rather
			 * than MDI_NOPATH is returned if nothing is found.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				/* first lap done; look for nonpreferred paths */
				preferred = 0;
			} else if (start == pip && !preferred) {
				/* second lap done; give up */
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
		    (ct->ct_lb_args->region_size) && bp &&
		    (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
			    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point. If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * The walk begins with the node following the start point:
		 * the last selected path for the standard behavior, the
		 * caller supplied start_pip otherwise.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set to 1 when this path has the state(s)
			 * being asked for and the preference of the current
			 * pass.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
				    MDI_PI(pip)->pi_preferred ==
				    preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else {
					/* unrecognized flags match nothing */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy. Remember it so that MDI_BUSY rather
			 * than MDI_NOPATH is returned if nothing is found.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * pip is locked whenever control reaches do_again,
			 * including when it is entered via the goto below.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected. Report the client as busy if any of the
	 * paths looked at was in a transient state.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}
2179 */ 2180 mdi_pathinfo_t * 2181 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2182 { 2183 mdi_client_t *ct; 2184 2185 if (!MDI_CLIENT(ct_dip)) 2186 return (NULL); 2187 2188 /* 2189 * Walk through client link 2190 */ 2191 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2192 ASSERT(ct != NULL); 2193 2194 if (pip == NULL) 2195 return ((mdi_pathinfo_t *)ct->ct_path_head); 2196 2197 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2198 } 2199 2200 /* 2201 * For a phci, return the next available path to any client 2202 * Note: ditto mdi_get_next_phci_path() 2203 */ 2204 mdi_pathinfo_t * 2205 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2206 { 2207 mdi_phci_t *ph; 2208 2209 if (!MDI_PHCI(ph_dip)) 2210 return (NULL); 2211 2212 /* 2213 * Walk through pHCI link 2214 */ 2215 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2216 ASSERT(ph != NULL); 2217 2218 if (pip == NULL) 2219 return ((mdi_pathinfo_t *)ph->ph_path_head); 2220 2221 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2222 } 2223 2224 /* 2225 * mdi_get_nextpath(): 2226 * mdi_pathinfo node walker function. Get the next node from the 2227 * client or pHCI device list. 2228 * 2229 * XXX This is wrapper function for compatibility purposes only. 2230 * 2231 * It doesn't work under Multi-level MPxIO, where a dip 2232 * is both client and phci (which link should next_path follow?). 2233 * Once Leadville is modified to call mdi_get_next_phci/client_path, 2234 * this interface should be removed. 2235 */ 2236 void 2237 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip, 2238 mdi_pathinfo_t **ret_pip) 2239 { 2240 if (MDI_CLIENT(dip)) { 2241 *ret_pip = mdi_get_next_phci_path(dip, pip); 2242 } else if (MDI_PHCI(dip)) { 2243 *ret_pip = mdi_get_next_client_path(dip, pip); 2244 } else { 2245 *ret_pip = NULL; 2246 } 2247 } 2248 2249 /* 2250 * mdi_hold_path(): 2251 * Hold the mdi_pathinfo node against unwanted unexpected free. 
2252 * Return Values: 2253 * None 2254 */ 2255 void 2256 mdi_hold_path(mdi_pathinfo_t *pip) 2257 { 2258 if (pip) { 2259 MDI_PI_LOCK(pip); 2260 MDI_PI_HOLD(pip); 2261 MDI_PI_UNLOCK(pip); 2262 } 2263 } 2264 2265 2266 /* 2267 * mdi_rele_path(): 2268 * Release the mdi_pathinfo node which was selected 2269 * through mdi_select_path() mechanism or manually held by 2270 * calling mdi_hold_path(). 2271 * Return Values: 2272 * None 2273 */ 2274 void 2275 mdi_rele_path(mdi_pathinfo_t *pip) 2276 { 2277 if (pip) { 2278 MDI_PI_LOCK(pip); 2279 MDI_PI_RELE(pip); 2280 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2281 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2282 } 2283 MDI_PI_UNLOCK(pip); 2284 } 2285 } 2286 2287 2288 /* 2289 * mdi_pi_lock(): 2290 * Lock the mdi_pathinfo node. 2291 * Note: 2292 * The caller should release the lock by calling mdi_pi_unlock() 2293 */ 2294 void 2295 mdi_pi_lock(mdi_pathinfo_t *pip) 2296 { 2297 ASSERT(pip != NULL); 2298 if (pip) { 2299 MDI_PI_LOCK(pip); 2300 } 2301 } 2302 2303 2304 /* 2305 * mdi_pi_unlock(): 2306 * Unlock the mdi_pathinfo node. 2307 * Note: 2308 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2309 */ 2310 void 2311 mdi_pi_unlock(mdi_pathinfo_t *pip) 2312 { 2313 ASSERT(pip != NULL); 2314 if (pip) { 2315 MDI_PI_UNLOCK(pip); 2316 } 2317 } 2318 2319 /* 2320 * mdi_pi_find(): 2321 * Search the list of mdi_pathinfo nodes attached to the 2322 * pHCI/Client device node whose path address matches "paddr". 2323 * Returns a pointer to the mdi_pathinfo node if a matching node is 2324 * found. 2325 * Return Values: 2326 * mdi_pathinfo node handle 2327 * NULL 2328 * Notes: 2329 * Caller need not hold any locks to call this function. 
2330 */ 2331 mdi_pathinfo_t * 2332 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2333 { 2334 mdi_phci_t *ph; 2335 mdi_vhci_t *vh; 2336 mdi_client_t *ct; 2337 mdi_pathinfo_t *pip = NULL; 2338 2339 if ((pdip == NULL) || (paddr == NULL)) { 2340 return (NULL); 2341 } 2342 ph = i_devi_get_phci(pdip); 2343 if (ph == NULL) { 2344 /* 2345 * Invalid pHCI device, Nothing more to do. 2346 */ 2347 MDI_DEBUG(2, (CE_WARN, NULL, 2348 "!mdi_pi_find: invalid phci")); 2349 return (NULL); 2350 } 2351 2352 vh = ph->ph_vhci; 2353 if (vh == NULL) { 2354 /* 2355 * Invalid vHCI device, Nothing more to do. 2356 */ 2357 MDI_DEBUG(2, (CE_WARN, NULL, 2358 "!mdi_pi_find: invalid phci")); 2359 return (NULL); 2360 } 2361 2362 /* 2363 * Look for client device identified by caddr (guid) 2364 */ 2365 if (caddr == NULL) { 2366 /* 2367 * Find a mdi_pathinfo node under pHCI list for a matching 2368 * unit address. 2369 */ 2370 mutex_enter(&ph->ph_mutex); 2371 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2372 2373 while (pip != NULL) { 2374 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2375 break; 2376 } 2377 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2378 } 2379 mutex_exit(&ph->ph_mutex); 2380 return (pip); 2381 } 2382 2383 /* 2384 * XXX - Is the rest of the code in this function really necessary? 2385 * The consumers of mdi_pi_find() can search for the desired pathinfo 2386 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2387 * whether the search is based on the pathinfo nodes attached to 2388 * the pHCI or the client node, the result will be the same. 2389 */ 2390 2391 /* 2392 * Find the client device corresponding to 'caddr' 2393 */ 2394 mutex_enter(&mdi_mutex); 2395 2396 /* 2397 * XXX - Passing NULL to the following function works as long as the 2398 * the client addresses (caddr) are unique per vhci basis. 
2399 */ 2400 ct = i_mdi_client_find(vh, NULL, caddr); 2401 if (ct == NULL) { 2402 /* 2403 * Client not found, Obviously mdi_pathinfo node has not been 2404 * created yet. 2405 */ 2406 mutex_exit(&mdi_mutex); 2407 return (pip); 2408 } 2409 2410 /* 2411 * Hold the client lock and look for a mdi_pathinfo node with matching 2412 * pHCI and paddr 2413 */ 2414 MDI_CLIENT_LOCK(ct); 2415 2416 /* 2417 * Release the global mutex as it is no more needed. Note: We always 2418 * respect the locking order while acquiring. 2419 */ 2420 mutex_exit(&mdi_mutex); 2421 2422 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2423 while (pip != NULL) { 2424 /* 2425 * Compare the unit address 2426 */ 2427 if ((MDI_PI(pip)->pi_phci == ph) && 2428 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2429 break; 2430 } 2431 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2432 } 2433 MDI_CLIENT_UNLOCK(ct); 2434 return (pip); 2435 } 2436 2437 /* 2438 * mdi_pi_alloc(): 2439 * Allocate and initialize a new instance of a mdi_pathinfo node. 2440 * The mdi_pathinfo node returned by this function identifies a 2441 * unique device path is capable of having properties attached 2442 * and passed to mdi_pi_online() to fully attach and online the 2443 * path and client device node. 2444 * The mdi_pathinfo node returned by this function must be 2445 * destroyed using mdi_pi_free() if the path is no longer 2446 * operational or if the caller fails to attach a client device 2447 * node when calling mdi_pi_online(). The framework will not free 2448 * the resources allocated. 2449 * This function can be called from both interrupt and kernel 2450 * contexts. DDI_NOSLEEP flag should be used while calling 2451 * from interrupt contexts. 
2452 * Return Values: 2453 * MDI_SUCCESS 2454 * MDI_FAILURE 2455 * MDI_NOMEM 2456 */ 2457 /*ARGSUSED*/ 2458 int 2459 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2460 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2461 { 2462 mdi_vhci_t *vh; 2463 mdi_phci_t *ph; 2464 mdi_client_t *ct; 2465 mdi_pathinfo_t *pip = NULL; 2466 dev_info_t *cdip; 2467 int rv = MDI_NOMEM; 2468 int path_allocated = 0; 2469 2470 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2471 ret_pip == NULL) { 2472 /* Nothing more to do */ 2473 return (MDI_FAILURE); 2474 } 2475 2476 *ret_pip = NULL; 2477 ph = i_devi_get_phci(pdip); 2478 ASSERT(ph != NULL); 2479 if (ph == NULL) { 2480 /* Invalid pHCI device, return failure */ 2481 MDI_DEBUG(1, (CE_WARN, NULL, 2482 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2483 return (MDI_FAILURE); 2484 } 2485 2486 MDI_PHCI_LOCK(ph); 2487 vh = ph->ph_vhci; 2488 if (vh == NULL) { 2489 /* Invalid vHCI device, return failure */ 2490 MDI_DEBUG(1, (CE_WARN, NULL, 2491 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2492 MDI_PHCI_UNLOCK(ph); 2493 return (MDI_FAILURE); 2494 } 2495 2496 if (MDI_PHCI_IS_READY(ph) == 0) { 2497 /* 2498 * Do not allow new node creation when pHCI is in 2499 * offline/suspended states 2500 */ 2501 MDI_DEBUG(1, (CE_WARN, NULL, 2502 "mdi_pi_alloc: pHCI=%p is not ready", ph)); 2503 MDI_PHCI_UNLOCK(ph); 2504 return (MDI_BUSY); 2505 } 2506 MDI_PHCI_UNSTABLE(ph); 2507 MDI_PHCI_UNLOCK(ph); 2508 2509 /* look for a matching client, create one if not found */ 2510 mutex_enter(&mdi_mutex); 2511 ct = i_mdi_client_find(vh, cname, caddr); 2512 if (ct == NULL) { 2513 ct = i_mdi_client_alloc(vh, cname, caddr); 2514 ASSERT(ct != NULL); 2515 } 2516 2517 if (ct->ct_dip == NULL) { 2518 /* 2519 * Allocate a devinfo node 2520 */ 2521 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2522 compatible, ncompatible); 2523 if (ct->ct_dip == NULL) { 2524 (void) i_mdi_client_free(vh, ct); 2525 goto 
fail; 2526 } 2527 } 2528 cdip = ct->ct_dip; 2529 2530 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2531 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2532 2533 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2534 while (pip != NULL) { 2535 /* 2536 * Compare the unit address 2537 */ 2538 if ((MDI_PI(pip)->pi_phci == ph) && 2539 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2540 break; 2541 } 2542 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2543 } 2544 2545 if (pip == NULL) { 2546 /* 2547 * This is a new path for this client device. Allocate and 2548 * initialize a new pathinfo node 2549 */ 2550 pip = i_mdi_pi_alloc(ph, paddr, ct); 2551 ASSERT(pip != NULL); 2552 path_allocated = 1; 2553 } 2554 rv = MDI_SUCCESS; 2555 2556 fail: 2557 /* 2558 * Release the global mutex. 2559 */ 2560 mutex_exit(&mdi_mutex); 2561 2562 /* 2563 * Mark the pHCI as stable 2564 */ 2565 MDI_PHCI_LOCK(ph); 2566 MDI_PHCI_STABLE(ph); 2567 MDI_PHCI_UNLOCK(ph); 2568 *ret_pip = pip; 2569 2570 if (path_allocated) 2571 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2572 2573 return (rv); 2574 } 2575 2576 /*ARGSUSED*/ 2577 int 2578 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2579 int flags, mdi_pathinfo_t **ret_pip) 2580 { 2581 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2582 flags, ret_pip)); 2583 } 2584 2585 /* 2586 * i_mdi_pi_alloc(): 2587 * Allocate a mdi_pathinfo node and add to the pHCI path list 2588 * Return Values: 2589 * mdi_pathinfo 2590 */ 2591 2592 /*ARGSUSED*/ 2593 static mdi_pathinfo_t * 2594 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2595 { 2596 mdi_pathinfo_t *pip; 2597 int ct_circular; 2598 int ph_circular; 2599 int se_flag; 2600 int kmem_flag; 2601 2602 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2603 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2604 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2605 MDI_PATHINFO_STATE_TRANSIENT; 2606 2607 if (MDI_PHCI_IS_USER_DISABLED(ph)) 
2608 MDI_PI_SET_USER_DISABLE(pip); 2609 2610 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2611 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2612 2613 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2614 MDI_PI_SET_DRV_DISABLE(pip); 2615 2616 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2617 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2618 MDI_PI(pip)->pi_client = ct; 2619 MDI_PI(pip)->pi_phci = ph; 2620 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2621 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2622 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2623 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2624 MDI_PI(pip)->pi_pprivate = NULL; 2625 MDI_PI(pip)->pi_cprivate = NULL; 2626 MDI_PI(pip)->pi_vprivate = NULL; 2627 MDI_PI(pip)->pi_client_link = NULL; 2628 MDI_PI(pip)->pi_phci_link = NULL; 2629 MDI_PI(pip)->pi_ref_cnt = 0; 2630 MDI_PI(pip)->pi_kstats = NULL; 2631 MDI_PI(pip)->pi_preferred = 1; 2632 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2633 2634 /* 2635 * Lock both dev_info nodes against changes in parallel. 2636 */ 2637 ndi_devi_enter(ct->ct_dip, &ct_circular); 2638 ndi_devi_enter(ph->ph_dip, &ph_circular); 2639 2640 i_mdi_phci_add_path(ph, pip); 2641 i_mdi_client_add_path(ct, pip); 2642 2643 ndi_devi_exit(ph->ph_dip, ph_circular); 2644 ndi_devi_exit(ct->ct_dip, ct_circular); 2645 2646 /* determine interrupt context */ 2647 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2648 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 2649 2650 i_ddi_di_cache_invalidate(kmem_flag); 2651 2652 return (pip); 2653 } 2654 2655 /* 2656 * i_mdi_phci_add_path(): 2657 * Add a mdi_pathinfo node to pHCI list. 
 * Notes:
 *		Caller is expected to own the pHCI devinfo node as busy; this
 *		is asserted below with DEVI_BUSY_OWNED().
 */

static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Append at the tail of the singly linked pi_phci_link list. */
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 * Notes:
 *		Caller is expected to own the client devinfo node as busy;
 *		this is asserted below with DEVI_BUSY_OWNED().
 */

static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Append at the tail of the singly linked pi_client_link list. */
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		Any other value returned by the vHCI's vo_pi_uninit() entry
 *		point is passed back unchanged.
 */

/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 * Each wait is bounded to 60 seconds; mdi_rele_path()
		 * broadcasts pi_ref_cv when the count reaches zero.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	/* Remember the PM hold so the client can be released further down. */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	/* Re-added at the end if the free fails with MDI_FAILURE. */
	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till mdi_mutex is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock so that mdi_mutex can be taken first and the
	 * client lock second (mdi_mutex -> ct_mutex lock order).
	 */
	MDI_CLIENT_UNLOCK(ct);
	mutex_enter(&mdi_mutex);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/* A node still in the initing state skips the uninit callback. */
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip is freed by this call and must not be used afterwards. */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			mutex_exit(&mdi_mutex);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	mutex_exit(&mdi_mutex);

	/* The node was not freed; put it back into the vHCI cache. */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node: destroy its kstats, unlink it from
 *		the client and pHCI path lists and release its memory.
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;
	int	se_flag;
	int	kmem_flag;

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* Same enter order as i_mdi_pi_alloc(): client first, then pHCI. */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		/* Size mirrors the strlen() + 1 allocation in i_mdi_pi_alloc */
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *		Caller is expected to own the pHCI devinfo node as busy; this
 *		is asserted below with DEVI_BUSY_OWNED().
 */

static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Walk the list, remembering the predecessor of the match. */
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link (done even when pip was not on the list)
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
}

/*
 * i_mdi_client_remove_path():
 *		Remove a mdi_pathinfo node from client path list.
 */

static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Walk the list, remembering the predecessor of the match. */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* ct_path_last must not be left pointing at the removed node */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	/* Cleared even when pip was not found on the list. */
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Move a mdi_pathinfo node to the requested state (online,
 *		standby, fault or offline), calling the vHCI driver's
 *		vo_pi_init (for a node still initing) and vo_pi_state_change
 *		entry points and updating the client state afterwards.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		A non-success value from vo_pi_state_change() is returned
 *		unchanged.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* The callback is made without the pathinfo lock held. */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				/* No lock is held at this point. */
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Marked stable again at state_change_exit. */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/* Neither the pip nor the ct lock is held. */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* The vHCI callback is made with neither pip nor ct lock held. */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* Callback failed: fall back to the previous state. */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			/* From here until the final switch rv is an NDI code */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip &&
				    (i_ddi_node_state(cdip) < DS_READY) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3311 * Return Values: 3312 * MDI_SUCCESS 3313 * MDI_FAILURE 3314 */ 3315 int 3316 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3317 { 3318 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3319 dev_info_t *cdip; 3320 int client_held = 0; 3321 int rv; 3322 3323 ASSERT(ct != NULL); 3324 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3325 if (rv != MDI_SUCCESS) 3326 return (rv); 3327 3328 MDI_PI_LOCK(pip); 3329 if (MDI_PI(pip)->pi_pm_held == 0) { 3330 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3331 "i_mdi_pm_hold_pip\n")); 3332 i_mdi_pm_hold_pip(pip); 3333 client_held = 1; 3334 } 3335 MDI_PI_UNLOCK(pip); 3336 3337 if (client_held) { 3338 MDI_CLIENT_LOCK(ct); 3339 if (ct->ct_power_cnt == 0) { 3340 rv = i_mdi_power_all_phci(ct); 3341 } 3342 3343 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3344 "i_mdi_pm_hold_client\n")); 3345 i_mdi_pm_hold_client(ct, 1); 3346 MDI_CLIENT_UNLOCK(ct); 3347 } 3348 3349 /* 3350 * Create the per-path (pathinfo) IO and error kstats which 3351 * are reported via iostat(1m). 3352 * 3353 * Defer creating the per-path kstats if device is not yet 3354 * attached; the names of the kstats are constructed in part 3355 * using the devices instance number which is assigned during 3356 * process of attaching the client device. 3357 * 3358 * The framework post_attach handler, mdi_post_attach(), is 3359 * is responsible for initializing the client's pathinfo list 3360 * once successfully attached. 
3361 */ 3362 cdip = ct->ct_dip; 3363 ASSERT(cdip); 3364 if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED)) 3365 return (rv); 3366 3367 MDI_CLIENT_LOCK(ct); 3368 rv = i_mdi_pi_kstat_create(pip); 3369 MDI_CLIENT_UNLOCK(ct); 3370 return (rv); 3371 } 3372 3373 /* 3374 * mdi_pi_standby(): 3375 * Place the mdi_pathinfo node in standby state 3376 * 3377 * Return Values: 3378 * MDI_SUCCESS 3379 * MDI_FAILURE 3380 */ 3381 int 3382 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3383 { 3384 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3385 } 3386 3387 /* 3388 * mdi_pi_fault(): 3389 * Place the mdi_pathinfo node in fault'ed state 3390 * Return Values: 3391 * MDI_SUCCESS 3392 * MDI_FAILURE 3393 */ 3394 int 3395 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3396 { 3397 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3398 } 3399 3400 /* 3401 * mdi_pi_offline(): 3402 * Offline a mdi_pathinfo node. 3403 * Return Values: 3404 * MDI_SUCCESS 3405 * MDI_FAILURE 3406 */ 3407 int 3408 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3409 { 3410 int ret, client_held = 0; 3411 mdi_client_t *ct; 3412 3413 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3414 3415 if (ret == MDI_SUCCESS) { 3416 MDI_PI_LOCK(pip); 3417 if (MDI_PI(pip)->pi_pm_held) { 3418 client_held = 1; 3419 } 3420 MDI_PI_UNLOCK(pip); 3421 3422 if (client_held) { 3423 ct = MDI_PI(pip)->pi_client; 3424 MDI_CLIENT_LOCK(ct); 3425 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3426 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3427 i_mdi_pm_rele_client(ct, 1); 3428 MDI_CLIENT_UNLOCK(ct); 3429 } 3430 } 3431 3432 return (ret); 3433 } 3434 3435 /* 3436 * i_mdi_pi_offline(): 3437 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3438 */ 3439 static int 3440 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3441 { 3442 dev_info_t *vdip = NULL; 3443 mdi_vhci_t *vh = NULL; 3444 mdi_client_t *ct = NULL; 3445 int (*f)(); 3446 int rv; 3447 3448 MDI_PI_LOCK(pip); 
3449 ct = MDI_PI(pip)->pi_client; 3450 ASSERT(ct != NULL); 3451 3452 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3453 /* 3454 * Give a chance for pending I/Os to complete. 3455 */ 3456 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3457 "%d cmds still pending on path: %p\n", 3458 MDI_PI(pip)->pi_ref_cnt, pip)); 3459 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3460 &MDI_PI(pip)->pi_mutex, 3461 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3462 /* 3463 * The timeout time reached without ref_cnt being zero 3464 * being signaled. 3465 */ 3466 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3467 "Timeout reached on path %p without the cond\n", 3468 pip)); 3469 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3470 "%d cmds still pending on path: %p\n", 3471 MDI_PI(pip)->pi_ref_cnt, pip)); 3472 } 3473 } 3474 vh = ct->ct_vhci; 3475 vdip = vh->vh_dip; 3476 3477 /* 3478 * Notify vHCI that has registered this event 3479 */ 3480 ASSERT(vh->vh_ops); 3481 f = vh->vh_ops->vo_pi_state_change; 3482 3483 if (f != NULL) { 3484 MDI_PI_UNLOCK(pip); 3485 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3486 flags)) != MDI_SUCCESS) { 3487 MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed " 3488 "vdip 0x%x, pip 0x%x", vdip, pip)); 3489 } 3490 MDI_PI_LOCK(pip); 3491 } 3492 3493 /* 3494 * Set the mdi_pathinfo node state and clear the transient condition 3495 */ 3496 MDI_PI_SET_OFFLINE(pip); 3497 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3498 MDI_PI_UNLOCK(pip); 3499 3500 MDI_CLIENT_LOCK(ct); 3501 if (rv == MDI_SUCCESS) { 3502 if (ct->ct_unstable == 0) { 3503 dev_info_t *cdip = ct->ct_dip; 3504 3505 /* 3506 * Onlining the mdi_pathinfo node will impact the 3507 * client state Update the client and dev_info node 3508 * state accordingly 3509 */ 3510 i_mdi_client_update_state(ct); 3511 rv = NDI_SUCCESS; 3512 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3513 if (cdip && 3514 (i_ddi_node_state(cdip) >= 3515 DS_INITIALIZED)) { 3516 MDI_CLIENT_UNLOCK(ct); 3517 rv = 
ndi_devi_offline(cdip, 0); 3518 MDI_CLIENT_LOCK(ct); 3519 if (rv != NDI_SUCCESS) { 3520 /* 3521 * ndi_devi_offline failed. 3522 * Reset client flags to 3523 * online. 3524 */ 3525 MDI_DEBUG(4, (CE_WARN, cdip, 3526 "!ndi_devi_offline: failed " 3527 " Error: %x", rv)); 3528 MDI_CLIENT_SET_ONLINE(ct); 3529 } 3530 } 3531 } 3532 /* 3533 * Convert to MDI error code 3534 */ 3535 switch (rv) { 3536 case NDI_SUCCESS: 3537 rv = MDI_SUCCESS; 3538 break; 3539 case NDI_BUSY: 3540 rv = MDI_BUSY; 3541 break; 3542 default: 3543 rv = MDI_FAILURE; 3544 break; 3545 } 3546 } 3547 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3548 i_mdi_report_path_state(ct, pip); 3549 } 3550 3551 MDI_CLIENT_UNLOCK(ct); 3552 3553 /* 3554 * Change in the mdi_pathinfo node state will impact the client state 3555 */ 3556 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3557 ct, pip)); 3558 return (rv); 3559 } 3560 3561 3562 /* 3563 * mdi_pi_get_addr(): 3564 * Get the unit address associated with a mdi_pathinfo node 3565 * 3566 * Return Values: 3567 * char * 3568 */ 3569 char * 3570 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3571 { 3572 if (pip == NULL) 3573 return (NULL); 3574 3575 return (MDI_PI(pip)->pi_addr); 3576 } 3577 3578 /* 3579 * mdi_pi_get_client(): 3580 * Get the client devinfo associated with a mdi_pathinfo node 3581 * 3582 * Return Values: 3583 * Handle to client device dev_info node 3584 */ 3585 dev_info_t * 3586 mdi_pi_get_client(mdi_pathinfo_t *pip) 3587 { 3588 dev_info_t *dip = NULL; 3589 if (pip) { 3590 dip = MDI_PI(pip)->pi_client->ct_dip; 3591 } 3592 return (dip); 3593 } 3594 3595 /* 3596 * mdi_pi_get_phci(): 3597 * Get the pHCI devinfo associated with the mdi_pathinfo node 3598 * Return Values: 3599 * Handle to dev_info node 3600 */ 3601 dev_info_t * 3602 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3603 { 3604 dev_info_t *dip = NULL; 3605 if (pip) { 3606 dip = MDI_PI(pip)->pi_phci->ph_dip; 3607 } 3608 return (dip); 3609 } 3610 3611 /* 3612 * mdi_pi_get_client_private(): 3613 * Get the 
client private information associated with the 3614 * mdi_pathinfo node 3615 */ 3616 void * 3617 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3618 { 3619 void *cprivate = NULL; 3620 if (pip) { 3621 cprivate = MDI_PI(pip)->pi_cprivate; 3622 } 3623 return (cprivate); 3624 } 3625 3626 /* 3627 * mdi_pi_set_client_private(): 3628 * Set the client private information in the mdi_pathinfo node 3629 */ 3630 void 3631 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3632 { 3633 if (pip) { 3634 MDI_PI(pip)->pi_cprivate = priv; 3635 } 3636 } 3637 3638 /* 3639 * mdi_pi_get_phci_private(): 3640 * Get the pHCI private information associated with the 3641 * mdi_pathinfo node 3642 */ 3643 caddr_t 3644 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3645 { 3646 caddr_t pprivate = NULL; 3647 if (pip) { 3648 pprivate = MDI_PI(pip)->pi_pprivate; 3649 } 3650 return (pprivate); 3651 } 3652 3653 /* 3654 * mdi_pi_set_phci_private(): 3655 * Set the pHCI private information in the mdi_pathinfo node 3656 */ 3657 void 3658 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3659 { 3660 if (pip) { 3661 MDI_PI(pip)->pi_pprivate = priv; 3662 } 3663 } 3664 3665 /* 3666 * mdi_pi_get_state(): 3667 * Get the mdi_pathinfo node state. Transient states are internal 3668 * and not provided to the users 3669 */ 3670 mdi_pathinfo_state_t 3671 mdi_pi_get_state(mdi_pathinfo_t *pip) 3672 { 3673 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3674 3675 if (pip) { 3676 if (MDI_PI_IS_TRANSIENT(pip)) { 3677 /* 3678 * mdi_pathinfo is in state transition. Return the 3679 * last good state. 3680 */ 3681 state = MDI_PI_OLD_STATE(pip); 3682 } else { 3683 state = MDI_PI_STATE(pip); 3684 } 3685 } 3686 return (state); 3687 } 3688 3689 /* 3690 * Note that the following function needs to be the new interface for 3691 * mdi_pi_get_state when mpxio gets integrated to ON. 
3692 */ 3693 int 3694 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3695 uint32_t *ext_state) 3696 { 3697 *state = MDI_PATHINFO_STATE_INIT; 3698 3699 if (pip) { 3700 if (MDI_PI_IS_TRANSIENT(pip)) { 3701 /* 3702 * mdi_pathinfo is in state transition. Return the 3703 * last good state. 3704 */ 3705 *state = MDI_PI_OLD_STATE(pip); 3706 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3707 } else { 3708 *state = MDI_PI_STATE(pip); 3709 *ext_state = MDI_PI_EXT_STATE(pip); 3710 } 3711 } 3712 return (MDI_SUCCESS); 3713 } 3714 3715 /* 3716 * mdi_pi_get_preferred: 3717 * Get the preferred path flag 3718 */ 3719 int 3720 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3721 { 3722 if (pip) { 3723 return (MDI_PI(pip)->pi_preferred); 3724 } 3725 return (0); 3726 } 3727 3728 /* 3729 * mdi_pi_set_preferred: 3730 * Set the preferred path flag 3731 */ 3732 void 3733 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3734 { 3735 if (pip) { 3736 MDI_PI(pip)->pi_preferred = preferred; 3737 } 3738 } 3739 3740 3741 /* 3742 * mdi_pi_set_state(): 3743 * Set the mdi_pathinfo node state 3744 */ 3745 void 3746 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3747 { 3748 uint32_t ext_state; 3749 3750 if (pip) { 3751 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3752 MDI_PI(pip)->pi_state = state; 3753 MDI_PI(pip)->pi_state |= ext_state; 3754 } 3755 } 3756 3757 /* 3758 * Property functions: 3759 */ 3760 3761 int 3762 i_map_nvlist_error_to_mdi(int val) 3763 { 3764 int rv; 3765 3766 switch (val) { 3767 case 0: 3768 rv = DDI_PROP_SUCCESS; 3769 break; 3770 case EINVAL: 3771 case ENOTSUP: 3772 rv = DDI_PROP_INVAL_ARG; 3773 break; 3774 case ENOMEM: 3775 rv = DDI_PROP_NO_MEMORY; 3776 break; 3777 default: 3778 rv = DDI_PROP_NOT_FOUND; 3779 break; 3780 } 3781 return (rv); 3782 } 3783 3784 /* 3785 * mdi_pi_get_next_prop(): 3786 * Property walk function. 
The caller should hold mdi_pi_lock() 3787 * and release by calling mdi_pi_unlock() at the end of walk to 3788 * get a consistent value. 3789 */ 3790 3791 nvpair_t * 3792 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3793 { 3794 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3795 return (NULL); 3796 } 3797 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3798 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3799 } 3800 3801 /* 3802 * mdi_prop_remove(): 3803 * Remove the named property from the named list. 3804 */ 3805 3806 int 3807 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3808 { 3809 if (pip == NULL) { 3810 return (DDI_PROP_NOT_FOUND); 3811 } 3812 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3813 MDI_PI_LOCK(pip); 3814 if (MDI_PI(pip)->pi_prop == NULL) { 3815 MDI_PI_UNLOCK(pip); 3816 return (DDI_PROP_NOT_FOUND); 3817 } 3818 if (name) { 3819 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3820 } else { 3821 char nvp_name[MAXNAMELEN]; 3822 nvpair_t *nvp; 3823 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3824 while (nvp) { 3825 nvpair_t *next; 3826 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3827 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3828 nvpair_name(nvp)); 3829 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3830 nvp_name); 3831 nvp = next; 3832 } 3833 } 3834 MDI_PI_UNLOCK(pip); 3835 return (DDI_PROP_SUCCESS); 3836 } 3837 3838 /* 3839 * mdi_prop_size(): 3840 * Get buffer size needed to pack the property data. 3841 * Caller should hold the mdi_pathinfo_t lock to get a consistent 3842 * buffer size. 
3843 */ 3844 3845 int 3846 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3847 { 3848 int rv; 3849 size_t bufsize; 3850 3851 *buflenp = 0; 3852 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3853 return (DDI_PROP_NOT_FOUND); 3854 } 3855 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3856 rv = nvlist_size(MDI_PI(pip)->pi_prop, 3857 &bufsize, NV_ENCODE_NATIVE); 3858 *buflenp = bufsize; 3859 return (i_map_nvlist_error_to_mdi(rv)); 3860 } 3861 3862 /* 3863 * mdi_prop_pack(): 3864 * pack the property list. The caller should hold the 3865 * mdi_pathinfo_t node to get a consistent data 3866 */ 3867 3868 int 3869 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 3870 { 3871 int rv; 3872 size_t bufsize; 3873 3874 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 3875 return (DDI_PROP_NOT_FOUND); 3876 } 3877 3878 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3879 3880 bufsize = buflen; 3881 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 3882 NV_ENCODE_NATIVE, KM_SLEEP); 3883 3884 return (i_map_nvlist_error_to_mdi(rv)); 3885 } 3886 3887 /* 3888 * mdi_prop_update_byte(): 3889 * Create/Update a byte property 3890 */ 3891 int 3892 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 3893 { 3894 int rv; 3895 3896 if (pip == NULL) { 3897 return (DDI_PROP_INVAL_ARG); 3898 } 3899 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3900 MDI_PI_LOCK(pip); 3901 if (MDI_PI(pip)->pi_prop == NULL) { 3902 MDI_PI_UNLOCK(pip); 3903 return (DDI_PROP_NOT_FOUND); 3904 } 3905 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 3906 MDI_PI_UNLOCK(pip); 3907 return (i_map_nvlist_error_to_mdi(rv)); 3908 } 3909 3910 /* 3911 * mdi_prop_update_byte_array(): 3912 * Create/Update a byte array property 3913 */ 3914 int 3915 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 3916 uint_t nelements) 3917 { 3918 int rv; 3919 3920 if (pip == NULL) { 3921 return (DDI_PROP_INVAL_ARG); 3922 } 3923 
ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3924 MDI_PI_LOCK(pip); 3925 if (MDI_PI(pip)->pi_prop == NULL) { 3926 MDI_PI_UNLOCK(pip); 3927 return (DDI_PROP_NOT_FOUND); 3928 } 3929 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 3930 MDI_PI_UNLOCK(pip); 3931 return (i_map_nvlist_error_to_mdi(rv)); 3932 } 3933 3934 /* 3935 * mdi_prop_update_int(): 3936 * Create/Update a 32 bit integer property 3937 */ 3938 int 3939 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 3940 { 3941 int rv; 3942 3943 if (pip == NULL) { 3944 return (DDI_PROP_INVAL_ARG); 3945 } 3946 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3947 MDI_PI_LOCK(pip); 3948 if (MDI_PI(pip)->pi_prop == NULL) { 3949 MDI_PI_UNLOCK(pip); 3950 return (DDI_PROP_NOT_FOUND); 3951 } 3952 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 3953 MDI_PI_UNLOCK(pip); 3954 return (i_map_nvlist_error_to_mdi(rv)); 3955 } 3956 3957 /* 3958 * mdi_prop_update_int64(): 3959 * Create/Update a 64 bit integer property 3960 */ 3961 int 3962 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 3963 { 3964 int rv; 3965 3966 if (pip == NULL) { 3967 return (DDI_PROP_INVAL_ARG); 3968 } 3969 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3970 MDI_PI_LOCK(pip); 3971 if (MDI_PI(pip)->pi_prop == NULL) { 3972 MDI_PI_UNLOCK(pip); 3973 return (DDI_PROP_NOT_FOUND); 3974 } 3975 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 3976 MDI_PI_UNLOCK(pip); 3977 return (i_map_nvlist_error_to_mdi(rv)); 3978 } 3979 3980 /* 3981 * mdi_prop_update_int_array(): 3982 * Create/Update a int array property 3983 */ 3984 int 3985 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 3986 uint_t nelements) 3987 { 3988 int rv; 3989 3990 if (pip == NULL) { 3991 return (DDI_PROP_INVAL_ARG); 3992 } 3993 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3994 MDI_PI_LOCK(pip); 3995 if (MDI_PI(pip)->pi_prop == NULL) { 3996 MDI_PI_UNLOCK(pip); 3997 return (DDI_PROP_NOT_FOUND); 3998 } 3999 rv 
= nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4000 nelements); 4001 MDI_PI_UNLOCK(pip); 4002 return (i_map_nvlist_error_to_mdi(rv)); 4003 } 4004 4005 /* 4006 * mdi_prop_update_string(): 4007 * Create/Update a string property 4008 */ 4009 int 4010 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4011 { 4012 int rv; 4013 4014 if (pip == NULL) { 4015 return (DDI_PROP_INVAL_ARG); 4016 } 4017 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4018 MDI_PI_LOCK(pip); 4019 if (MDI_PI(pip)->pi_prop == NULL) { 4020 MDI_PI_UNLOCK(pip); 4021 return (DDI_PROP_NOT_FOUND); 4022 } 4023 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4024 MDI_PI_UNLOCK(pip); 4025 return (i_map_nvlist_error_to_mdi(rv)); 4026 } 4027 4028 /* 4029 * mdi_prop_update_string_array(): 4030 * Create/Update a string array property 4031 */ 4032 int 4033 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4034 uint_t nelements) 4035 { 4036 int rv; 4037 4038 if (pip == NULL) { 4039 return (DDI_PROP_INVAL_ARG); 4040 } 4041 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4042 MDI_PI_LOCK(pip); 4043 if (MDI_PI(pip)->pi_prop == NULL) { 4044 MDI_PI_UNLOCK(pip); 4045 return (DDI_PROP_NOT_FOUND); 4046 } 4047 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4048 nelements); 4049 MDI_PI_UNLOCK(pip); 4050 return (i_map_nvlist_error_to_mdi(rv)); 4051 } 4052 4053 /* 4054 * mdi_prop_lookup_byte(): 4055 * Look for byte property identified by name. The data returned 4056 * is the actual property and valid as long as mdi_pathinfo_t node 4057 * is alive. 
4058 */ 4059 int 4060 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4061 { 4062 int rv; 4063 4064 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4065 return (DDI_PROP_NOT_FOUND); 4066 } 4067 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4068 return (i_map_nvlist_error_to_mdi(rv)); 4069 } 4070 4071 4072 /* 4073 * mdi_prop_lookup_byte_array(): 4074 * Look for byte array property identified by name. The data 4075 * returned is the actual property and valid as long as 4076 * mdi_pathinfo_t node is alive. 4077 */ 4078 int 4079 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4080 uint_t *nelements) 4081 { 4082 int rv; 4083 4084 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4085 return (DDI_PROP_NOT_FOUND); 4086 } 4087 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4088 nelements); 4089 return (i_map_nvlist_error_to_mdi(rv)); 4090 } 4091 4092 /* 4093 * mdi_prop_lookup_int(): 4094 * Look for int property identified by name. The data returned 4095 * is the actual property and valid as long as mdi_pathinfo_t 4096 * node is alive. 4097 */ 4098 int 4099 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4100 { 4101 int rv; 4102 4103 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4104 return (DDI_PROP_NOT_FOUND); 4105 } 4106 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4107 return (i_map_nvlist_error_to_mdi(rv)); 4108 } 4109 4110 /* 4111 * mdi_prop_lookup_int64(): 4112 * Look for int64 property identified by name. The data returned 4113 * is the actual property and valid as long as mdi_pathinfo_t node 4114 * is alive. 
4115 */ 4116 int 4117 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4118 { 4119 int rv; 4120 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4121 return (DDI_PROP_NOT_FOUND); 4122 } 4123 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4124 return (i_map_nvlist_error_to_mdi(rv)); 4125 } 4126 4127 /* 4128 * mdi_prop_lookup_int_array(): 4129 * Look for int array property identified by name. The data 4130 * returned is the actual property and valid as long as 4131 * mdi_pathinfo_t node is alive. 4132 */ 4133 int 4134 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4135 uint_t *nelements) 4136 { 4137 int rv; 4138 4139 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4140 return (DDI_PROP_NOT_FOUND); 4141 } 4142 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4143 (int32_t **)data, nelements); 4144 return (i_map_nvlist_error_to_mdi(rv)); 4145 } 4146 4147 /* 4148 * mdi_prop_lookup_string(): 4149 * Look for string property identified by name. The data 4150 * returned is the actual property and valid as long as 4151 * mdi_pathinfo_t node is alive. 4152 */ 4153 int 4154 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4155 { 4156 int rv; 4157 4158 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4159 return (DDI_PROP_NOT_FOUND); 4160 } 4161 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4162 return (i_map_nvlist_error_to_mdi(rv)); 4163 } 4164 4165 /* 4166 * mdi_prop_lookup_string_array(): 4167 * Look for string array property identified by name. The data 4168 * returned is the actual property and valid as long as 4169 * mdi_pathinfo_t node is alive. 
4170 */ 4171 4172 int 4173 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4174 uint_t *nelements) 4175 { 4176 int rv; 4177 4178 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4179 return (DDI_PROP_NOT_FOUND); 4180 } 4181 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4182 nelements); 4183 return (i_map_nvlist_error_to_mdi(rv)); 4184 } 4185 4186 /* 4187 * mdi_prop_free(): 4188 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4189 * functions return the pointer to actual property data and not a 4190 * copy of it. So the data returned is valid as long as 4191 * mdi_pathinfo_t node is valid. 4192 */ 4193 4194 /*ARGSUSED*/ 4195 int 4196 mdi_prop_free(void *data) 4197 { 4198 return (DDI_PROP_SUCCESS); 4199 } 4200 4201 /*ARGSUSED*/ 4202 static void 4203 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4204 { 4205 char *phci_path, *ct_path; 4206 char *ct_status; 4207 char *status; 4208 dev_info_t *dip = ct->ct_dip; 4209 char lb_buf[64]; 4210 4211 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 4212 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4213 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4214 return; 4215 } 4216 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4217 ct_status = "optimal"; 4218 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4219 ct_status = "degraded"; 4220 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4221 ct_status = "failed"; 4222 } else { 4223 ct_status = "unknown"; 4224 } 4225 4226 if (MDI_PI_IS_OFFLINE(pip)) { 4227 status = "offline"; 4228 } else if (MDI_PI_IS_ONLINE(pip)) { 4229 status = "online"; 4230 } else if (MDI_PI_IS_STANDBY(pip)) { 4231 status = "standby"; 4232 } else if (MDI_PI_IS_FAULT(pip)) { 4233 status = "faulted"; 4234 } else { 4235 status = "unknown"; 4236 } 4237 4238 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4239 (void) snprintf(lb_buf, sizeof (lb_buf), 4240 "%s, region-size: %d", mdi_load_balance_lba, 4241 
ct->ct_lb_args->region_size); 4242 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4243 (void) snprintf(lb_buf, sizeof (lb_buf), 4244 "%s", mdi_load_balance_none); 4245 } else { 4246 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4247 mdi_load_balance_rr); 4248 } 4249 4250 if (dip) { 4251 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4252 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4253 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4254 "path %s (%s%d) to target address: %s is %s" 4255 " Load balancing: %s\n", 4256 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4257 ddi_get_instance(dip), ct_status, 4258 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4259 ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4260 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4261 MDI_PI(pip)->pi_addr, status, lb_buf); 4262 kmem_free(phci_path, MAXPATHLEN); 4263 kmem_free(ct_path, MAXPATHLEN); 4264 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4265 } 4266 } 4267 4268 #ifdef DEBUG 4269 /* 4270 * i_mdi_log(): 4271 * Utility function for error message management 4272 * 4273 */ 4274 4275 /*VARARGS3*/ 4276 static void 4277 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 
4278 { 4279 char buf[MAXNAMELEN]; 4280 char name[MAXNAMELEN]; 4281 va_list ap; 4282 int log_only = 0; 4283 int boot_only = 0; 4284 int console_only = 0; 4285 4286 if (dip) { 4287 if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) { 4288 (void) snprintf(name, MAXNAMELEN, "%s%d:\n", 4289 ddi_node_name(dip), ddi_get_instance(dip)); 4290 } else { 4291 (void) snprintf(name, MAXNAMELEN, "%s%d:", 4292 ddi_node_name(dip), ddi_get_instance(dip)); 4293 } 4294 } else { 4295 name[0] = '\0'; 4296 } 4297 4298 va_start(ap, fmt); 4299 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4300 va_end(ap); 4301 4302 switch (buf[0]) { 4303 case '!': 4304 log_only = 1; 4305 break; 4306 case '?': 4307 boot_only = 1; 4308 break; 4309 case '^': 4310 console_only = 1; 4311 break; 4312 } 4313 4314 switch (level) { 4315 case CE_NOTE: 4316 level = CE_CONT; 4317 /* FALLTHROUGH */ 4318 case CE_CONT: 4319 case CE_WARN: 4320 case CE_PANIC: 4321 if (boot_only) { 4322 cmn_err(level, "?%s\t%s", name, &buf[1]); 4323 } else if (console_only) { 4324 cmn_err(level, "^%s\t%s", name, &buf[1]); 4325 } else if (log_only) { 4326 cmn_err(level, "!%s\t%s", name, &buf[1]); 4327 } else { 4328 cmn_err(level, "%s\t%s", name, buf); 4329 } 4330 break; 4331 default: 4332 cmn_err(level, "%s\t%s", name, buf); 4333 break; 4334 } 4335 } 4336 #endif /* DEBUG */ 4337 4338 void 4339 i_mdi_client_online(dev_info_t *ct_dip) 4340 { 4341 mdi_client_t *ct; 4342 4343 /* 4344 * Client online notification. 
Mark client state as online 4345 * restore our binding with dev_info node 4346 */ 4347 ct = i_devi_get_client(ct_dip); 4348 ASSERT(ct != NULL); 4349 MDI_CLIENT_LOCK(ct); 4350 MDI_CLIENT_SET_ONLINE(ct); 4351 /* catch for any memory leaks */ 4352 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4353 ct->ct_dip = ct_dip; 4354 4355 if (ct->ct_power_cnt == 0) 4356 (void) i_mdi_power_all_phci(ct); 4357 4358 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4359 "i_mdi_pm_hold_client\n")); 4360 i_mdi_pm_hold_client(ct, 1); 4361 4362 MDI_CLIENT_UNLOCK(ct); 4363 } 4364 4365 void 4366 i_mdi_phci_online(dev_info_t *ph_dip) 4367 { 4368 mdi_phci_t *ph; 4369 4370 /* pHCI online notification. Mark state accordingly */ 4371 ph = i_devi_get_phci(ph_dip); 4372 ASSERT(ph != NULL); 4373 MDI_PHCI_LOCK(ph); 4374 MDI_PHCI_SET_ONLINE(ph); 4375 MDI_PHCI_UNLOCK(ph); 4376 } 4377 4378 /* 4379 * mdi_devi_online(): 4380 * Online notification from NDI framework on pHCI/client 4381 * device online. 4382 * Return Values: 4383 * NDI_SUCCESS 4384 * MDI_FAILURE 4385 */ 4386 4387 /*ARGSUSED*/ 4388 int 4389 mdi_devi_online(dev_info_t *dip, uint_t flags) 4390 { 4391 if (MDI_PHCI(dip)) { 4392 i_mdi_phci_online(dip); 4393 } 4394 4395 if (MDI_CLIENT(dip)) { 4396 i_mdi_client_online(dip); 4397 } 4398 return (NDI_SUCCESS); 4399 } 4400 4401 /* 4402 * mdi_devi_offline(): 4403 * Offline notification from NDI framework on pHCI/Client device 4404 * offline. 
4405 * 4406 * Return Values: 4407 * NDI_SUCCESS 4408 * NDI_FAILURE 4409 */ 4410 4411 /*ARGSUSED*/ 4412 int 4413 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4414 { 4415 int rv = NDI_SUCCESS; 4416 4417 if (MDI_CLIENT(dip)) { 4418 rv = i_mdi_client_offline(dip, flags); 4419 if (rv != NDI_SUCCESS) 4420 return (rv); 4421 } 4422 4423 if (MDI_PHCI(dip)) { 4424 rv = i_mdi_phci_offline(dip, flags); 4425 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4426 /* set client back online */ 4427 i_mdi_client_online(dip); 4428 } 4429 } 4430 4431 return (rv); 4432 } 4433 4434 /*ARGSUSED*/ 4435 static int 4436 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4437 { 4438 int rv = NDI_SUCCESS; 4439 mdi_phci_t *ph; 4440 mdi_client_t *ct; 4441 mdi_pathinfo_t *pip; 4442 mdi_pathinfo_t *next; 4443 mdi_pathinfo_t *failed_pip = NULL; 4444 dev_info_t *cdip; 4445 4446 /* 4447 * pHCI component offline notification 4448 * Make sure that this pHCI instance is free to be offlined. 4449 * If it is OK to proceed, Offline and remove all the child 4450 * mdi_pathinfo nodes. This process automatically offlines 4451 * corresponding client devices, for which this pHCI provides 4452 * critical services. 4453 */ 4454 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n", 4455 dip)); 4456 4457 ph = i_devi_get_phci(dip); 4458 if (ph == NULL) { 4459 return (rv); 4460 } 4461 4462 MDI_PHCI_LOCK(ph); 4463 4464 if (MDI_PHCI_IS_OFFLINE(ph)) { 4465 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph)); 4466 MDI_PHCI_UNLOCK(ph); 4467 return (NDI_SUCCESS); 4468 } 4469 4470 /* 4471 * Check to see if the pHCI can be offlined 4472 */ 4473 if (ph->ph_unstable) { 4474 MDI_DEBUG(1, (CE_WARN, dip, 4475 "!One or more target devices are in transient " 4476 "state. This device can not be removed at " 4477 "this moment. 
Please try again later.")); 4478 MDI_PHCI_UNLOCK(ph); 4479 return (NDI_BUSY); 4480 } 4481 4482 pip = ph->ph_path_head; 4483 while (pip != NULL) { 4484 MDI_PI_LOCK(pip); 4485 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4486 /* 4487 * The mdi_pathinfo state is OK. Check the client state. 4488 * If failover in progress fail the pHCI from offlining 4489 */ 4490 ct = MDI_PI(pip)->pi_client; 4491 i_mdi_client_lock(ct, pip); 4492 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4493 (ct->ct_unstable)) { 4494 /* 4495 * Failover is in progress, Fail the DR 4496 */ 4497 MDI_DEBUG(1, (CE_WARN, dip, 4498 "!pHCI device (%s%d) is Busy. %s", 4499 ddi_driver_name(dip), ddi_get_instance(dip), 4500 "This device can not be removed at " 4501 "this moment. Please try again later.")); 4502 MDI_PI_UNLOCK(pip); 4503 MDI_CLIENT_UNLOCK(ct); 4504 MDI_PHCI_UNLOCK(ph); 4505 return (NDI_BUSY); 4506 } 4507 MDI_PI_UNLOCK(pip); 4508 4509 /* 4510 * Check to see of we are removing the last path of this 4511 * client device... 4512 */ 4513 cdip = ct->ct_dip; 4514 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4515 (i_mdi_client_compute_state(ct, ph) == 4516 MDI_CLIENT_STATE_FAILED)) { 4517 i_mdi_client_unlock(ct); 4518 MDI_PHCI_UNLOCK(ph); 4519 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4520 /* 4521 * ndi_devi_offline() failed. 4522 * This pHCI provides the critical path 4523 * to one or more client devices. 4524 * Return busy. 4525 */ 4526 MDI_PHCI_LOCK(ph); 4527 MDI_DEBUG(1, (CE_WARN, dip, 4528 "!pHCI device (%s%d) is Busy. %s", 4529 ddi_driver_name(dip), ddi_get_instance(dip), 4530 "This device can not be removed at " 4531 "this moment. 
Please try again later.")); 4532 failed_pip = pip; 4533 break; 4534 } else { 4535 MDI_PHCI_LOCK(ph); 4536 pip = next; 4537 } 4538 } else { 4539 i_mdi_client_unlock(ct); 4540 pip = next; 4541 } 4542 } 4543 4544 if (failed_pip) { 4545 pip = ph->ph_path_head; 4546 while (pip != failed_pip) { 4547 MDI_PI_LOCK(pip); 4548 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4549 ct = MDI_PI(pip)->pi_client; 4550 i_mdi_client_lock(ct, pip); 4551 cdip = ct->ct_dip; 4552 switch (MDI_CLIENT_STATE(ct)) { 4553 case MDI_CLIENT_STATE_OPTIMAL: 4554 case MDI_CLIENT_STATE_DEGRADED: 4555 if (cdip) { 4556 MDI_PI_UNLOCK(pip); 4557 i_mdi_client_unlock(ct); 4558 MDI_PHCI_UNLOCK(ph); 4559 (void) ndi_devi_online(cdip, 0); 4560 MDI_PHCI_LOCK(ph); 4561 pip = next; 4562 continue; 4563 } 4564 break; 4565 4566 case MDI_CLIENT_STATE_FAILED: 4567 if (cdip) { 4568 MDI_PI_UNLOCK(pip); 4569 i_mdi_client_unlock(ct); 4570 MDI_PHCI_UNLOCK(ph); 4571 (void) ndi_devi_offline(cdip, 0); 4572 MDI_PHCI_LOCK(ph); 4573 pip = next; 4574 continue; 4575 } 4576 break; 4577 } 4578 MDI_PI_UNLOCK(pip); 4579 i_mdi_client_unlock(ct); 4580 pip = next; 4581 } 4582 MDI_PHCI_UNLOCK(ph); 4583 return (NDI_BUSY); 4584 } 4585 4586 /* 4587 * Mark the pHCI as offline 4588 */ 4589 MDI_PHCI_SET_OFFLINE(ph); 4590 4591 /* 4592 * Mark the child mdi_pathinfo nodes as transient 4593 */ 4594 pip = ph->ph_path_head; 4595 while (pip != NULL) { 4596 MDI_PI_LOCK(pip); 4597 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4598 MDI_PI_SET_OFFLINING(pip); 4599 MDI_PI_UNLOCK(pip); 4600 pip = next; 4601 } 4602 MDI_PHCI_UNLOCK(ph); 4603 /* 4604 * Give a chance for any pending commands to execute 4605 */ 4606 delay(1); 4607 MDI_PHCI_LOCK(ph); 4608 pip = ph->ph_path_head; 4609 while (pip != NULL) { 4610 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4611 (void) i_mdi_pi_offline(pip, flags); 4612 MDI_PI_LOCK(pip); 4613 ct = MDI_PI(pip)->pi_client; 4614 if (!MDI_PI_IS_OFFLINE(pip)) { 4615 MDI_DEBUG(1, (CE_WARN, dip, 4616 "!pHCI device (%s%d) 
is Busy. %s", 4617 ddi_driver_name(dip), ddi_get_instance(dip), 4618 "This device can not be removed at " 4619 "this moment. Please try again later.")); 4620 MDI_PI_UNLOCK(pip); 4621 MDI_PHCI_SET_ONLINE(ph); 4622 MDI_PHCI_UNLOCK(ph); 4623 return (NDI_BUSY); 4624 } 4625 MDI_PI_UNLOCK(pip); 4626 pip = next; 4627 } 4628 MDI_PHCI_UNLOCK(ph); 4629 4630 return (rv); 4631 } 4632 4633 /*ARGSUSED*/ 4634 static int 4635 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4636 { 4637 int rv = NDI_SUCCESS; 4638 mdi_client_t *ct; 4639 4640 /* 4641 * Client component to go offline. Make sure that we are 4642 * not in failing over state and update client state 4643 * accordingly 4644 */ 4645 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n", 4646 dip)); 4647 ct = i_devi_get_client(dip); 4648 if (ct != NULL) { 4649 MDI_CLIENT_LOCK(ct); 4650 if (ct->ct_unstable) { 4651 /* 4652 * One or more paths are in transient state, 4653 * Dont allow offline of a client device 4654 */ 4655 MDI_DEBUG(1, (CE_WARN, dip, 4656 "!One or more paths to this device is " 4657 "in transient state. This device can not " 4658 "be removed at this moment. " 4659 "Please try again later.")); 4660 MDI_CLIENT_UNLOCK(ct); 4661 return (NDI_BUSY); 4662 } 4663 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4664 /* 4665 * Failover is in progress, Dont allow DR of 4666 * a client device 4667 */ 4668 MDI_DEBUG(1, (CE_WARN, dip, 4669 "!Client device (%s%d) is Busy. %s", 4670 ddi_driver_name(dip), ddi_get_instance(dip), 4671 "This device can not be removed at " 4672 "this moment. 
Please try again later.")); 4673 MDI_CLIENT_UNLOCK(ct); 4674 return (NDI_BUSY); 4675 } 4676 MDI_CLIENT_SET_OFFLINE(ct); 4677 4678 /* 4679 * Unbind our relationship with the dev_info node 4680 */ 4681 if (flags & NDI_DEVI_REMOVE) { 4682 ct->ct_dip = NULL; 4683 } 4684 MDI_CLIENT_UNLOCK(ct); 4685 } 4686 return (rv); 4687 } 4688 4689 /* 4690 * mdi_pre_attach(): 4691 * Pre attach() notification handler 4692 */ 4693 4694 /*ARGSUSED*/ 4695 int 4696 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4697 { 4698 /* don't support old DDI_PM_RESUME */ 4699 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4700 (cmd == DDI_PM_RESUME)) 4701 return (DDI_FAILURE); 4702 4703 return (DDI_SUCCESS); 4704 } 4705 4706 /* 4707 * mdi_post_attach(): 4708 * Post attach() notification handler 4709 */ 4710 4711 /*ARGSUSED*/ 4712 void 4713 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4714 { 4715 mdi_phci_t *ph; 4716 mdi_client_t *ct; 4717 mdi_pathinfo_t *pip; 4718 4719 if (MDI_PHCI(dip)) { 4720 ph = i_devi_get_phci(dip); 4721 ASSERT(ph != NULL); 4722 4723 MDI_PHCI_LOCK(ph); 4724 switch (cmd) { 4725 case DDI_ATTACH: 4726 MDI_DEBUG(2, (CE_NOTE, dip, 4727 "!pHCI post_attach: called %p\n", ph)); 4728 if (error == DDI_SUCCESS) { 4729 MDI_PHCI_SET_ATTACH(ph); 4730 } else { 4731 MDI_DEBUG(1, (CE_NOTE, dip, 4732 "!pHCI post_attach: failed error=%d\n", 4733 error)); 4734 MDI_PHCI_SET_DETACH(ph); 4735 } 4736 break; 4737 4738 case DDI_RESUME: 4739 MDI_DEBUG(2, (CE_NOTE, dip, 4740 "!pHCI post_resume: called %p\n", ph)); 4741 if (error == DDI_SUCCESS) { 4742 MDI_PHCI_SET_RESUME(ph); 4743 } else { 4744 MDI_DEBUG(1, (CE_NOTE, dip, 4745 "!pHCI post_resume: failed error=%d\n", 4746 error)); 4747 MDI_PHCI_SET_SUSPEND(ph); 4748 } 4749 break; 4750 } 4751 MDI_PHCI_UNLOCK(ph); 4752 } 4753 4754 if (MDI_CLIENT(dip)) { 4755 ct = i_devi_get_client(dip); 4756 ASSERT(ct != NULL); 4757 4758 MDI_CLIENT_LOCK(ct); 4759 switch (cmd) { 4760 case DDI_ATTACH: 4761 MDI_DEBUG(2, (CE_NOTE, dip, 
4762 "!Client post_attach: called %p\n", ct)); 4763 if (error != DDI_SUCCESS) { 4764 MDI_DEBUG(1, (CE_NOTE, dip, 4765 "!Client post_attach: failed error=%d\n", 4766 error)); 4767 MDI_CLIENT_SET_DETACH(ct); 4768 MDI_DEBUG(4, (CE_WARN, dip, 4769 "mdi_post_attach i_mdi_pm_reset_client\n")); 4770 i_mdi_pm_reset_client(ct); 4771 break; 4772 } 4773 4774 /* 4775 * Client device has successfully attached. 4776 * Create kstats for any pathinfo structures 4777 * initially associated with this client. 4778 */ 4779 for (pip = ct->ct_path_head; pip != NULL; 4780 pip = (mdi_pathinfo_t *) 4781 MDI_PI(pip)->pi_client_link) { 4782 (void) i_mdi_pi_kstat_create(pip); 4783 i_mdi_report_path_state(ct, pip); 4784 } 4785 MDI_CLIENT_SET_ATTACH(ct); 4786 break; 4787 4788 case DDI_RESUME: 4789 MDI_DEBUG(2, (CE_NOTE, dip, 4790 "!Client post_attach: called %p\n", ct)); 4791 if (error == DDI_SUCCESS) { 4792 MDI_CLIENT_SET_RESUME(ct); 4793 } else { 4794 MDI_DEBUG(1, (CE_NOTE, dip, 4795 "!Client post_resume: failed error=%d\n", 4796 error)); 4797 MDI_CLIENT_SET_SUSPEND(ct); 4798 } 4799 break; 4800 } 4801 MDI_CLIENT_UNLOCK(ct); 4802 } 4803 } 4804 4805 /* 4806 * mdi_pre_detach(): 4807 * Pre detach notification handler 4808 */ 4809 4810 /*ARGSUSED*/ 4811 int 4812 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4813 { 4814 int rv = DDI_SUCCESS; 4815 4816 if (MDI_CLIENT(dip)) { 4817 (void) i_mdi_client_pre_detach(dip, cmd); 4818 } 4819 4820 if (MDI_PHCI(dip)) { 4821 rv = i_mdi_phci_pre_detach(dip, cmd); 4822 } 4823 4824 return (rv); 4825 } 4826 4827 /*ARGSUSED*/ 4828 static int 4829 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4830 { 4831 int rv = DDI_SUCCESS; 4832 mdi_phci_t *ph; 4833 mdi_client_t *ct; 4834 mdi_pathinfo_t *pip; 4835 mdi_pathinfo_t *failed_pip = NULL; 4836 mdi_pathinfo_t *next; 4837 4838 ph = i_devi_get_phci(dip); 4839 if (ph == NULL) { 4840 return (rv); 4841 } 4842 4843 MDI_PHCI_LOCK(ph); 4844 switch (cmd) { 4845 case DDI_DETACH: 4846 MDI_DEBUG(2, (CE_NOTE, 
dip, 4847 "!pHCI pre_detach: called %p\n", ph)); 4848 if (!MDI_PHCI_IS_OFFLINE(ph)) { 4849 /* 4850 * mdi_pathinfo nodes are still attached to 4851 * this pHCI. Fail the detach for this pHCI. 4852 */ 4853 MDI_DEBUG(2, (CE_WARN, dip, 4854 "!pHCI pre_detach: " 4855 "mdi_pathinfo nodes are still attached " 4856 "%p\n", ph)); 4857 rv = DDI_FAILURE; 4858 break; 4859 } 4860 MDI_PHCI_SET_DETACH(ph); 4861 break; 4862 4863 case DDI_SUSPEND: 4864 /* 4865 * pHCI is getting suspended. Since mpxio client 4866 * devices may not be suspended at this point, to avoid 4867 * a potential stack overflow, it is important to suspend 4868 * client devices before pHCI can be suspended. 4869 */ 4870 4871 MDI_DEBUG(2, (CE_NOTE, dip, 4872 "!pHCI pre_suspend: called %p\n", ph)); 4873 /* 4874 * Suspend all the client devices accessible through this pHCI 4875 */ 4876 pip = ph->ph_path_head; 4877 while (pip != NULL && rv == DDI_SUCCESS) { 4878 dev_info_t *cdip; 4879 MDI_PI_LOCK(pip); 4880 next = 4881 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4882 ct = MDI_PI(pip)->pi_client; 4883 i_mdi_client_lock(ct, pip); 4884 cdip = ct->ct_dip; 4885 MDI_PI_UNLOCK(pip); 4886 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 4887 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 4888 i_mdi_client_unlock(ct); 4889 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 4890 DDI_SUCCESS) { 4891 /* 4892 * Suspend of one of the client 4893 * device has failed. 4894 */ 4895 MDI_DEBUG(1, (CE_WARN, dip, 4896 "!Suspend of device (%s%d) failed.", 4897 ddi_driver_name(cdip), 4898 ddi_get_instance(cdip))); 4899 failed_pip = pip; 4900 break; 4901 } 4902 } else { 4903 i_mdi_client_unlock(ct); 4904 } 4905 pip = next; 4906 } 4907 4908 if (rv == DDI_SUCCESS) { 4909 /* 4910 * Suspend of client devices is complete. Proceed 4911 * with pHCI suspend. 4912 */ 4913 MDI_PHCI_SET_SUSPEND(ph); 4914 } else { 4915 /* 4916 * Revert back all the suspended client device states 4917 * to converse. 
4918 */ 4919 pip = ph->ph_path_head; 4920 while (pip != failed_pip) { 4921 dev_info_t *cdip; 4922 MDI_PI_LOCK(pip); 4923 next = 4924 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4925 ct = MDI_PI(pip)->pi_client; 4926 i_mdi_client_lock(ct, pip); 4927 cdip = ct->ct_dip; 4928 MDI_PI_UNLOCK(pip); 4929 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 4930 i_mdi_client_unlock(ct); 4931 (void) devi_attach(cdip, DDI_RESUME); 4932 } else { 4933 i_mdi_client_unlock(ct); 4934 } 4935 pip = next; 4936 } 4937 } 4938 break; 4939 4940 default: 4941 rv = DDI_FAILURE; 4942 break; 4943 } 4944 MDI_PHCI_UNLOCK(ph); 4945 return (rv); 4946 } 4947 4948 /*ARGSUSED*/ 4949 static int 4950 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4951 { 4952 int rv = DDI_SUCCESS; 4953 mdi_client_t *ct; 4954 4955 ct = i_devi_get_client(dip); 4956 if (ct == NULL) { 4957 return (rv); 4958 } 4959 4960 MDI_CLIENT_LOCK(ct); 4961 switch (cmd) { 4962 case DDI_DETACH: 4963 MDI_DEBUG(2, (CE_NOTE, dip, 4964 "!Client pre_detach: called %p\n", ct)); 4965 MDI_CLIENT_SET_DETACH(ct); 4966 break; 4967 4968 case DDI_SUSPEND: 4969 MDI_DEBUG(2, (CE_NOTE, dip, 4970 "!Client pre_suspend: called %p\n", ct)); 4971 MDI_CLIENT_SET_SUSPEND(ct); 4972 break; 4973 4974 default: 4975 rv = DDI_FAILURE; 4976 break; 4977 } 4978 MDI_CLIENT_UNLOCK(ct); 4979 return (rv); 4980 } 4981 4982 /* 4983 * mdi_post_detach(): 4984 * Post detach notification handler 4985 */ 4986 4987 /*ARGSUSED*/ 4988 void 4989 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 4990 { 4991 /* 4992 * Detach/Suspend of mpxio component failed. Update our state 4993 * too 4994 */ 4995 if (MDI_PHCI(dip)) 4996 i_mdi_phci_post_detach(dip, cmd, error); 4997 4998 if (MDI_CLIENT(dip)) 4999 i_mdi_client_post_detach(dip, cmd, error); 5000 } 5001 5002 /*ARGSUSED*/ 5003 static void 5004 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5005 { 5006 mdi_phci_t *ph; 5007 5008 /* 5009 * Detach/Suspend of phci component failed. 
Update our state 5010 * too 5011 */ 5012 ph = i_devi_get_phci(dip); 5013 if (ph == NULL) { 5014 return; 5015 } 5016 5017 MDI_PHCI_LOCK(ph); 5018 /* 5019 * Detach of pHCI failed. Restore back converse 5020 * state 5021 */ 5022 switch (cmd) { 5023 case DDI_DETACH: 5024 MDI_DEBUG(2, (CE_NOTE, dip, 5025 "!pHCI post_detach: called %p\n", ph)); 5026 if (error != DDI_SUCCESS) 5027 MDI_PHCI_SET_ATTACH(ph); 5028 break; 5029 5030 case DDI_SUSPEND: 5031 MDI_DEBUG(2, (CE_NOTE, dip, 5032 "!pHCI post_suspend: called %p\n", ph)); 5033 if (error != DDI_SUCCESS) 5034 MDI_PHCI_SET_RESUME(ph); 5035 break; 5036 } 5037 MDI_PHCI_UNLOCK(ph); 5038 } 5039 5040 /*ARGSUSED*/ 5041 static void 5042 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5043 { 5044 mdi_client_t *ct; 5045 5046 ct = i_devi_get_client(dip); 5047 if (ct == NULL) { 5048 return; 5049 } 5050 MDI_CLIENT_LOCK(ct); 5051 /* 5052 * Detach of Client failed. Restore back converse 5053 * state 5054 */ 5055 switch (cmd) { 5056 case DDI_DETACH: 5057 MDI_DEBUG(2, (CE_NOTE, dip, 5058 "!Client post_detach: called %p\n", ct)); 5059 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5060 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5061 "i_mdi_pm_rele_client\n")); 5062 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5063 } else { 5064 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5065 "i_mdi_pm_reset_client\n")); 5066 i_mdi_pm_reset_client(ct); 5067 } 5068 if (error != DDI_SUCCESS) 5069 MDI_CLIENT_SET_ATTACH(ct); 5070 break; 5071 5072 case DDI_SUSPEND: 5073 MDI_DEBUG(2, (CE_NOTE, dip, 5074 "!Client post_suspend: called %p\n", ct)); 5075 if (error != DDI_SUCCESS) 5076 MDI_CLIENT_SET_RESUME(ct); 5077 break; 5078 } 5079 MDI_CLIENT_UNLOCK(ct); 5080 } 5081 5082 /* 5083 * create and install per-path (client - pHCI) statistics 5084 * I/O stats supported: nread, nwritten, reads, and writes 5085 * Error stats - hard errors, soft errors, & transport errors 5086 */ 5087 static int 5088 
i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
{
	/*
	 * Returns MDI_SUCCESS when pip already has kstats, when it can share
	 * the kstats of another path between the same client and pHCI, or
	 * when a fresh pair of kstats was created and installed.  Returns
	 * MDI_FAILURE when the kstat name would not fit or kstat_create()
	 * fails.  The caller must hold the client's ct_mutex.
	 */
	dev_info_t		*client = MDI_PI(pip)->pi_client->ct_dip;
	dev_info_t		*ppath = MDI_PI(pip)->pi_phci->ph_dip;
	char			ksname[KSTAT_STRLEN];
	mdi_pathinfo_t		*cpip;
	const char		*err_postfix = ",err";
	kstat_t			*kiosp, *kerrsp;
	struct pi_errs		*nsp;
	struct mdi_pi_kstats	*mdi_statp;
	size_t			namelen;

	ASSERT(client != NULL && ppath != NULL);

	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));

	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
		if (cpip == pip)
			continue;
		/*
		 * We have found a different path with same parent
		 * kstats for a given client-pHCI are common
		 */
		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
		    (MDI_PI(cpip)->pi_kstats != NULL)) {
			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
			return (MDI_SUCCESS);
		}
	}

	/*
	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
	 * clamp length of name against max length of error kstat name.
	 *
	 * The error kstat is named ksname + err_postfix, and that string
	 * plus its terminating NUL must fit in ksname[KSTAT_STRLEN].  The
	 * comparison therefore has to be ">=": a base name of exactly
	 * KSTAT_STRLEN - strlen(err_postfix) characters would make the
	 * strcat() below write one byte past the end of ksname.
	 */
	namelen = snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
	    ddi_driver_name(client), ddi_get_instance(client),
	    ddi_driver_name(ppath), ddi_get_instance(ppath));
	if (namelen + strlen(err_postfix) >= KSTAT_STRLEN) {
		return (MDI_FAILURE);
	}
	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
		return (MDI_FAILURE);
	}

	(void) strcat(ksname, err_postfix);
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);

	if (kerrsp == NULL) {
		/* Do not leave a half-created pair behind. */
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	/* The new bookkeeping record starts with this path as sole user. */
	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}

/*
 * destroy per-path
properties 5179 */ 5180 static void 5181 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5182 { 5183 5184 struct mdi_pi_kstats *mdi_statp; 5185 5186 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5187 return; 5188 5189 MDI_PI(pip)->pi_kstats = NULL; 5190 5191 /* 5192 * the kstat may be shared between multiple pathinfo nodes 5193 * decrement this pathinfo's usage, removing the kstats 5194 * themselves when the last pathinfo reference is removed. 5195 */ 5196 ASSERT(mdi_statp->pi_kstat_ref > 0); 5197 if (--mdi_statp->pi_kstat_ref != 0) 5198 return; 5199 5200 kstat_delete(mdi_statp->pi_kstat_iostats); 5201 kstat_delete(mdi_statp->pi_kstat_errstats); 5202 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5203 } 5204 5205 /* 5206 * update I/O paths KSTATS 5207 */ 5208 void 5209 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5210 { 5211 kstat_t *iostatp; 5212 size_t xfer_cnt; 5213 5214 ASSERT(pip != NULL); 5215 5216 /* 5217 * I/O can be driven across a path prior to having path 5218 * statistics available, i.e. probe(9e). 5219 */ 5220 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5221 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5222 xfer_cnt = bp->b_bcount - bp->b_resid; 5223 if (bp->b_flags & B_READ) { 5224 KSTAT_IO_PTR(iostatp)->reads++; 5225 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5226 } else { 5227 KSTAT_IO_PTR(iostatp)->writes++; 5228 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5229 } 5230 } 5231 } 5232 5233 /* 5234 * disable the path to a particular pHCI (pHCI specified in the phci_path 5235 * argument) for a particular client (specified in the client_path argument). 5236 * Disabling a path means that MPxIO will not select the disabled path for 5237 * routing any new I/O requests. 
5238 */ 5239 int 5240 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5241 { 5242 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5243 } 5244 5245 /* 5246 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5247 * argument) for a particular client (specified in the client_path argument). 5248 * Enabling a path means that MPxIO may select the enabled path for routing 5249 * future I/O requests, subject to other path state constraints. 5250 */ 5251 5252 int 5253 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5254 { 5255 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5256 } 5257 5258 5259 /* 5260 * Common routine for doing enable/disable. 5261 */ 5262 int 5263 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5264 { 5265 5266 mdi_phci_t *ph; 5267 mdi_vhci_t *vh = NULL; 5268 mdi_client_t *ct; 5269 mdi_pathinfo_t *next, *pip; 5270 int found_it; 5271 int (*f)() = NULL; 5272 int rv; 5273 int sync_flag = 0; 5274 5275 ph = i_devi_get_phci(pdip); 5276 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5277 " Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip)); 5278 if (ph == NULL) { 5279 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5280 " failed. ph = NULL operation = %d\n", op)); 5281 return (MDI_FAILURE); 5282 } 5283 5284 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5285 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5286 " Invalid operation = %d\n", op)); 5287 return (MDI_FAILURE); 5288 } 5289 5290 sync_flag = (flags << 8) & 0xf00; 5291 5292 vh = ph->ph_vhci; 5293 f = vh->vh_ops->vo_pi_state_change; 5294 5295 if (cdip == NULL) { 5296 /* 5297 * Need to mark the Phci as enabled/disabled. 
5298 */ 5299 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5300 "Operation %d for the phci\n", op)); 5301 MDI_PHCI_LOCK(ph); 5302 switch (flags) { 5303 case USER_DISABLE: 5304 if (op == MDI_DISABLE_OP) 5305 MDI_PHCI_SET_USER_DISABLE(ph); 5306 else 5307 MDI_PHCI_SET_USER_ENABLE(ph); 5308 break; 5309 case DRIVER_DISABLE: 5310 if (op == MDI_DISABLE_OP) 5311 MDI_PHCI_SET_DRV_DISABLE(ph); 5312 else 5313 MDI_PHCI_SET_DRV_ENABLE(ph); 5314 break; 5315 case DRIVER_DISABLE_TRANSIENT: 5316 if (op == MDI_DISABLE_OP) 5317 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5318 else 5319 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5320 break; 5321 default: 5322 MDI_PHCI_UNLOCK(ph); 5323 MDI_DEBUG(1, (CE_NOTE, NULL, 5324 "!i_mdi_pi_enable_disable:" 5325 " Invalid flag argument= %d\n", flags)); 5326 } 5327 5328 /* 5329 * Phci has been disabled. Now try to enable/disable 5330 * path info's to each client. 5331 */ 5332 pip = ph->ph_path_head; 5333 while (pip != NULL) { 5334 /* 5335 * Do a callback into the mdi consumer to let it 5336 * know that path is about to be enabled/disabled. 
5337 */ 5338 if (f != NULL) { 5339 rv = (*f)(vh->vh_dip, pip, 0, 5340 MDI_PI_EXT_STATE(pip), 5341 MDI_EXT_STATE_CHANGE | sync_flag | 5342 op | MDI_BEFORE_STATE_CHANGE); 5343 if (rv != MDI_SUCCESS) { 5344 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5345 "!vo_pi_state_change: failed rv = %x", rv)); 5346 } 5347 } 5348 5349 MDI_PI_LOCK(pip); 5350 next = 5351 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5352 switch (flags) { 5353 case USER_DISABLE: 5354 if (op == MDI_DISABLE_OP) 5355 MDI_PI_SET_USER_DISABLE(pip); 5356 else 5357 MDI_PI_SET_USER_ENABLE(pip); 5358 break; 5359 case DRIVER_DISABLE: 5360 if (op == MDI_DISABLE_OP) 5361 MDI_PI_SET_DRV_DISABLE(pip); 5362 else 5363 MDI_PI_SET_DRV_ENABLE(pip); 5364 break; 5365 case DRIVER_DISABLE_TRANSIENT: 5366 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5367 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5368 else 5369 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5370 break; 5371 } 5372 MDI_PI_UNLOCK(pip); 5373 /* 5374 * Do a callback into the mdi consumer to let it 5375 * know that path is now enabled/disabled. 5376 */ 5377 if (f != NULL) { 5378 rv = (*f)(vh->vh_dip, pip, 0, 5379 MDI_PI_EXT_STATE(pip), 5380 MDI_EXT_STATE_CHANGE | sync_flag | 5381 op | MDI_AFTER_STATE_CHANGE); 5382 if (rv != MDI_SUCCESS) { 5383 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5384 "!vo_pi_state_change: failed rv = %x", rv)); 5385 } 5386 } 5387 pip = next; 5388 } 5389 MDI_PHCI_UNLOCK(ph); 5390 } else { 5391 5392 /* 5393 * Disable a specific client. 5394 */ 5395 ct = i_devi_get_client(cdip); 5396 if (ct == NULL) { 5397 MDI_DEBUG(1, (CE_NOTE, NULL, 5398 "!i_mdi_pi_enable_disable:" 5399 " failed. 
ct = NULL operation = %d\n", op)); 5400 return (MDI_FAILURE); 5401 } 5402 5403 MDI_CLIENT_LOCK(ct); 5404 pip = ct->ct_path_head; 5405 found_it = 0; 5406 while (pip != NULL) { 5407 MDI_PI_LOCK(pip); 5408 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5409 if (MDI_PI(pip)->pi_phci == ph) { 5410 MDI_PI_UNLOCK(pip); 5411 found_it = 1; 5412 break; 5413 } 5414 MDI_PI_UNLOCK(pip); 5415 pip = next; 5416 } 5417 5418 MDI_CLIENT_UNLOCK(ct); 5419 if (found_it == 0) { 5420 MDI_DEBUG(1, (CE_NOTE, NULL, 5421 "!i_mdi_pi_enable_disable:" 5422 " failed. Could not find corresponding pip\n")); 5423 return (MDI_FAILURE); 5424 } 5425 /* 5426 * Do a callback into the mdi consumer to let it 5427 * know that path is about to get enabled/disabled. 5428 */ 5429 if (f != NULL) { 5430 rv = (*f)(vh->vh_dip, pip, 0, 5431 MDI_PI_EXT_STATE(pip), 5432 MDI_EXT_STATE_CHANGE | sync_flag | 5433 op | MDI_BEFORE_STATE_CHANGE); 5434 if (rv != MDI_SUCCESS) { 5435 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5436 "!vo_pi_state_change: failed rv = %x", rv)); 5437 } 5438 } 5439 MDI_PI_LOCK(pip); 5440 switch (flags) { 5441 case USER_DISABLE: 5442 if (op == MDI_DISABLE_OP) 5443 MDI_PI_SET_USER_DISABLE(pip); 5444 else 5445 MDI_PI_SET_USER_ENABLE(pip); 5446 break; 5447 case DRIVER_DISABLE: 5448 if (op == MDI_DISABLE_OP) 5449 MDI_PI_SET_DRV_DISABLE(pip); 5450 else 5451 MDI_PI_SET_DRV_ENABLE(pip); 5452 break; 5453 case DRIVER_DISABLE_TRANSIENT: 5454 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5455 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5456 else 5457 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5458 break; 5459 } 5460 MDI_PI_UNLOCK(pip); 5461 /* 5462 * Do a callback into the mdi consumer to let it 5463 * know that path is now enabled/disabled. 
5464 */ 5465 if (f != NULL) { 5466 rv = (*f)(vh->vh_dip, pip, 0, 5467 MDI_PI_EXT_STATE(pip), 5468 MDI_EXT_STATE_CHANGE | sync_flag | 5469 op | MDI_AFTER_STATE_CHANGE); 5470 if (rv != MDI_SUCCESS) { 5471 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5472 "!vo_pi_state_change: failed rv = %x", rv)); 5473 } 5474 } 5475 } 5476 5477 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5478 " Returning success pdip = %p cdip = %p\n", op, pdip, cdip)); 5479 return (MDI_SUCCESS); 5480 } 5481 5482 /*ARGSUSED3*/ 5483 int 5484 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp, 5485 int flags, clock_t timeout) 5486 { 5487 mdi_pathinfo_t *pip; 5488 dev_info_t *dip; 5489 clock_t interval = drv_usectohz(100000); /* 0.1 sec */ 5490 char *paddr; 5491 5492 MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm)); 5493 5494 if (!MDI_PHCI(pdip)) 5495 return (MDI_FAILURE); 5496 5497 paddr = strchr(devnm, '@'); 5498 if (paddr == NULL) 5499 return (MDI_FAILURE); 5500 5501 paddr++; /* skip '@' */ 5502 pip = mdi_pi_find(pdip, NULL, paddr); 5503 while (pip == NULL && timeout > 0) { 5504 if (interval > timeout) 5505 interval = timeout; 5506 if (flags & NDI_DEVI_DEBUG) { 5507 cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n", 5508 ddi_driver_name(pdip), ddi_get_instance(pdip), 5509 paddr, interval, timeout); 5510 } 5511 delay(interval); 5512 timeout -= interval; 5513 interval += interval; 5514 pip = mdi_pi_find(pdip, NULL, paddr); 5515 } 5516 5517 if (pip == NULL) 5518 return (MDI_FAILURE); 5519 dip = mdi_pi_get_client(pip); 5520 if (ndi_devi_online(dip, flags) != NDI_SUCCESS) 5521 return (MDI_FAILURE); 5522 *cdipp = dip; 5523 5524 /* TODO: holding should happen inside search functions */ 5525 ndi_hold_devi(dip); 5526 return (MDI_SUCCESS); 5527 } 5528 5529 /* 5530 * Ensure phci powered up 5531 */ 5532 static void 5533 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5534 { 5535 dev_info_t *ph_dip; 5536 5537 ASSERT(pip != NULL); 5538 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5539 5540 if 
(MDI_PI(pip)->pi_pm_held) { 5541 return; 5542 } 5543 5544 ph_dip = mdi_pi_get_phci(pip); 5545 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n", 5546 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5547 if (ph_dip == NULL) { 5548 return; 5549 } 5550 5551 MDI_PI_UNLOCK(pip); 5552 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5553 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5554 pm_hold_power(ph_dip); 5555 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5556 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5557 MDI_PI_LOCK(pip); 5558 5559 MDI_PI(pip)->pi_pm_held = 1; 5560 } 5561 5562 /* 5563 * Allow phci powered down 5564 */ 5565 static void 5566 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5567 { 5568 dev_info_t *ph_dip = NULL; 5569 5570 ASSERT(pip != NULL); 5571 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5572 5573 if (MDI_PI(pip)->pi_pm_held == 0) { 5574 return; 5575 } 5576 5577 ph_dip = mdi_pi_get_phci(pip); 5578 ASSERT(ph_dip != NULL); 5579 5580 MDI_PI_UNLOCK(pip); 5581 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n", 5582 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5583 5584 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5585 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5586 pm_rele_power(ph_dip); 5587 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5588 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5589 5590 MDI_PI_LOCK(pip); 5591 MDI_PI(pip)->pi_pm_held = 0; 5592 } 5593 5594 static void 5595 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5596 { 5597 ASSERT(ct); 5598 5599 ct->ct_power_cnt += incr; 5600 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client " 5601 "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr)); 5602 ASSERT(ct->ct_power_cnt >= 0); 5603 } 5604 5605 static void 5606 i_mdi_rele_all_phci(mdi_client_t *ct) 5607 { 5608 mdi_pathinfo_t *pip; 5609 5610 ASSERT(mutex_owned(&ct->ct_mutex)); 5611 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5612 while (pip != NULL) { 5613 mdi_hold_path(pip); 5614 MDI_PI_LOCK(pip); 5615 
i_mdi_pm_rele_pip(pip); 5616 MDI_PI_UNLOCK(pip); 5617 mdi_rele_path(pip); 5618 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5619 } 5620 } 5621 5622 static void 5623 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5624 { 5625 ASSERT(ct); 5626 5627 if (i_ddi_node_state(ct->ct_dip) >= DS_READY) { 5628 ct->ct_power_cnt -= decr; 5629 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client " 5630 "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr)); 5631 } 5632 5633 ASSERT(ct->ct_power_cnt >= 0); 5634 if (ct->ct_power_cnt == 0) { 5635 i_mdi_rele_all_phci(ct); 5636 return; 5637 } 5638 } 5639 5640 static void 5641 i_mdi_pm_reset_client(mdi_client_t *ct) 5642 { 5643 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client " 5644 "ct_power_cnt = %d\n", ct->ct_power_cnt)); 5645 ct->ct_power_cnt = 0; 5646 i_mdi_rele_all_phci(ct); 5647 ct->ct_powercnt_reset = 1; 5648 ct->ct_powercnt_held = 0; 5649 } 5650 5651 static void 5652 i_mdi_pm_hold_all_phci(mdi_client_t *ct) 5653 { 5654 mdi_pathinfo_t *pip; 5655 ASSERT(mutex_owned(&ct->ct_mutex)); 5656 5657 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5658 while (pip != NULL) { 5659 mdi_hold_path(pip); 5660 MDI_PI_LOCK(pip); 5661 i_mdi_pm_hold_pip(pip); 5662 MDI_PI_UNLOCK(pip); 5663 mdi_rele_path(pip); 5664 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5665 } 5666 } 5667 5668 static int 5669 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5670 { 5671 int ret; 5672 dev_info_t *ph_dip; 5673 5674 MDI_PI_LOCK(pip); 5675 i_mdi_pm_hold_pip(pip); 5676 5677 ph_dip = mdi_pi_get_phci(pip); 5678 MDI_PI_UNLOCK(pip); 5679 5680 /* bring all components of phci to full power */ 5681 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5682 "pm_powerup for %s%d\n", ddi_get_name(ph_dip), 5683 ddi_get_instance(ph_dip))); 5684 5685 ret = pm_powerup(ph_dip); 5686 5687 if (ret == DDI_FAILURE) { 5688 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5689 "pm_powerup FAILED for %s%d\n", 5690 ddi_get_name(ph_dip), 
ddi_get_instance(ph_dip))); 5691 5692 MDI_PI_LOCK(pip); 5693 i_mdi_pm_rele_pip(pip); 5694 MDI_PI_UNLOCK(pip); 5695 return (MDI_FAILURE); 5696 } 5697 5698 return (MDI_SUCCESS); 5699 } 5700 5701 static int 5702 i_mdi_power_all_phci(mdi_client_t *ct) 5703 { 5704 mdi_pathinfo_t *pip; 5705 int succeeded = 0; 5706 5707 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5708 while (pip != NULL) { 5709 mdi_hold_path(pip); 5710 MDI_CLIENT_UNLOCK(ct); 5711 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5712 succeeded = 1; 5713 5714 ASSERT(ct == MDI_PI(pip)->pi_client); 5715 MDI_CLIENT_LOCK(ct); 5716 mdi_rele_path(pip); 5717 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5718 } 5719 5720 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5721 } 5722 5723 /* 5724 * mdi_bus_power(): 5725 * 1. Place the phci(s) into powered up state so that 5726 * client can do power management 5727 * 2. Ensure phci powered up as client power managing 5728 * Return Values: 5729 * MDI_SUCCESS 5730 * MDI_FAILURE 5731 */ 5732 int 5733 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5734 void *arg, void *result) 5735 { 5736 int ret = MDI_SUCCESS; 5737 pm_bp_child_pwrchg_t *bpc; 5738 mdi_client_t *ct; 5739 dev_info_t *cdip; 5740 pm_bp_has_changed_t *bphc; 5741 5742 /* 5743 * BUS_POWER_NOINVOL not supported 5744 */ 5745 if (op == BUS_POWER_NOINVOL) 5746 return (MDI_FAILURE); 5747 5748 /* 5749 * ignore other OPs. 
5750 * return quickly to save cou cycles on the ct processing 5751 */ 5752 switch (op) { 5753 case BUS_POWER_PRE_NOTIFICATION: 5754 case BUS_POWER_POST_NOTIFICATION: 5755 bpc = (pm_bp_child_pwrchg_t *)arg; 5756 cdip = bpc->bpc_dip; 5757 break; 5758 case BUS_POWER_HAS_CHANGED: 5759 bphc = (pm_bp_has_changed_t *)arg; 5760 cdip = bphc->bphc_dip; 5761 break; 5762 default: 5763 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5764 } 5765 5766 ASSERT(MDI_CLIENT(cdip)); 5767 5768 ct = i_devi_get_client(cdip); 5769 if (ct == NULL) 5770 return (MDI_FAILURE); 5771 5772 /* 5773 * wait till the mdi_pathinfo node state change are processed 5774 */ 5775 MDI_CLIENT_LOCK(ct); 5776 switch (op) { 5777 case BUS_POWER_PRE_NOTIFICATION: 5778 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5779 "BUS_POWER_PRE_NOTIFICATION:" 5780 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5781 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5782 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5783 5784 /* serialize power level change per client */ 5785 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5786 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5787 5788 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5789 5790 if (ct->ct_power_cnt == 0) { 5791 ret = i_mdi_power_all_phci(ct); 5792 } 5793 5794 /* 5795 * if new_level > 0: 5796 * - hold phci(s) 5797 * - power up phci(s) if not already 5798 * ignore power down 5799 */ 5800 if (bpc->bpc_nlevel > 0) { 5801 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5802 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5803 "mdi_bus_power i_mdi_pm_hold_client\n")); 5804 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5805 } 5806 } 5807 break; 5808 case BUS_POWER_POST_NOTIFICATION: 5809 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5810 "BUS_POWER_POST_NOTIFICATION:" 5811 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5812 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5813 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5814 *(int *)result)); 5815 5816 if (*(int *)result 
== DDI_SUCCESS) { 5817 if (bpc->bpc_nlevel > 0) { 5818 MDI_CLIENT_SET_POWER_UP(ct); 5819 } else { 5820 MDI_CLIENT_SET_POWER_DOWN(ct); 5821 } 5822 } 5823 5824 /* release the hold we did in pre-notification */ 5825 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5826 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5827 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5828 "mdi_bus_power i_mdi_pm_rele_client\n")); 5829 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5830 } 5831 5832 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5833 /* another thread might started attaching */ 5834 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5835 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5836 "mdi_bus_power i_mdi_pm_rele_client\n")); 5837 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5838 /* detaching has been taken care in pm_post_unconfig */ 5839 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5840 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5841 "mdi_bus_power i_mdi_pm_reset_client\n")); 5842 i_mdi_pm_reset_client(ct); 5843 } 5844 } 5845 5846 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5847 cv_broadcast(&ct->ct_powerchange_cv); 5848 5849 break; 5850 5851 /* need to do more */ 5852 case BUS_POWER_HAS_CHANGED: 5853 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 5854 "BUS_POWER_HAS_CHANGED:" 5855 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5856 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 5857 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 5858 5859 if (bphc->bphc_nlevel > 0 && 5860 bphc->bphc_nlevel > bphc->bphc_olevel) { 5861 if (ct->ct_power_cnt == 0) { 5862 ret = i_mdi_power_all_phci(ct); 5863 } 5864 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5865 "mdi_bus_power i_mdi_pm_hold_client\n")); 5866 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5867 } 5868 5869 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 5870 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5871 "mdi_bus_power i_mdi_pm_rele_client\n")); 5872 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5873 } 5874 break; 5875 } 5876 5877 
MDI_CLIENT_UNLOCK(ct); 5878 return (ret); 5879 } 5880 5881 static int 5882 i_mdi_pm_pre_config_one(dev_info_t *child) 5883 { 5884 int ret = MDI_SUCCESS; 5885 mdi_client_t *ct; 5886 5887 ct = i_devi_get_client(child); 5888 if (ct == NULL) 5889 return (MDI_FAILURE); 5890 5891 MDI_CLIENT_LOCK(ct); 5892 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5893 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5894 5895 if (!MDI_CLIENT_IS_FAILED(ct)) { 5896 MDI_CLIENT_UNLOCK(ct); 5897 MDI_DEBUG(4, (CE_NOTE, child, 5898 "i_mdi_pm_pre_config_one already configured\n")); 5899 return (MDI_SUCCESS); 5900 } 5901 5902 if (ct->ct_powercnt_held) { 5903 MDI_CLIENT_UNLOCK(ct); 5904 MDI_DEBUG(4, (CE_NOTE, child, 5905 "i_mdi_pm_pre_config_one ALREADY held\n")); 5906 return (MDI_SUCCESS); 5907 } 5908 5909 if (ct->ct_power_cnt == 0) { 5910 ret = i_mdi_power_all_phci(ct); 5911 } 5912 MDI_DEBUG(4, (CE_NOTE, child, 5913 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 5914 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5915 ct->ct_powercnt_held = 1; 5916 ct->ct_powercnt_reset = 0; 5917 MDI_CLIENT_UNLOCK(ct); 5918 return (ret); 5919 } 5920 5921 static int 5922 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child) 5923 { 5924 int ret = MDI_SUCCESS; 5925 dev_info_t *cdip; 5926 int circ; 5927 5928 ASSERT(MDI_VHCI(parent)); 5929 5930 /* ndi_devi_config_one */ 5931 if (child) { 5932 return (i_mdi_pm_pre_config_one(child)); 5933 } 5934 5935 /* devi_config_common */ 5936 ndi_devi_enter(parent, &circ); 5937 cdip = ddi_get_child(parent); 5938 while (cdip) { 5939 dev_info_t *next = ddi_get_next_sibling(cdip); 5940 5941 ret = i_mdi_pm_pre_config_one(cdip); 5942 if (ret != MDI_SUCCESS) 5943 break; 5944 cdip = next; 5945 } 5946 ndi_devi_exit(parent, circ); 5947 return (ret); 5948 } 5949 5950 static int 5951 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 5952 { 5953 int ret = MDI_SUCCESS; 5954 mdi_client_t *ct; 5955 5956 ct = i_devi_get_client(child); 5957 if (ct == NULL) 5958 return 
(MDI_FAILURE); 5959 5960 MDI_CLIENT_LOCK(ct); 5961 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5962 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5963 5964 if (i_ddi_node_state(ct->ct_dip) < DS_READY) { 5965 MDI_DEBUG(4, (CE_NOTE, child, 5966 "i_mdi_pm_pre_unconfig node detached already\n")); 5967 MDI_CLIENT_UNLOCK(ct); 5968 return (MDI_SUCCESS); 5969 } 5970 5971 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 5972 (flags & NDI_AUTODETACH)) { 5973 MDI_DEBUG(4, (CE_NOTE, child, 5974 "i_mdi_pm_pre_unconfig auto-modunload\n")); 5975 MDI_CLIENT_UNLOCK(ct); 5976 return (MDI_FAILURE); 5977 } 5978 5979 if (ct->ct_powercnt_held) { 5980 MDI_DEBUG(4, (CE_NOTE, child, 5981 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 5982 MDI_CLIENT_UNLOCK(ct); 5983 *held = 1; 5984 return (MDI_SUCCESS); 5985 } 5986 5987 if (ct->ct_power_cnt == 0) { 5988 ret = i_mdi_power_all_phci(ct); 5989 } 5990 MDI_DEBUG(4, (CE_NOTE, child, 5991 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 5992 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5993 ct->ct_powercnt_held = 1; 5994 ct->ct_powercnt_reset = 0; 5995 MDI_CLIENT_UNLOCK(ct); 5996 if (ret == MDI_SUCCESS) 5997 *held = 1; 5998 return (ret); 5999 } 6000 6001 static int 6002 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held, 6003 int flags) 6004 { 6005 int ret = MDI_SUCCESS; 6006 dev_info_t *cdip; 6007 int circ; 6008 6009 ASSERT(MDI_VHCI(parent)); 6010 *held = 0; 6011 6012 /* ndi_devi_unconfig_one */ 6013 if (child) { 6014 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6015 } 6016 6017 /* devi_unconfig_common */ 6018 ndi_devi_enter(parent, &circ); 6019 cdip = ddi_get_child(parent); 6020 while (cdip) { 6021 dev_info_t *next = ddi_get_next_sibling(cdip); 6022 6023 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6024 cdip = next; 6025 } 6026 ndi_devi_exit(parent, circ); 6027 6028 if (*held) 6029 ret = MDI_SUCCESS; 6030 6031 return (ret); 6032 } 6033 6034 static void 6035 i_mdi_pm_post_config_one(dev_info_t *child) 6036 { 6037 
mdi_client_t *ct; 6038 6039 ct = i_devi_get_client(child); 6040 if (ct == NULL) 6041 return; 6042 6043 MDI_CLIENT_LOCK(ct); 6044 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6045 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6046 6047 if (ct->ct_powercnt_reset || !ct->ct_powercnt_held) { 6048 MDI_DEBUG(4, (CE_NOTE, child, 6049 "i_mdi_pm_post_config_one NOT held\n")); 6050 MDI_CLIENT_UNLOCK(ct); 6051 return; 6052 } 6053 6054 /* client has not been updated */ 6055 if (MDI_CLIENT_IS_FAILED(ct)) { 6056 MDI_DEBUG(4, (CE_NOTE, child, 6057 "i_mdi_pm_post_config_one NOT configured\n")); 6058 MDI_CLIENT_UNLOCK(ct); 6059 return; 6060 } 6061 6062 /* another thread might have powered it down or detached it */ 6063 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6064 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6065 (i_ddi_node_state(ct->ct_dip) < DS_READY && 6066 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6067 MDI_DEBUG(4, (CE_NOTE, child, 6068 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6069 i_mdi_pm_reset_client(ct); 6070 } else { 6071 mdi_pathinfo_t *pip, *next; 6072 int valid_path_count = 0; 6073 6074 MDI_DEBUG(4, (CE_NOTE, child, 6075 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6076 pip = ct->ct_path_head; 6077 while (pip != NULL) { 6078 MDI_PI_LOCK(pip); 6079 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6080 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 6081 == MDI_PATHINFO_STATE_ONLINE || 6082 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 6083 == MDI_PATHINFO_STATE_STANDBY) 6084 valid_path_count ++; 6085 MDI_PI_UNLOCK(pip); 6086 pip = next; 6087 } 6088 i_mdi_pm_rele_client(ct, valid_path_count); 6089 } 6090 ct->ct_powercnt_held = 0; 6091 MDI_CLIENT_UNLOCK(ct); 6092 } 6093 6094 static void 6095 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child) 6096 { 6097 int circ; 6098 dev_info_t *cdip; 6099 ASSERT(MDI_VHCI(parent)); 6100 6101 /* ndi_devi_config_one */ 6102 if (child) { 6103 i_mdi_pm_post_config_one(child); 6104 return; 6105 } 6106 6107 /* 
devi_config_common */ 6108 ndi_devi_enter(parent, &circ); 6109 cdip = ddi_get_child(parent); 6110 while (cdip) { 6111 dev_info_t *next = ddi_get_next_sibling(cdip); 6112 6113 i_mdi_pm_post_config_one(cdip); 6114 cdip = next; 6115 } 6116 ndi_devi_exit(parent, circ); 6117 } 6118 6119 static void 6120 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6121 { 6122 mdi_client_t *ct; 6123 6124 ct = i_devi_get_client(child); 6125 if (ct == NULL) 6126 return; 6127 6128 MDI_CLIENT_LOCK(ct); 6129 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6130 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6131 6132 if (!ct->ct_powercnt_held) { 6133 MDI_DEBUG(4, (CE_NOTE, child, 6134 "i_mdi_pm_post_unconfig NOT held\n")); 6135 MDI_CLIENT_UNLOCK(ct); 6136 return; 6137 } 6138 6139 /* failure detaching or another thread just attached it */ 6140 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6141 i_ddi_node_state(ct->ct_dip) == DS_READY) || 6142 (i_ddi_node_state(ct->ct_dip) != DS_READY && 6143 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6144 MDI_DEBUG(4, (CE_NOTE, child, 6145 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6146 i_mdi_pm_reset_client(ct); 6147 } 6148 6149 MDI_DEBUG(4, (CE_NOTE, child, 6150 "i_mdi_pm_post_unconfig not changed\n")); 6151 MDI_CLIENT_UNLOCK(ct); 6152 } 6153 6154 static void 6155 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held) 6156 { 6157 int circ; 6158 dev_info_t *cdip; 6159 6160 ASSERT(MDI_VHCI(parent)); 6161 6162 if (!held) { 6163 MDI_DEBUG(4, (CE_NOTE, parent, 6164 "i_mdi_pm_post_unconfig held = %d\n", held)); 6165 return; 6166 } 6167 6168 if (child) { 6169 i_mdi_pm_post_unconfig_one(child); 6170 return; 6171 } 6172 6173 ndi_devi_enter(parent, &circ); 6174 cdip = ddi_get_child(parent); 6175 while (cdip) { 6176 dev_info_t *next = ddi_get_next_sibling(cdip); 6177 6178 i_mdi_pm_post_unconfig_one(cdip); 6179 cdip = next; 6180 } 6181 ndi_devi_exit(parent, circ); 6182 } 6183 6184 int 6185 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int 
flags) 6186 { 6187 int circ, ret = MDI_SUCCESS; 6188 dev_info_t *client_dip = NULL; 6189 mdi_client_t *ct; 6190 6191 /* 6192 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6193 * Power up pHCI for the named client device. 6194 * Note: Before the client is enumerated under vhci by phci, 6195 * client_dip can be NULL. Then proceed to power up all the 6196 * pHCIs. 6197 */ 6198 if (devnm != NULL) { 6199 ndi_devi_enter(vdip, &circ); 6200 client_dip = ndi_devi_findchild(vdip, devnm); 6201 ndi_devi_exit(vdip, circ); 6202 } 6203 6204 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op)); 6205 6206 switch (op) { 6207 case MDI_PM_PRE_CONFIG: 6208 ret = i_mdi_pm_pre_config(vdip, client_dip); 6209 6210 break; 6211 case MDI_PM_PRE_UNCONFIG: 6212 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6213 flags); 6214 6215 break; 6216 case MDI_PM_POST_CONFIG: 6217 i_mdi_pm_post_config(vdip, client_dip); 6218 6219 break; 6220 case MDI_PM_POST_UNCONFIG: 6221 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6222 6223 break; 6224 case MDI_PM_HOLD_POWER: 6225 case MDI_PM_RELE_POWER: 6226 ASSERT(args); 6227 6228 client_dip = (dev_info_t *)args; 6229 ASSERT(MDI_CLIENT(client_dip)); 6230 6231 ct = i_devi_get_client(client_dip); 6232 MDI_CLIENT_LOCK(ct); 6233 6234 if (op == MDI_PM_HOLD_POWER) { 6235 if (ct->ct_power_cnt == 0) { 6236 (void) i_mdi_power_all_phci(ct); 6237 MDI_DEBUG(4, (CE_NOTE, client_dip, 6238 "mdi_power i_mdi_pm_hold_client\n")); 6239 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6240 } 6241 } else { 6242 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6243 MDI_DEBUG(4, (CE_NOTE, client_dip, 6244 "mdi_power i_mdi_pm_rele_client\n")); 6245 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6246 } else { 6247 MDI_DEBUG(4, (CE_NOTE, client_dip, 6248 "mdi_power i_mdi_pm_reset_client\n")); 6249 i_mdi_pm_reset_client(ct); 6250 } 6251 } 6252 6253 MDI_CLIENT_UNLOCK(ct); 6254 break; 6255 default: 6256 break; 6257 } 6258 6259 return (ret); 6260 } 6261 6262 int 6263 
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6264 { 6265 mdi_vhci_t *vhci; 6266 6267 if (!MDI_VHCI(dip)) 6268 return (MDI_FAILURE); 6269 6270 if (mdi_class) { 6271 vhci = DEVI(dip)->devi_mdi_xhci; 6272 ASSERT(vhci); 6273 *mdi_class = vhci->vh_class; 6274 } 6275 6276 return (MDI_SUCCESS); 6277 } 6278 6279 int 6280 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6281 { 6282 mdi_phci_t *phci; 6283 6284 if (!MDI_PHCI(dip)) 6285 return (MDI_FAILURE); 6286 6287 if (mdi_class) { 6288 phci = DEVI(dip)->devi_mdi_xhci; 6289 ASSERT(phci); 6290 *mdi_class = phci->ph_vhci->vh_class; 6291 } 6292 6293 return (MDI_SUCCESS); 6294 } 6295 6296 int 6297 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6298 { 6299 mdi_client_t *client; 6300 6301 if (!MDI_CLIENT(dip)) 6302 return (MDI_FAILURE); 6303 6304 if (mdi_class) { 6305 client = DEVI(dip)->devi_mdi_client; 6306 ASSERT(client); 6307 *mdi_class = client->ct_vhci->vh_class; 6308 } 6309 6310 return (MDI_SUCCESS); 6311 } 6312 6313 void * 6314 mdi_client_get_vhci_private(dev_info_t *dip) 6315 { 6316 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6317 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6318 mdi_client_t *ct; 6319 ct = i_devi_get_client(dip); 6320 return (ct->ct_vprivate); 6321 } 6322 return (NULL); 6323 } 6324 6325 void 6326 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6327 { 6328 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6329 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6330 mdi_client_t *ct; 6331 ct = i_devi_get_client(dip); 6332 ct->ct_vprivate = data; 6333 } 6334 } 6335 /* 6336 * mdi_pi_get_vhci_private(): 6337 * Get the vhci private information associated with the 6338 * mdi_pathinfo node 6339 */ 6340 void * 6341 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6342 { 6343 caddr_t vprivate = NULL; 6344 if (pip) { 6345 vprivate = MDI_PI(pip)->pi_vprivate; 6346 } 6347 return (vprivate); 6348 } 6349 6350 /* 6351 * 
mdi_pi_set_vhci_private(): 6352 * Set the vhci private information in the mdi_pathinfo node 6353 */ 6354 void 6355 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6356 { 6357 if (pip) { 6358 MDI_PI(pip)->pi_vprivate = priv; 6359 } 6360 } 6361 6362 /* 6363 * mdi_phci_get_vhci_private(): 6364 * Get the vhci private information associated with the 6365 * mdi_phci node 6366 */ 6367 void * 6368 mdi_phci_get_vhci_private(dev_info_t *dip) 6369 { 6370 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6371 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6372 mdi_phci_t *ph; 6373 ph = i_devi_get_phci(dip); 6374 return (ph->ph_vprivate); 6375 } 6376 return (NULL); 6377 } 6378 6379 /* 6380 * mdi_phci_set_vhci_private(): 6381 * Set the vhci private information in the mdi_phci node 6382 */ 6383 void 6384 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6385 { 6386 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6387 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6388 mdi_phci_t *ph; 6389 ph = i_devi_get_phci(dip); 6390 ph->ph_vprivate = priv; 6391 } 6392 } 6393 6394 /* 6395 * List of vhci class names: 6396 * A vhci class name must be in this list only if the corresponding vhci 6397 * driver intends to use the mdi provided bus config implementation 6398 * (i.e., mdi_vhci_bus_config()). 6399 */ 6400 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6401 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6402 6403 /* 6404 * Built-in list of phci drivers for every vhci class. 6405 * All phci drivers expect iscsi have root device support. 6406 */ 6407 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 6408 { "fp", 1 }, 6409 { "iscsi", 0 }, 6410 { "ibsrp", 1 } 6411 }; 6412 6413 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 6414 6415 /* 6416 * During boot time, the on-disk vhci cache for every vhci class is read 6417 * in the form of an nvlist and stored here. 
6418 */ 6419 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6420 6421 /* nvpair names in vhci cache nvlist */ 6422 #define MDI_VHCI_CACHE_VERSION 1 6423 #define MDI_NVPNAME_VERSION "version" 6424 #define MDI_NVPNAME_PHCIS "phcis" 6425 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6426 6427 /* 6428 * Given vhci class name, return its on-disk vhci cache filename. 6429 * Memory for the returned filename which includes the full path is allocated 6430 * by this function. 6431 */ 6432 static char * 6433 vhclass2vhcache_filename(char *vhclass) 6434 { 6435 char *filename; 6436 int len; 6437 static char *fmt = "/etc/devices/mdi_%s_cache"; 6438 6439 /* 6440 * fmt contains the on-disk vhci cache file name format; 6441 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 6442 */ 6443 6444 /* the -1 below is to account for "%s" in the format string */ 6445 len = strlen(fmt) + strlen(vhclass) - 1; 6446 filename = kmem_alloc(len, KM_SLEEP); 6447 (void) snprintf(filename, len, fmt, vhclass); 6448 ASSERT(len == (strlen(filename) + 1)); 6449 return (filename); 6450 } 6451 6452 /* 6453 * initialize the vhci cache related data structures and read the on-disk 6454 * vhci cached data into memory. 6455 */ 6456 static void 6457 setup_vhci_cache(mdi_vhci_t *vh) 6458 { 6459 mdi_vhci_config_t *vhc; 6460 mdi_vhci_cache_t *vhcache; 6461 int i; 6462 nvlist_t *nvl = NULL; 6463 6464 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6465 vh->vh_config = vhc; 6466 vhcache = &vhc->vhc_vhcache; 6467 6468 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6469 6470 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6471 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6472 6473 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6474 6475 /* 6476 * Create string hash; same as mod_hash_create_strhash() except that 6477 * we use NULL key destructor. 
6478 */ 6479 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6480 mdi_bus_config_cache_hash_size, 6481 mod_hash_null_keydtor, mod_hash_null_valdtor, 6482 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6483 6484 setup_phci_driver_list(vh); 6485 6486 /* 6487 * The on-disk vhci cache is read during booting prior to the 6488 * lights-out period by mdi_read_devices_files(). 6489 */ 6490 for (i = 0; i < N_VHCI_CLASSES; i++) { 6491 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6492 nvl = vhcache_nvl[i]; 6493 vhcache_nvl[i] = NULL; 6494 break; 6495 } 6496 } 6497 6498 /* 6499 * this is to cover the case of some one manually causing unloading 6500 * (or detaching) and reloading (or attaching) of a vhci driver. 6501 */ 6502 if (nvl == NULL && modrootloaded) 6503 nvl = read_on_disk_vhci_cache(vh->vh_class); 6504 6505 if (nvl != NULL) { 6506 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6507 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6508 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6509 else { 6510 cmn_err(CE_WARN, 6511 "%s: data file corrupted, will recreate\n", 6512 vhc->vhc_vhcache_filename); 6513 } 6514 rw_exit(&vhcache->vhcache_lock); 6515 nvlist_free(nvl); 6516 } 6517 6518 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6519 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6520 6521 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 6522 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 6523 } 6524 6525 /* 6526 * free all vhci cache related resources 6527 */ 6528 static int 6529 destroy_vhci_cache(mdi_vhci_t *vh) 6530 { 6531 mdi_vhci_config_t *vhc = vh->vh_config; 6532 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6533 mdi_vhcache_phci_t *cphci, *cphci_next; 6534 mdi_vhcache_client_t *cct, *cct_next; 6535 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6536 6537 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6538 return (MDI_FAILURE); 6539 6540 kmem_free(vhc->vhc_vhcache_filename, 6541 
strlen(vhc->vhc_vhcache_filename) + 1); 6542 6543 if (vhc->vhc_phci_driver_list) 6544 free_phci_driver_list(vhc); 6545 6546 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6547 6548 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6549 cphci = cphci_next) { 6550 cphci_next = cphci->cphci_next; 6551 free_vhcache_phci(cphci); 6552 } 6553 6554 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 6555 cct_next = cct->cct_next; 6556 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6557 cpi_next = cpi->cpi_next; 6558 free_vhcache_pathinfo(cpi); 6559 } 6560 free_vhcache_client(cct); 6561 } 6562 6563 rw_destroy(&vhcache->vhcache_lock); 6564 6565 mutex_destroy(&vhc->vhc_lock); 6566 cv_destroy(&vhc->vhc_cv); 6567 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6568 return (MDI_SUCCESS); 6569 } 6570 6571 /* 6572 * Setup the list of phci drivers associated with the specified vhci class. 6573 * MDI uses this information to rebuild bus config cache if in case the 6574 * cache is not available or corrupted. 6575 */ 6576 static void 6577 setup_phci_driver_list(mdi_vhci_t *vh) 6578 { 6579 mdi_vhci_config_t *vhc = vh->vh_config; 6580 mdi_phci_driver_info_t *driver_list; 6581 char **driver_list1; 6582 uint_t ndrivers, ndrivers1; 6583 int i, j; 6584 6585 if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) { 6586 driver_list = scsi_phci_driver_list; 6587 ndrivers = sizeof (scsi_phci_driver_list) / 6588 sizeof (mdi_phci_driver_info_t); 6589 } else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) { 6590 driver_list = ib_phci_driver_list; 6591 ndrivers = sizeof (ib_phci_driver_list) / 6592 sizeof (mdi_phci_driver_info_t); 6593 } else { 6594 driver_list = NULL; 6595 ndrivers = 0; 6596 } 6597 6598 /* 6599 * The driver.conf file of a vhci driver can specify additional 6600 * phci drivers using a project private "phci-drivers" property. 
6601 */ 6602 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip, 6603 DDI_PROP_DONTPASS, "phci-drivers", &driver_list1, 6604 &ndrivers1) != DDI_PROP_SUCCESS) 6605 ndrivers1 = 0; 6606 6607 vhc->vhc_nphci_drivers = ndrivers + ndrivers1; 6608 if (vhc->vhc_nphci_drivers == 0) 6609 return; 6610 6611 vhc->vhc_phci_driver_list = kmem_alloc( 6612 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP); 6613 6614 for (i = 0; i < ndrivers; i++) { 6615 vhc->vhc_phci_driver_list[i].phdriver_name = 6616 i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP); 6617 vhc->vhc_phci_driver_list[i].phdriver_root_support = 6618 driver_list[i].phdriver_root_support; 6619 } 6620 6621 for (j = 0; j < ndrivers1; j++, i++) { 6622 vhc->vhc_phci_driver_list[i].phdriver_name = 6623 i_ddi_strdup(driver_list1[j], KM_SLEEP); 6624 vhc->vhc_phci_driver_list[i].phdriver_root_support = 1; 6625 } 6626 6627 if (ndrivers1) 6628 ddi_prop_free(driver_list1); 6629 } 6630 6631 /* 6632 * Free the memory allocated for the phci driver list 6633 */ 6634 static void 6635 free_phci_driver_list(mdi_vhci_config_t *vhc) 6636 { 6637 int i; 6638 6639 if (vhc->vhc_phci_driver_list == NULL) 6640 return; 6641 6642 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 6643 kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name, 6644 strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1); 6645 } 6646 6647 kmem_free(vhc->vhc_phci_driver_list, 6648 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers); 6649 } 6650 6651 /* 6652 * Stop all vhci cache related async threads and free their resources. 
6653 */ 6654 static int 6655 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6656 { 6657 mdi_async_client_config_t *acc, *acc_next; 6658 6659 mutex_enter(&vhc->vhc_lock); 6660 vhc->vhc_flags |= MDI_VHC_EXIT; 6661 ASSERT(vhc->vhc_acc_thrcount >= 0); 6662 cv_broadcast(&vhc->vhc_cv); 6663 6664 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6665 vhc->vhc_acc_thrcount != 0) { 6666 mutex_exit(&vhc->vhc_lock); 6667 delay(1); 6668 mutex_enter(&vhc->vhc_lock); 6669 } 6670 6671 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6672 6673 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6674 acc_next = acc->acc_next; 6675 free_async_client_config(acc); 6676 } 6677 vhc->vhc_acc_list_head = NULL; 6678 vhc->vhc_acc_list_tail = NULL; 6679 vhc->vhc_acc_count = 0; 6680 6681 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6682 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6683 mutex_exit(&vhc->vhc_lock); 6684 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6685 vhcache_dirty(vhc); 6686 return (MDI_FAILURE); 6687 } 6688 } else 6689 mutex_exit(&vhc->vhc_lock); 6690 6691 if (callb_delete(vhc->vhc_cbid) != 0) 6692 return (MDI_FAILURE); 6693 6694 return (MDI_SUCCESS); 6695 } 6696 6697 /* 6698 * Stop vhci cache flush thread 6699 */ 6700 /* ARGSUSED */ 6701 static boolean_t 6702 stop_vhcache_flush_thread(void *arg, int code) 6703 { 6704 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 6705 6706 mutex_enter(&vhc->vhc_lock); 6707 vhc->vhc_flags |= MDI_VHC_EXIT; 6708 cv_broadcast(&vhc->vhc_cv); 6709 6710 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6711 mutex_exit(&vhc->vhc_lock); 6712 delay(1); 6713 mutex_enter(&vhc->vhc_lock); 6714 } 6715 6716 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6717 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6718 mutex_exit(&vhc->vhc_lock); 6719 (void) flush_vhcache(vhc, 1); 6720 } else 6721 mutex_exit(&vhc->vhc_lock); 6722 6723 return (B_TRUE); 6724 } 6725 6726 /* 6727 * Enqueue the vhcache phci (cphci) at the tail of the list 6728 */ 6729 static 
void 6730 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6731 { 6732 cphci->cphci_next = NULL; 6733 if (vhcache->vhcache_phci_head == NULL) 6734 vhcache->vhcache_phci_head = cphci; 6735 else 6736 vhcache->vhcache_phci_tail->cphci_next = cphci; 6737 vhcache->vhcache_phci_tail = cphci; 6738 } 6739 6740 /* 6741 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6742 */ 6743 static void 6744 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6745 mdi_vhcache_pathinfo_t *cpi) 6746 { 6747 cpi->cpi_next = NULL; 6748 if (cct->cct_cpi_head == NULL) 6749 cct->cct_cpi_head = cpi; 6750 else 6751 cct->cct_cpi_tail->cpi_next = cpi; 6752 cct->cct_cpi_tail = cpi; 6753 } 6754 6755 /* 6756 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6757 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6758 * flag set come at the beginning of the list. All cpis which have this 6759 * flag set come at the end of the list. 6760 */ 6761 static void 6762 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6763 mdi_vhcache_pathinfo_t *newcpi) 6764 { 6765 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6766 6767 if (cct->cct_cpi_head == NULL || 6768 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6769 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6770 else { 6771 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6772 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6773 prev_cpi = cpi, cpi = cpi->cpi_next) 6774 ; 6775 6776 if (prev_cpi == NULL) 6777 cct->cct_cpi_head = newcpi; 6778 else 6779 prev_cpi->cpi_next = newcpi; 6780 6781 newcpi->cpi_next = cpi; 6782 6783 if (cpi == NULL) 6784 cct->cct_cpi_tail = newcpi; 6785 } 6786 } 6787 6788 /* 6789 * Enqueue the vhcache client (cct) at the tail of the list 6790 */ 6791 static void 6792 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6793 mdi_vhcache_client_t *cct) 6794 { 6795 cct->cct_next = NULL; 6796 if (vhcache->vhcache_client_head == NULL) 6797 
vhcache->vhcache_client_head = cct; 6798 else 6799 vhcache->vhcache_client_tail->cct_next = cct; 6800 vhcache->vhcache_client_tail = cct; 6801 } 6802 6803 static void 6804 free_string_array(char **str, int nelem) 6805 { 6806 int i; 6807 6808 if (str) { 6809 for (i = 0; i < nelem; i++) { 6810 if (str[i]) 6811 kmem_free(str[i], strlen(str[i]) + 1); 6812 } 6813 kmem_free(str, sizeof (char *) * nelem); 6814 } 6815 } 6816 6817 static void 6818 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6819 { 6820 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6821 kmem_free(cphci, sizeof (*cphci)); 6822 } 6823 6824 static void 6825 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6826 { 6827 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6828 kmem_free(cpi, sizeof (*cpi)); 6829 } 6830 6831 static void 6832 free_vhcache_client(mdi_vhcache_client_t *cct) 6833 { 6834 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6835 kmem_free(cct, sizeof (*cct)); 6836 } 6837 6838 static char * 6839 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6840 { 6841 char *name_addr; 6842 int len; 6843 6844 len = strlen(ct_name) + strlen(ct_addr) + 2; 6845 name_addr = kmem_alloc(len, KM_SLEEP); 6846 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6847 6848 if (ret_len) 6849 *ret_len = len; 6850 return (name_addr); 6851 } 6852 6853 /* 6854 * Copy the contents of paddrnvl to vhci cache. 6855 * paddrnvl nvlist contains path information for a vhci client. 6856 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 
6857 */ 6858 static void 6859 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6860 mdi_vhcache_client_t *cct) 6861 { 6862 nvpair_t *nvp = NULL; 6863 mdi_vhcache_pathinfo_t *cpi; 6864 uint_t nelem; 6865 uint32_t *val; 6866 6867 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6868 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6869 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6870 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6871 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6872 ASSERT(nelem == 2); 6873 cpi->cpi_cphci = cphci_list[val[0]]; 6874 cpi->cpi_flags = val[1]; 6875 enqueue_tail_vhcache_pathinfo(cct, cpi); 6876 } 6877 } 6878 6879 /* 6880 * Copy the contents of caddrmapnvl to vhci cache. 6881 * caddrmapnvl nvlist contains vhci client address to phci client address 6882 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6883 * this nvlist. 6884 */ 6885 static void 6886 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6887 mdi_vhcache_phci_t *cphci_list[]) 6888 { 6889 nvpair_t *nvp = NULL; 6890 nvlist_t *paddrnvl; 6891 mdi_vhcache_client_t *cct; 6892 6893 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6894 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6895 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6896 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6897 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6898 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6899 /* the client must contain at least one path */ 6900 ASSERT(cct->cct_cpi_head != NULL); 6901 6902 enqueue_vhcache_client(vhcache, cct); 6903 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6904 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6905 } 6906 } 6907 6908 /* 6909 * Copy the contents of the main nvlist to vhci cache. 6910 * 6911 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 
6912 * The nvlist contains the mappings between the vhci client addresses and 6913 * their corresponding phci client addresses. 6914 * 6915 * The structure of the nvlist is as follows: 6916 * 6917 * Main nvlist: 6918 * NAME TYPE DATA 6919 * version int32 version number 6920 * phcis string array array of phci paths 6921 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 6922 * 6923 * structure of c2paddrs_nvl: 6924 * NAME TYPE DATA 6925 * caddr1 nvlist_t paddrs_nvl1 6926 * caddr2 nvlist_t paddrs_nvl2 6927 * ... 6928 * where caddr1, caddr2, ... are vhci client name and addresses in the 6929 * form of "<clientname>@<clientaddress>". 6930 * (for example: "ssd@2000002037cd9f72"); 6931 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 6932 * 6933 * structure of paddrs_nvl: 6934 * NAME TYPE DATA 6935 * pi_addr1 uint32_array (phci-id, cpi_flags) 6936 * pi_addr2 uint32_array (phci-id, cpi_flags) 6937 * ... 6938 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes 6939 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 6940 * phci-ids are integers that identify PHCIs to which the 6941 * the bus specific address belongs to. These integers are used as an index 6942 * into to the phcis string array in the main nvlist to get the PHCI path. 
6943 */ 6944 static int 6945 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 6946 { 6947 char **phcis, **phci_namep; 6948 uint_t nphcis; 6949 mdi_vhcache_phci_t *cphci, **cphci_list; 6950 nvlist_t *caddrmapnvl; 6951 int32_t ver; 6952 int i; 6953 size_t cphci_list_size; 6954 6955 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 6956 6957 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 6958 ver != MDI_VHCI_CACHE_VERSION) 6959 return (MDI_FAILURE); 6960 6961 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 6962 &nphcis) != 0) 6963 return (MDI_SUCCESS); 6964 6965 ASSERT(nphcis > 0); 6966 6967 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 6968 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 6969 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 6970 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 6971 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 6972 enqueue_vhcache_phci(vhcache, cphci); 6973 cphci_list[i] = cphci; 6974 } 6975 6976 ASSERT(vhcache->vhcache_phci_head != NULL); 6977 6978 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 6979 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 6980 6981 kmem_free(cphci_list, cphci_list_size); 6982 return (MDI_SUCCESS); 6983 } 6984 6985 /* 6986 * Build paddrnvl for the specified client using the information in the 6987 * vhci cache and add it to the caddrmapnnvl. 6988 * Returns 0 on success, errno on failure. 
6989 */ 6990 static int 6991 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 6992 nvlist_t *caddrmapnvl) 6993 { 6994 mdi_vhcache_pathinfo_t *cpi; 6995 nvlist_t *nvl; 6996 int err; 6997 uint32_t val[2]; 6998 6999 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7000 7001 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7002 return (err); 7003 7004 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7005 val[0] = cpi->cpi_cphci->cphci_id; 7006 val[1] = cpi->cpi_flags; 7007 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7008 != 0) 7009 goto out; 7010 } 7011 7012 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7013 out: 7014 nvlist_free(nvl); 7015 return (err); 7016 } 7017 7018 /* 7019 * Build caddrmapnvl using the information in the vhci cache 7020 * and add it to the mainnvl. 7021 * Returns 0 on success, errno on failure. 7022 */ 7023 static int 7024 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7025 { 7026 mdi_vhcache_client_t *cct; 7027 nvlist_t *nvl; 7028 int err; 7029 7030 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7031 7032 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7033 return (err); 7034 7035 for (cct = vhcache->vhcache_client_head; cct != NULL; 7036 cct = cct->cct_next) { 7037 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7038 goto out; 7039 } 7040 7041 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7042 out: 7043 nvlist_free(nvl); 7044 return (err); 7045 } 7046 7047 /* 7048 * Build nvlist using the information in the vhci cache. 7049 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7050 * Returns nvl on success, NULL on failure. 
7051 */ 7052 static nvlist_t * 7053 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7054 { 7055 mdi_vhcache_phci_t *cphci; 7056 uint_t phci_count; 7057 char **phcis; 7058 nvlist_t *nvl; 7059 int err, i; 7060 7061 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7062 nvl = NULL; 7063 goto out; 7064 } 7065 7066 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7067 MDI_VHCI_CACHE_VERSION)) != 0) 7068 goto out; 7069 7070 rw_enter(&vhcache->vhcache_lock, RW_READER); 7071 if (vhcache->vhcache_phci_head == NULL) { 7072 rw_exit(&vhcache->vhcache_lock); 7073 return (nvl); 7074 } 7075 7076 phci_count = 0; 7077 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7078 cphci = cphci->cphci_next) 7079 cphci->cphci_id = phci_count++; 7080 7081 /* build phci pathname list */ 7082 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7083 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7084 cphci = cphci->cphci_next, i++) 7085 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7086 7087 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7088 phci_count); 7089 free_string_array(phcis, phci_count); 7090 7091 if (err == 0 && 7092 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7093 rw_exit(&vhcache->vhcache_lock); 7094 return (nvl); 7095 } 7096 7097 rw_exit(&vhcache->vhcache_lock); 7098 out: 7099 if (nvl) 7100 nvlist_free(nvl); 7101 return (NULL); 7102 } 7103 7104 /* 7105 * Lookup vhcache phci structure for the specified phci path. 7106 */ 7107 static mdi_vhcache_phci_t * 7108 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7109 { 7110 mdi_vhcache_phci_t *cphci; 7111 7112 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7113 7114 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7115 cphci = cphci->cphci_next) { 7116 if (strcmp(cphci->cphci_path, phci_path) == 0) 7117 return (cphci); 7118 } 7119 7120 return (NULL); 7121 } 7122 7123 /* 7124 * Lookup vhcache phci structure for the specified phci. 
7125 */ 7126 static mdi_vhcache_phci_t * 7127 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7128 { 7129 mdi_vhcache_phci_t *cphci; 7130 7131 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7132 7133 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7134 cphci = cphci->cphci_next) { 7135 if (cphci->cphci_phci == ph) 7136 return (cphci); 7137 } 7138 7139 return (NULL); 7140 } 7141 7142 /* 7143 * Add the specified phci to the vhci cache if not already present. 7144 */ 7145 static void 7146 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7147 { 7148 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7149 mdi_vhcache_phci_t *cphci; 7150 char *pathname; 7151 int cache_updated; 7152 7153 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7154 7155 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7156 (void) ddi_pathname(ph->ph_dip, pathname); 7157 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7158 != NULL) { 7159 cphci->cphci_phci = ph; 7160 cache_updated = 0; 7161 } else { 7162 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7163 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7164 cphci->cphci_phci = ph; 7165 enqueue_vhcache_phci(vhcache, cphci); 7166 cache_updated = 1; 7167 } 7168 7169 rw_exit(&vhcache->vhcache_lock); 7170 7171 /* 7172 * Since a new phci has been added, reset 7173 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7174 * during next vhcache_discover_paths(). 7175 */ 7176 mutex_enter(&vhc->vhc_lock); 7177 vhc->vhc_path_discovery_cutoff_time = 0; 7178 mutex_exit(&vhc->vhc_lock); 7179 7180 kmem_free(pathname, MAXPATHLEN); 7181 if (cache_updated) 7182 vhcache_dirty(vhc); 7183 } 7184 7185 /* 7186 * Remove the reference to the specified phci from the vhci cache. 
7187 */ 7188 static void 7189 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7190 { 7191 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7192 mdi_vhcache_phci_t *cphci; 7193 7194 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7195 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7196 /* do not remove the actual mdi_vhcache_phci structure */ 7197 cphci->cphci_phci = NULL; 7198 } 7199 rw_exit(&vhcache->vhcache_lock); 7200 } 7201 7202 static void 7203 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7204 mdi_vhcache_lookup_token_t *src) 7205 { 7206 if (src == NULL) { 7207 dst->lt_cct = NULL; 7208 dst->lt_cct_lookup_time = 0; 7209 } else { 7210 dst->lt_cct = src->lt_cct; 7211 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7212 } 7213 } 7214 7215 /* 7216 * Look up vhcache client for the specified client. 7217 */ 7218 static mdi_vhcache_client_t * 7219 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7220 mdi_vhcache_lookup_token_t *token) 7221 { 7222 mod_hash_val_t hv; 7223 char *name_addr; 7224 int len; 7225 7226 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7227 7228 /* 7229 * If no vhcache clean occurred since the last lookup, we can 7230 * simply return the cct from the last lookup operation. 7231 * It works because ccts are never freed except during the vhcache 7232 * cleanup operation. 
7233 */ 7234 if (token != NULL && 7235 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7236 return (token->lt_cct); 7237 7238 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7239 if (mod_hash_find(vhcache->vhcache_client_hash, 7240 (mod_hash_key_t)name_addr, &hv) == 0) { 7241 if (token) { 7242 token->lt_cct = (mdi_vhcache_client_t *)hv; 7243 token->lt_cct_lookup_time = lbolt64; 7244 } 7245 } else { 7246 if (token) { 7247 token->lt_cct = NULL; 7248 token->lt_cct_lookup_time = 0; 7249 } 7250 hv = NULL; 7251 } 7252 kmem_free(name_addr, len); 7253 return ((mdi_vhcache_client_t *)hv); 7254 } 7255 7256 /* 7257 * Add the specified path to the vhci cache if not already present. 7258 * Also add the vhcache client for the client corresponding to this path 7259 * if it doesn't already exist. 7260 */ 7261 static void 7262 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7263 { 7264 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7265 mdi_vhcache_client_t *cct; 7266 mdi_vhcache_pathinfo_t *cpi; 7267 mdi_phci_t *ph = pip->pi_phci; 7268 mdi_client_t *ct = pip->pi_client; 7269 int cache_updated = 0; 7270 7271 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7272 7273 /* if vhcache client for this pip doesn't already exist, add it */ 7274 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7275 NULL)) == NULL) { 7276 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7277 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7278 ct->ct_guid, NULL); 7279 enqueue_vhcache_client(vhcache, cct); 7280 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7281 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7282 cache_updated = 1; 7283 } 7284 7285 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7286 if (cpi->cpi_cphci->cphci_phci == ph && 7287 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7288 cpi->cpi_pip = pip; 7289 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7290 cpi->cpi_flags &= 7291 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7292 sort_vhcache_paths(cct); 7293 cache_updated = 1; 7294 } 7295 break; 7296 } 7297 } 7298 7299 if (cpi == NULL) { 7300 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7301 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7302 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7303 ASSERT(cpi->cpi_cphci != NULL); 7304 cpi->cpi_pip = pip; 7305 enqueue_vhcache_pathinfo(cct, cpi); 7306 cache_updated = 1; 7307 } 7308 7309 rw_exit(&vhcache->vhcache_lock); 7310 7311 if (cache_updated) 7312 vhcache_dirty(vhc); 7313 } 7314 7315 /* 7316 * Remove the reference to the specified path from the vhci cache. 7317 */ 7318 static void 7319 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7320 { 7321 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7322 mdi_client_t *ct = pip->pi_client; 7323 mdi_vhcache_client_t *cct; 7324 mdi_vhcache_pathinfo_t *cpi; 7325 7326 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7327 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7328 NULL)) != NULL) { 7329 for (cpi = cct->cct_cpi_head; cpi != NULL; 7330 cpi = cpi->cpi_next) { 7331 if (cpi->cpi_pip == pip) { 7332 cpi->cpi_pip = NULL; 7333 break; 7334 } 7335 } 7336 } 7337 rw_exit(&vhcache->vhcache_lock); 7338 } 7339 7340 /* 7341 * Flush the vhci cache to disk. 7342 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7343 */ 7344 static int 7345 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7346 { 7347 nvlist_t *nvl; 7348 int err; 7349 int rv; 7350 7351 /* 7352 * It is possible that the system may shutdown before 7353 * i_ddi_io_initialized (during stmsboot for example). To allow for 7354 * flushing the cache in this case do not check for 7355 * i_ddi_io_initialized when force flag is set. 
7356 */ 7357 if (force_flag == 0 && !i_ddi_io_initialized()) 7358 return (MDI_FAILURE); 7359 7360 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7361 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7362 nvlist_free(nvl); 7363 } else 7364 err = EFAULT; 7365 7366 rv = MDI_SUCCESS; 7367 mutex_enter(&vhc->vhc_lock); 7368 if (err != 0) { 7369 if (err == EROFS) { 7370 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7371 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7372 MDI_VHC_VHCACHE_DIRTY); 7373 } else { 7374 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7375 cmn_err(CE_CONT, "%s: update failed\n", 7376 vhc->vhc_vhcache_filename); 7377 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7378 } 7379 rv = MDI_FAILURE; 7380 } 7381 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7382 cmn_err(CE_CONT, 7383 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7384 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7385 } 7386 mutex_exit(&vhc->vhc_lock); 7387 7388 return (rv); 7389 } 7390 7391 /* 7392 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7393 * Exits itself if left idle for the idle timeout period. 
7394 */ 7395 static void 7396 vhcache_flush_thread(void *arg) 7397 { 7398 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7399 clock_t idle_time, quit_at_ticks; 7400 callb_cpr_t cprinfo; 7401 7402 /* number of seconds to sleep idle before exiting */ 7403 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7404 7405 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7406 "mdi_vhcache_flush"); 7407 mutex_enter(&vhc->vhc_lock); 7408 for (; ; ) { 7409 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7410 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7411 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7412 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7413 (void) cv_timedwait(&vhc->vhc_cv, 7414 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7415 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7416 } else { 7417 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7418 mutex_exit(&vhc->vhc_lock); 7419 7420 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7421 vhcache_dirty(vhc); 7422 7423 mutex_enter(&vhc->vhc_lock); 7424 } 7425 } 7426 7427 quit_at_ticks = ddi_get_lbolt() + idle_time; 7428 7429 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7430 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7431 ddi_get_lbolt() < quit_at_ticks) { 7432 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7433 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7434 quit_at_ticks); 7435 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7436 } 7437 7438 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7439 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7440 goto out; 7441 } 7442 7443 out: 7444 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7445 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7446 CALLB_CPR_EXIT(&cprinfo); 7447 } 7448 7449 /* 7450 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7451 */ 7452 static void 7453 vhcache_dirty(mdi_vhci_config_t *vhc) 7454 { 7455 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7456 int create_thread; 7457 7458 rw_enter(&vhcache->vhcache_lock, RW_READER); 7459 /* do not flush cache until the cache is fully built */ 7460 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7461 rw_exit(&vhcache->vhcache_lock); 7462 return; 7463 } 7464 rw_exit(&vhcache->vhcache_lock); 7465 7466 mutex_enter(&vhc->vhc_lock); 7467 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7468 mutex_exit(&vhc->vhc_lock); 7469 return; 7470 } 7471 7472 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7473 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7474 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7475 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7476 cv_broadcast(&vhc->vhc_cv); 7477 create_thread = 0; 7478 } else { 7479 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7480 create_thread = 1; 7481 } 7482 mutex_exit(&vhc->vhc_lock); 7483 7484 if (create_thread) 7485 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7486 0, &p0, TS_RUN, minclsyspri); 7487 } 7488 7489 /* 7490 * phci bus config structure - one for for each phci bus config operation that 7491 * we initiate on behalf of a vhci. 
7492 */ 7493 typedef struct mdi_phci_bus_config_s { 7494 char *phbc_phci_path; 7495 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7496 struct mdi_phci_bus_config_s *phbc_next; 7497 } mdi_phci_bus_config_t; 7498 7499 /* vhci bus config structure - one for each vhci bus config operation */ 7500 typedef struct mdi_vhci_bus_config_s { 7501 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7502 major_t vhbc_op_major; /* bus config op major */ 7503 uint_t vhbc_op_flags; /* bus config op flags */ 7504 kmutex_t vhbc_lock; 7505 kcondvar_t vhbc_cv; 7506 int vhbc_thr_count; 7507 } mdi_vhci_bus_config_t; 7508 7509 /* 7510 * bus config the specified phci 7511 */ 7512 static void 7513 bus_config_phci(void *arg) 7514 { 7515 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7516 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7517 dev_info_t *ph_dip; 7518 7519 /* 7520 * first configure all path components upto phci and then configure 7521 * the phci children. 7522 */ 7523 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7524 != NULL) { 7525 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7526 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7527 (void) ndi_devi_config_driver(ph_dip, 7528 vhbc->vhbc_op_flags, 7529 vhbc->vhbc_op_major); 7530 } else 7531 (void) ndi_devi_config(ph_dip, 7532 vhbc->vhbc_op_flags); 7533 7534 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7535 ndi_rele_devi(ph_dip); 7536 } 7537 7538 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7539 kmem_free(phbc, sizeof (*phbc)); 7540 7541 mutex_enter(&vhbc->vhbc_lock); 7542 vhbc->vhbc_thr_count--; 7543 if (vhbc->vhbc_thr_count == 0) 7544 cv_broadcast(&vhbc->vhbc_cv); 7545 mutex_exit(&vhbc->vhbc_lock); 7546 } 7547 7548 /* 7549 * Bus config all phcis associated with the vhci in parallel. 7550 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7551 */ 7552 static void 7553 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7554 ddi_bus_config_op_t op, major_t maj) 7555 { 7556 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7557 mdi_vhci_bus_config_t *vhbc; 7558 mdi_vhcache_phci_t *cphci; 7559 7560 rw_enter(&vhcache->vhcache_lock, RW_READER); 7561 if (vhcache->vhcache_phci_head == NULL) { 7562 rw_exit(&vhcache->vhcache_lock); 7563 return; 7564 } 7565 7566 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7567 7568 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7569 cphci = cphci->cphci_next) { 7570 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7571 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7572 KM_SLEEP); 7573 phbc->phbc_vhbusconfig = vhbc; 7574 phbc->phbc_next = phbc_head; 7575 phbc_head = phbc; 7576 vhbc->vhbc_thr_count++; 7577 } 7578 rw_exit(&vhcache->vhcache_lock); 7579 7580 vhbc->vhbc_op = op; 7581 vhbc->vhbc_op_major = maj; 7582 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7583 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7584 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7585 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7586 7587 /* now create threads to initiate bus config on all phcis in parallel */ 7588 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7589 phbc_next = phbc->phbc_next; 7590 if (mdi_mtc_off) 7591 bus_config_phci((void *)phbc); 7592 else 7593 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7594 0, &p0, TS_RUN, minclsyspri); 7595 } 7596 7597 mutex_enter(&vhbc->vhbc_lock); 7598 /* wait until all threads exit */ 7599 while (vhbc->vhbc_thr_count > 0) 7600 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7601 mutex_exit(&vhbc->vhbc_lock); 7602 7603 mutex_destroy(&vhbc->vhbc_lock); 7604 cv_destroy(&vhbc->vhbc_cv); 7605 kmem_free(vhbc, sizeof (*vhbc)); 7606 } 7607 7608 /* 7609 * Single threaded version of bus_config_all_phcis() 7610 */ 7611 static void 7612 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7613 
ddi_bus_config_op_t op, major_t maj) 7614 { 7615 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7616 7617 single_threaded_vhconfig_enter(vhc); 7618 bus_config_all_phcis(vhcache, flags, op, maj); 7619 single_threaded_vhconfig_exit(vhc); 7620 } 7621 7622 /* 7623 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7624 * The path includes the child component in addition to the phci path. 7625 */ 7626 static int 7627 bus_config_one_phci_child(char *path) 7628 { 7629 dev_info_t *ph_dip, *child; 7630 char *devnm; 7631 int rv = MDI_FAILURE; 7632 7633 /* extract the child component of the phci */ 7634 devnm = strrchr(path, '/'); 7635 *devnm++ = '\0'; 7636 7637 /* 7638 * first configure all path components upto phci and then 7639 * configure the phci child. 7640 */ 7641 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7642 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7643 NDI_SUCCESS) { 7644 /* 7645 * release the hold that ndi_devi_config_one() placed 7646 */ 7647 ndi_rele_devi(child); 7648 rv = MDI_SUCCESS; 7649 } 7650 7651 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7652 ndi_rele_devi(ph_dip); 7653 } 7654 7655 devnm--; 7656 *devnm = '/'; 7657 return (rv); 7658 } 7659 7660 /* 7661 * Build a list of phci client paths for the specified vhci client. 7662 * The list includes only those phci client paths which aren't configured yet. 7663 */ 7664 static mdi_phys_path_t * 7665 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7666 { 7667 mdi_vhcache_pathinfo_t *cpi; 7668 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7669 int config_path, len; 7670 7671 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7672 /* 7673 * include only those paths that aren't configured. 
7674 */ 7675 config_path = 0; 7676 if (cpi->cpi_pip == NULL) 7677 config_path = 1; 7678 else { 7679 MDI_PI_LOCK(cpi->cpi_pip); 7680 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7681 config_path = 1; 7682 MDI_PI_UNLOCK(cpi->cpi_pip); 7683 } 7684 7685 if (config_path) { 7686 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7687 len = strlen(cpi->cpi_cphci->cphci_path) + 7688 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7689 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7690 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7691 cpi->cpi_cphci->cphci_path, ct_name, 7692 cpi->cpi_addr); 7693 pp->phys_path_next = NULL; 7694 7695 if (pp_head == NULL) 7696 pp_head = pp; 7697 else 7698 pp_tail->phys_path_next = pp; 7699 pp_tail = pp; 7700 } 7701 } 7702 7703 return (pp_head); 7704 } 7705 7706 /* 7707 * Free the memory allocated for phci client path list. 7708 */ 7709 static void 7710 free_phclient_path_list(mdi_phys_path_t *pp_head) 7711 { 7712 mdi_phys_path_t *pp, *pp_next; 7713 7714 for (pp = pp_head; pp != NULL; pp = pp_next) { 7715 pp_next = pp->phys_path_next; 7716 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7717 kmem_free(pp, sizeof (*pp)); 7718 } 7719 } 7720 7721 /* 7722 * Allocated async client structure and initialize with the specified values. 7723 */ 7724 static mdi_async_client_config_t * 7725 alloc_async_client_config(char *ct_name, char *ct_addr, 7726 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7727 { 7728 mdi_async_client_config_t *acc; 7729 7730 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7731 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7732 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7733 acc->acc_phclient_path_list_head = pp_head; 7734 init_vhcache_lookup_token(&acc->acc_token, tok); 7735 acc->acc_next = NULL; 7736 return (acc); 7737 } 7738 7739 /* 7740 * Free the memory allocated for the async client structure and their members. 
7741 */ 7742 static void 7743 free_async_client_config(mdi_async_client_config_t *acc) 7744 { 7745 if (acc->acc_phclient_path_list_head) 7746 free_phclient_path_list(acc->acc_phclient_path_list_head); 7747 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7748 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7749 kmem_free(acc, sizeof (*acc)); 7750 } 7751 7752 /* 7753 * Sort vhcache pathinfos (cpis) of the specified client. 7754 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7755 * flag set come at the beginning of the list. All cpis which have this 7756 * flag set come at the end of the list. 7757 */ 7758 static void 7759 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7760 { 7761 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7762 7763 cpi_head = cct->cct_cpi_head; 7764 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7765 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7766 cpi_next = cpi->cpi_next; 7767 enqueue_vhcache_pathinfo(cct, cpi); 7768 } 7769 } 7770 7771 /* 7772 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7773 * every vhcache pathinfo of the specified client. If not adjust the flag 7774 * setting appropriately. 7775 * 7776 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7777 * on-disk vhci cache. So every time this flag is updated the cache must be 7778 * flushed. 7779 */ 7780 static void 7781 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7782 mdi_vhcache_lookup_token_t *tok) 7783 { 7784 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7785 mdi_vhcache_client_t *cct; 7786 mdi_vhcache_pathinfo_t *cpi; 7787 7788 rw_enter(&vhcache->vhcache_lock, RW_READER); 7789 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7790 == NULL) { 7791 rw_exit(&vhcache->vhcache_lock); 7792 return; 7793 } 7794 7795 /* 7796 * to avoid unnecessary on-disk cache updates, first check if an 7797 * update is really needed. 
If no update is needed simply return. 7798 */ 7799 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7800 if ((cpi->cpi_pip != NULL && 7801 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7802 (cpi->cpi_pip == NULL && 7803 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7804 break; 7805 } 7806 } 7807 if (cpi == NULL) { 7808 rw_exit(&vhcache->vhcache_lock); 7809 return; 7810 } 7811 7812 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7813 rw_exit(&vhcache->vhcache_lock); 7814 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7815 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7816 tok)) == NULL) { 7817 rw_exit(&vhcache->vhcache_lock); 7818 return; 7819 } 7820 } 7821 7822 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7823 if (cpi->cpi_pip != NULL) 7824 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7825 else 7826 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7827 } 7828 sort_vhcache_paths(cct); 7829 7830 rw_exit(&vhcache->vhcache_lock); 7831 vhcache_dirty(vhc); 7832 } 7833 7834 /* 7835 * Configure all specified paths of the client. 7836 */ 7837 static void 7838 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7839 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7840 { 7841 mdi_phys_path_t *pp; 7842 7843 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 7844 (void) bus_config_one_phci_child(pp->phys_path); 7845 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 7846 } 7847 7848 /* 7849 * Dequeue elements from vhci async client config list and bus configure 7850 * their corresponding phci clients. 
7851 */ 7852 static void 7853 config_client_paths_thread(void *arg) 7854 { 7855 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7856 mdi_async_client_config_t *acc; 7857 clock_t quit_at_ticks; 7858 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 7859 callb_cpr_t cprinfo; 7860 7861 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7862 "mdi_config_client_paths"); 7863 7864 for (; ; ) { 7865 quit_at_ticks = ddi_get_lbolt() + idle_time; 7866 7867 mutex_enter(&vhc->vhc_lock); 7868 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7869 vhc->vhc_acc_list_head == NULL && 7870 ddi_get_lbolt() < quit_at_ticks) { 7871 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7872 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7873 quit_at_ticks); 7874 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7875 } 7876 7877 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7878 vhc->vhc_acc_list_head == NULL) 7879 goto out; 7880 7881 acc = vhc->vhc_acc_list_head; 7882 vhc->vhc_acc_list_head = acc->acc_next; 7883 if (vhc->vhc_acc_list_head == NULL) 7884 vhc->vhc_acc_list_tail = NULL; 7885 vhc->vhc_acc_count--; 7886 mutex_exit(&vhc->vhc_lock); 7887 7888 config_client_paths_sync(vhc, acc->acc_ct_name, 7889 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 7890 &acc->acc_token); 7891 7892 free_async_client_config(acc); 7893 } 7894 7895 out: 7896 vhc->vhc_acc_thrcount--; 7897 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7898 CALLB_CPR_EXIT(&cprinfo); 7899 } 7900 7901 /* 7902 * Arrange for all the phci client paths (pp_head) for the specified client 7903 * to be bus configured asynchronously by a thread. 
7904 */ 7905 static void 7906 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7907 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7908 { 7909 mdi_async_client_config_t *acc, *newacc; 7910 int create_thread; 7911 7912 if (pp_head == NULL) 7913 return; 7914 7915 if (mdi_mtc_off) { 7916 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7917 free_phclient_path_list(pp_head); 7918 return; 7919 } 7920 7921 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7922 ASSERT(newacc); 7923 7924 mutex_enter(&vhc->vhc_lock); 7925 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7926 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 7927 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 7928 free_async_client_config(newacc); 7929 mutex_exit(&vhc->vhc_lock); 7930 return; 7931 } 7932 } 7933 7934 if (vhc->vhc_acc_list_head == NULL) 7935 vhc->vhc_acc_list_head = newacc; 7936 else 7937 vhc->vhc_acc_list_tail->acc_next = newacc; 7938 vhc->vhc_acc_list_tail = newacc; 7939 vhc->vhc_acc_count++; 7940 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 7941 cv_broadcast(&vhc->vhc_cv); 7942 create_thread = 0; 7943 } else { 7944 vhc->vhc_acc_thrcount++; 7945 create_thread = 1; 7946 } 7947 mutex_exit(&vhc->vhc_lock); 7948 7949 if (create_thread) 7950 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 7951 0, &p0, TS_RUN, minclsyspri); 7952 } 7953 7954 /* 7955 * Return number of online paths for the specified client. 
7956 */ 7957 static int 7958 nonline_paths(mdi_vhcache_client_t *cct) 7959 { 7960 mdi_vhcache_pathinfo_t *cpi; 7961 int online_count = 0; 7962 7963 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7964 if (cpi->cpi_pip != NULL) { 7965 MDI_PI_LOCK(cpi->cpi_pip); 7966 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 7967 online_count++; 7968 MDI_PI_UNLOCK(cpi->cpi_pip); 7969 } 7970 } 7971 7972 return (online_count); 7973 } 7974 7975 /* 7976 * Bus configure all paths for the specified vhci client. 7977 * If at least one path for the client is already online, the remaining paths 7978 * will be configured asynchronously. Otherwise, it synchronously configures 7979 * the paths until at least one path is online and then rest of the paths 7980 * will be configured asynchronously. 7981 */ 7982 static void 7983 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 7984 { 7985 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7986 mdi_phys_path_t *pp_head, *pp; 7987 mdi_vhcache_client_t *cct; 7988 mdi_vhcache_lookup_token_t tok; 7989 7990 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7991 7992 init_vhcache_lookup_token(&tok, NULL); 7993 7994 if (ct_name == NULL || ct_addr == NULL || 7995 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 7996 == NULL || 7997 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 7998 rw_exit(&vhcache->vhcache_lock); 7999 return; 8000 } 8001 8002 /* if at least one path is online, configure the rest asynchronously */ 8003 if (nonline_paths(cct) > 0) { 8004 rw_exit(&vhcache->vhcache_lock); 8005 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8006 return; 8007 } 8008 8009 rw_exit(&vhcache->vhcache_lock); 8010 8011 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8012 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8013 rw_enter(&vhcache->vhcache_lock, RW_READER); 8014 8015 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8016 ct_addr, &tok)) 
== NULL) { 8017 rw_exit(&vhcache->vhcache_lock); 8018 goto out; 8019 } 8020 8021 if (nonline_paths(cct) > 0 && 8022 pp->phys_path_next != NULL) { 8023 rw_exit(&vhcache->vhcache_lock); 8024 config_client_paths_async(vhc, ct_name, ct_addr, 8025 pp->phys_path_next, &tok); 8026 pp->phys_path_next = NULL; 8027 goto out; 8028 } 8029 8030 rw_exit(&vhcache->vhcache_lock); 8031 } 8032 } 8033 8034 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8035 out: 8036 free_phclient_path_list(pp_head); 8037 } 8038 8039 static void 8040 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8041 { 8042 mutex_enter(&vhc->vhc_lock); 8043 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8044 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8045 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8046 mutex_exit(&vhc->vhc_lock); 8047 } 8048 8049 static void 8050 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8051 { 8052 mutex_enter(&vhc->vhc_lock); 8053 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8054 cv_broadcast(&vhc->vhc_cv); 8055 mutex_exit(&vhc->vhc_lock); 8056 } 8057 8058 /* 8059 * Attach the phci driver instances associated with the vhci: 8060 * If root is mounted attach all phci driver instances. 8061 * If root is not mounted, attach the instances of only those phci 8062 * drivers that have the root support. 8063 */ 8064 static void 8065 attach_phci_drivers(mdi_vhci_config_t *vhc) 8066 { 8067 int i; 8068 major_t m; 8069 8070 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 8071 if (modrootloaded == 0 && 8072 vhc->vhc_phci_driver_list[i].phdriver_root_support == 0) 8073 continue; 8074 8075 m = ddi_name_to_major( 8076 vhc->vhc_phci_driver_list[i].phdriver_name); 8077 if (m != (major_t)-1) { 8078 if (ddi_hold_installed_driver(m) != NULL) 8079 ddi_rele_driver(m); 8080 } 8081 } 8082 } 8083 8084 /* 8085 * Build vhci cache: 8086 * 8087 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8088 * the phci driver instances. During this process the cache gets built. 
8089 * 8090 * Cache is built fully if the root is mounted. 8091 * If the root is not mounted, phci drivers that do not have root support 8092 * are not attached. As a result the cache is built partially. The entries 8093 * in the cache reflect only those phci drivers that have root support. 8094 */ 8095 static int 8096 build_vhci_cache(mdi_vhci_config_t *vhc) 8097 { 8098 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8099 8100 single_threaded_vhconfig_enter(vhc); 8101 8102 rw_enter(&vhcache->vhcache_lock, RW_READER); 8103 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8104 rw_exit(&vhcache->vhcache_lock); 8105 single_threaded_vhconfig_exit(vhc); 8106 return (0); 8107 } 8108 rw_exit(&vhcache->vhcache_lock); 8109 8110 attach_phci_drivers(vhc); 8111 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8112 BUS_CONFIG_ALL, (major_t)-1); 8113 8114 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8115 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8116 rw_exit(&vhcache->vhcache_lock); 8117 8118 single_threaded_vhconfig_exit(vhc); 8119 vhcache_dirty(vhc); 8120 return (1); 8121 } 8122 8123 /* 8124 * Determine if discovery of paths is needed. 8125 */ 8126 static int 8127 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8128 { 8129 int rv = 1; 8130 8131 mutex_enter(&vhc->vhc_lock); 8132 if (i_ddi_io_initialized() == 0) { 8133 if (vhc->vhc_path_discovery_boot > 0) { 8134 vhc->vhc_path_discovery_boot--; 8135 goto out; 8136 } 8137 } else { 8138 if (vhc->vhc_path_discovery_postboot > 0) { 8139 vhc->vhc_path_discovery_postboot--; 8140 goto out; 8141 } 8142 } 8143 8144 /* 8145 * Do full path discovery at most once per mdi_path_discovery_interval. 8146 * This is to avoid a series of full path discoveries when opening 8147 * stale /dev/[r]dsk links. 
8148 */ 8149 if (mdi_path_discovery_interval != -1 && 8150 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8151 goto out; 8152 8153 rv = 0; 8154 out: 8155 mutex_exit(&vhc->vhc_lock); 8156 return (rv); 8157 } 8158 8159 /* 8160 * Discover all paths: 8161 * 8162 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8163 * driver instances. During this process all paths will be discovered. 8164 */ 8165 static int 8166 vhcache_discover_paths(mdi_vhci_config_t *vhc) 8167 { 8168 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8169 int rv = 0; 8170 8171 single_threaded_vhconfig_enter(vhc); 8172 8173 if (vhcache_do_discovery(vhc)) { 8174 attach_phci_drivers(vhc); 8175 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8176 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8177 8178 mutex_enter(&vhc->vhc_lock); 8179 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8180 mdi_path_discovery_interval * TICKS_PER_SECOND; 8181 mutex_exit(&vhc->vhc_lock); 8182 rv = 1; 8183 } 8184 8185 single_threaded_vhconfig_exit(vhc); 8186 return (rv); 8187 } 8188 8189 /* 8190 * Generic vhci bus config implementation: 8191 * 8192 * Parameters 8193 * vdip vhci dip 8194 * flags bus config flags 8195 * op bus config operation 8196 * The remaining parameters are bus config operation specific 8197 * 8198 * for BUS_CONFIG_ONE 8199 * arg pointer to name@addr 8200 * child upon successful return from this function, *child will be 8201 * set to the configured and held devinfo child node of vdip. 8202 * ct_addr pointer to client address (i.e. GUID) 8203 * 8204 * for BUS_CONFIG_DRIVER 8205 * arg major number of the driver 8206 * child and ct_addr parameters are ignored 8207 * 8208 * for BUS_CONFIG_ALL 8209 * arg, child, and ct_addr parameters are ignored 8210 * 8211 * Note that for the rest of the bus config operations, this function simply 8212 * calls the framework provided default bus config routine. 
8213 */ 8214 int 8215 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8216 void *arg, dev_info_t **child, char *ct_addr) 8217 { 8218 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8219 mdi_vhci_config_t *vhc = vh->vh_config; 8220 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8221 int rv = 0; 8222 int params_valid = 0; 8223 char *cp; 8224 8225 /* 8226 * While bus configuring phcis, the phci driver interactions with MDI 8227 * cause child nodes to be enumerated under the vhci node for which 8228 * they need to ndi_devi_enter the vhci node. 8229 * 8230 * Unfortunately, to avoid the deadlock, we ourself can not wait for 8231 * for the bus config operations on phcis to finish while holding the 8232 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on 8233 * phcis and call the default framework provided bus config function 8234 * if we are called with ndi_devi_enter lock held. 8235 */ 8236 if (DEVI_BUSY_OWNED(vdip)) { 8237 MDI_DEBUG(2, (CE_NOTE, vdip, 8238 "!MDI: vhci bus config: vhci dip is busy owned\n")); 8239 goto default_bus_config; 8240 } 8241 8242 rw_enter(&vhcache->vhcache_lock, RW_READER); 8243 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8244 rw_exit(&vhcache->vhcache_lock); 8245 rv = build_vhci_cache(vhc); 8246 rw_enter(&vhcache->vhcache_lock, RW_READER); 8247 } 8248 8249 switch (op) { 8250 case BUS_CONFIG_ONE: 8251 if (arg != NULL && ct_addr != NULL) { 8252 /* extract node name */ 8253 cp = (char *)arg; 8254 while (*cp != '\0' && *cp != '@') 8255 cp++; 8256 if (*cp == '@') { 8257 params_valid = 1; 8258 *cp = '\0'; 8259 config_client_paths(vhc, (char *)arg, ct_addr); 8260 /* config_client_paths() releases cache_lock */ 8261 *cp = '@'; 8262 break; 8263 } 8264 } 8265 8266 rw_exit(&vhcache->vhcache_lock); 8267 break; 8268 8269 case BUS_CONFIG_DRIVER: 8270 rw_exit(&vhcache->vhcache_lock); 8271 if (rv == 0) 8272 st_bus_config_all_phcis(vhc, flags, op, 8273 (major_t)(uintptr_t)arg); 8274 break; 8275 8276 case 
BUS_CONFIG_ALL: 8277 rw_exit(&vhcache->vhcache_lock); 8278 if (rv == 0) 8279 st_bus_config_all_phcis(vhc, flags, op, -1); 8280 break; 8281 8282 default: 8283 rw_exit(&vhcache->vhcache_lock); 8284 break; 8285 } 8286 8287 8288 default_bus_config: 8289 /* 8290 * All requested child nodes are enumerated under the vhci. 8291 * Now configure them. 8292 */ 8293 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8294 NDI_SUCCESS) { 8295 return (MDI_SUCCESS); 8296 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8297 /* discover all paths and try configuring again */ 8298 if (vhcache_discover_paths(vhc) && 8299 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8300 NDI_SUCCESS) 8301 return (MDI_SUCCESS); 8302 } 8303 8304 return (MDI_FAILURE); 8305 } 8306 8307 /* 8308 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8309 */ 8310 static nvlist_t * 8311 read_on_disk_vhci_cache(char *vhci_class) 8312 { 8313 nvlist_t *nvl; 8314 int err; 8315 char *filename; 8316 8317 filename = vhclass2vhcache_filename(vhci_class); 8318 8319 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8320 kmem_free(filename, strlen(filename) + 1); 8321 return (nvl); 8322 } else if (err == EIO) 8323 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8324 else if (err == EINVAL) 8325 cmn_err(CE_WARN, 8326 "%s: data file corrupted, will recreate\n", filename); 8327 8328 kmem_free(filename, strlen(filename) + 1); 8329 return (NULL); 8330 } 8331 8332 /* 8333 * Read on-disk vhci cache into nvlists for all vhci classes. 8334 * Called during booting by i_ddi_read_devices_files(). 8335 */ 8336 void 8337 mdi_read_devices_files(void) 8338 { 8339 int i; 8340 8341 for (i = 0; i < N_VHCI_CLASSES; i++) 8342 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8343 } 8344 8345 /* 8346 * Remove all stale entries from vhci cache. 
8347 */ 8348 static void 8349 clean_vhcache(mdi_vhci_config_t *vhc) 8350 { 8351 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8352 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8353 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8354 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8355 8356 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8357 8358 cct_head = vhcache->vhcache_client_head; 8359 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8360 for (cct = cct_head; cct != NULL; cct = cct_next) { 8361 cct_next = cct->cct_next; 8362 8363 cpi_head = cct->cct_cpi_head; 8364 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8365 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8366 cpi_next = cpi->cpi_next; 8367 if (cpi->cpi_pip != NULL) { 8368 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8369 enqueue_tail_vhcache_pathinfo(cct, cpi); 8370 } else 8371 free_vhcache_pathinfo(cpi); 8372 } 8373 8374 if (cct->cct_cpi_head != NULL) 8375 enqueue_vhcache_client(vhcache, cct); 8376 else { 8377 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8378 (mod_hash_key_t)cct->cct_name_addr); 8379 free_vhcache_client(cct); 8380 } 8381 } 8382 8383 cphci_head = vhcache->vhcache_phci_head; 8384 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8385 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8386 cphci_next = cphci->cphci_next; 8387 if (cphci->cphci_phci != NULL) 8388 enqueue_vhcache_phci(vhcache, cphci); 8389 else 8390 free_vhcache_phci(cphci); 8391 } 8392 8393 vhcache->vhcache_clean_time = lbolt64; 8394 rw_exit(&vhcache->vhcache_lock); 8395 vhcache_dirty(vhc); 8396 } 8397 8398 /* 8399 * Remove all stale entries from vhci cache. 
8400 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8401 */ 8402 void 8403 mdi_clean_vhcache(void) 8404 { 8405 mdi_vhci_t *vh; 8406 8407 mutex_enter(&mdi_mutex); 8408 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8409 vh->vh_refcnt++; 8410 mutex_exit(&mdi_mutex); 8411 clean_vhcache(vh->vh_config); 8412 mutex_enter(&mdi_mutex); 8413 vh->vh_refcnt--; 8414 } 8415 mutex_exit(&mdi_mutex); 8416 } 8417 8418 /* 8419 * mdi_vhci_walk_clients(): 8420 * Walker routine to traverse client dev_info nodes 8421 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8422 * below the client, including nexus devices, which we dont want. 8423 * So we just traverse the immediate siblings, starting from 1st client. 8424 */ 8425 void 8426 mdi_vhci_walk_clients(dev_info_t *vdip, 8427 int (*f)(dev_info_t *, void *), void *arg) 8428 { 8429 dev_info_t *cdip; 8430 mdi_client_t *ct; 8431 8432 mutex_enter(&mdi_mutex); 8433 8434 cdip = ddi_get_child(vdip); 8435 8436 while (cdip) { 8437 ct = i_devi_get_client(cdip); 8438 MDI_CLIENT_LOCK(ct); 8439 8440 switch ((*f)(cdip, arg)) { 8441 case DDI_WALK_CONTINUE: 8442 cdip = ddi_get_next_sibling(cdip); 8443 MDI_CLIENT_UNLOCK(ct); 8444 break; 8445 8446 default: 8447 MDI_CLIENT_UNLOCK(ct); 8448 mutex_exit(&mdi_mutex); 8449 return; 8450 } 8451 } 8452 8453 mutex_exit(&mdi_mutex); 8454 } 8455 8456 /* 8457 * mdi_vhci_walk_phcis(): 8458 * Walker routine to traverse phci dev_info nodes 8459 */ 8460 void 8461 mdi_vhci_walk_phcis(dev_info_t *vdip, 8462 int (*f)(dev_info_t *, void *), void *arg) 8463 { 8464 mdi_vhci_t *vh = NULL; 8465 mdi_phci_t *ph = NULL; 8466 8467 mutex_enter(&mdi_mutex); 8468 8469 vh = i_devi_get_vhci(vdip); 8470 ph = vh->vh_phci_head; 8471 8472 while (ph) { 8473 MDI_PHCI_LOCK(ph); 8474 8475 switch ((*f)(ph->ph_dip, arg)) { 8476 case DDI_WALK_CONTINUE: 8477 MDI_PHCI_UNLOCK(ph); 8478 ph = ph->ph_next; 8479 break; 8480 8481 default: 8482 MDI_PHCI_UNLOCK(ph); 8483 mutex_exit(&mdi_mutex); 8484 
return; 8485 } 8486 } 8487 8488 mutex_exit(&mdi_mutex); 8489 } 8490 8491 8492 /* 8493 * mdi_walk_vhcis(): 8494 * Walker routine to traverse vhci dev_info nodes 8495 */ 8496 void 8497 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8498 { 8499 mdi_vhci_t *vh = NULL; 8500 8501 mutex_enter(&mdi_mutex); 8502 /* 8503 * Scan for already registered vhci 8504 */ 8505 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8506 vh->vh_refcnt++; 8507 mutex_exit(&mdi_mutex); 8508 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8509 mutex_enter(&mdi_mutex); 8510 vh->vh_refcnt--; 8511 break; 8512 } else { 8513 mutex_enter(&mdi_mutex); 8514 vh->vh_refcnt--; 8515 } 8516 } 8517 8518 mutex_exit(&mdi_mutex); 8519 } 8520 8521 /* 8522 * i_mdi_log_sysevent(): 8523 * Logs events for pickup by syseventd 8524 */ 8525 static void 8526 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8527 { 8528 char *path_name; 8529 nvlist_t *attr_list; 8530 8531 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8532 KM_SLEEP) != DDI_SUCCESS) { 8533 goto alloc_failed; 8534 } 8535 8536 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8537 (void) ddi_pathname(dip, path_name); 8538 8539 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8540 ddi_driver_name(dip)) != DDI_SUCCESS) { 8541 goto error; 8542 } 8543 8544 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8545 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8546 goto error; 8547 } 8548 8549 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8550 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8551 goto error; 8552 } 8553 8554 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8555 path_name) != DDI_SUCCESS) { 8556 goto error; 8557 } 8558 8559 if (nvlist_add_string(attr_list, DDI_CLASS, 8560 ph_vh_class) != DDI_SUCCESS) { 8561 goto error; 8562 } 8563 8564 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8565 attr_list, NULL, DDI_SLEEP); 8566 8567 error: 8568 kmem_free(path_name, MAXPATHLEN); 8569 
nvlist_free(attr_list); 8570 return; 8571 8572 alloc_failed: 8573 MDI_DEBUG(1, (CE_WARN, dip, 8574 "!i_mdi_log_sysevent: Unable to send sysevent")); 8575 } 8576