1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 30 * detailed discussion of the overall mpxio architecture. 
31 * 32 * Default locking order: 33 * 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) 35 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 71 #ifdef DEBUG 72 #include <sys/debug.h> 73 int mdi_debug = 1; 74 #define MDI_DEBUG(level, stmnt) \ 75 if (mdi_debug >= (level)) i_mdi_log stmnt 76 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 77 #else /* !DEBUG */ 78 #define MDI_DEBUG(level, stmnt) 79 #endif /* DEBUG */ 80 81 extern pri_t minclsyspri; 82 extern int modrootloaded; 83 84 /* 85 * Global mutex: 86 * Protects vHCI list and structure members, pHCI and Client lists. 
87 */ 88 kmutex_t mdi_mutex; 89 90 /* 91 * Registered vHCI class driver lists 92 */ 93 int mdi_vhci_count; 94 mdi_vhci_t *mdi_vhci_head; 95 mdi_vhci_t *mdi_vhci_tail; 96 97 /* 98 * Client Hash Table size 99 */ 100 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 101 102 /* 103 * taskq interface definitions 104 */ 105 #define MDI_TASKQ_N_THREADS 8 106 #define MDI_TASKQ_PRI minclsyspri 107 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 108 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 109 110 taskq_t *mdi_taskq; 111 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 112 113 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 114 115 /* 116 * The data should be "quiet" for this interval (in seconds) before the 117 * vhci cached data is flushed to the disk. 118 */ 119 static int mdi_vhcache_flush_delay = 10; 120 121 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 122 static int mdi_vhcache_flush_daemon_idle_time = 60; 123 124 /* 125 * number of seconds the asynchronous configuration thread will sleep idle 126 * before exiting. 
127 */ 128 static int mdi_async_config_idle_time = 600; 129 130 static int mdi_bus_config_cache_hash_size = 256; 131 132 /* turns off multithreaded configuration for certain operations */ 133 static int mdi_mtc_off = 0; 134 135 /* 136 * MDI component property name/value string definitions 137 */ 138 const char *mdi_component_prop = "mpxio-component"; 139 const char *mdi_component_prop_vhci = "vhci"; 140 const char *mdi_component_prop_phci = "phci"; 141 const char *mdi_component_prop_client = "client"; 142 143 /* 144 * MDI client global unique identifier property name 145 */ 146 const char *mdi_client_guid_prop = "client-guid"; 147 148 /* 149 * MDI client load balancing property name/value string definitions 150 */ 151 const char *mdi_load_balance = "load-balance"; 152 const char *mdi_load_balance_none = "none"; 153 const char *mdi_load_balance_rr = "round-robin"; 154 const char *mdi_load_balance_lba = "logical-block"; 155 156 /* 157 * Obsolete vHCI class definition; to be removed after Leadville update 158 */ 159 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 160 161 static char vhci_greeting[] = 162 "\tThere already exists one vHCI driver for class %s\n" 163 "\tOnly one vHCI driver for each class is allowed\n"; 164 165 /* 166 * Static function prototypes 167 */ 168 static int i_mdi_phci_offline(dev_info_t *, uint_t); 169 static int i_mdi_client_offline(dev_info_t *, uint_t); 170 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 171 static void i_mdi_phci_post_detach(dev_info_t *, 172 ddi_detach_cmd_t, int); 173 static int i_mdi_client_pre_detach(dev_info_t *, 174 ddi_detach_cmd_t); 175 static void i_mdi_client_post_detach(dev_info_t *, 176 ddi_detach_cmd_t, int); 177 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 178 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 179 static int i_mdi_lba_lb(mdi_client_t *ct, 180 mdi_pathinfo_t **ret_pip, struct buf *buf); 181 static void i_mdi_pm_hold_client(mdi_client_t *, int); 182 static void 
i_mdi_pm_rele_client(mdi_client_t *, int); 183 static void i_mdi_pm_reset_client(mdi_client_t *); 184 static void i_mdi_pm_hold_all_phci(mdi_client_t *); 185 static int i_mdi_power_all_phci(mdi_client_t *); 186 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 187 188 189 /* 190 * Internal mdi_pathinfo node functions 191 */ 192 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 193 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 194 195 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 196 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 197 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 198 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 199 static void i_mdi_phci_get_client_lock(mdi_phci_t *, 200 mdi_client_t *); 201 static void i_mdi_phci_unlock(mdi_phci_t *); 202 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 203 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 204 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 205 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 206 mdi_client_t *); 207 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 208 static void i_mdi_client_remove_path(mdi_client_t *, 209 mdi_pathinfo_t *); 210 211 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 212 mdi_pathinfo_state_t, int); 213 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 214 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 215 char **, int); 216 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 217 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 218 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 219 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 220 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 221 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 222 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, 
char *, char *); 223 static void i_mdi_client_update_state(mdi_client_t *); 224 static int i_mdi_client_compute_state(mdi_client_t *, 225 mdi_phci_t *); 226 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 227 static void i_mdi_client_unlock(mdi_client_t *); 228 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 229 static mdi_client_t *i_devi_get_client(dev_info_t *); 230 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int, 231 int); 232 /* 233 * Failover related function prototypes 234 */ 235 static int i_mdi_failover(void *); 236 237 /* 238 * misc internal functions 239 */ 240 static int i_mdi_get_hash_key(char *); 241 static int i_map_nvlist_error_to_mdi(int); 242 static void i_mdi_report_path_state(mdi_client_t *, 243 mdi_pathinfo_t *); 244 245 static void setup_vhci_cache(mdi_vhci_t *); 246 static int destroy_vhci_cache(mdi_vhci_t *); 247 static void setup_phci_driver_list(mdi_vhci_t *); 248 static void free_phci_driver_list(mdi_vhci_config_t *); 249 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 250 static boolean_t stop_vhcache_flush_thread(void *, int); 251 static void free_string_array(char **, int); 252 static void free_vhcache_phci(mdi_vhcache_phci_t *); 253 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 254 static void free_vhcache_client(mdi_vhcache_client_t *); 255 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 256 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 257 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 258 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 259 static void vhcache_pi_add(mdi_vhci_config_t *, 260 struct mdi_pathinfo *); 261 static void vhcache_pi_remove(mdi_vhci_config_t *, 262 struct mdi_pathinfo *); 263 static void free_phclient_path_list(mdi_phys_path_t *); 264 static void sort_vhcache_paths(mdi_vhcache_client_t *); 265 static int flush_vhcache(mdi_vhci_config_t *, int); 266 static void 
vhcache_dirty(mdi_vhci_config_t *); 267 static void free_async_client_config(mdi_async_client_config_t *); 268 static nvlist_t *read_on_disk_vhci_cache(char *); 269 extern int fread_nvlist(char *, nvlist_t **); 270 extern int fwrite_nvlist(char *, nvlist_t *); 271 272 /* called once when first vhci registers with mdi */ 273 static void 274 i_mdi_init() 275 { 276 static int initialized = 0; 277 278 if (initialized) 279 return; 280 initialized = 1; 281 282 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 283 /* 284 * Create our taskq resources 285 */ 286 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 287 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 288 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 289 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 290 } 291 292 /* 293 * mdi_get_component_type(): 294 * Return mpxio component type 295 * Return Values: 296 * MDI_COMPONENT_NONE 297 * MDI_COMPONENT_VHCI 298 * MDI_COMPONENT_PHCI 299 * MDI_COMPONENT_CLIENT 300 * XXX This doesn't work under multi-level MPxIO and should be 301 * removed when clients migrate mdi_is_*() interfaces. 302 */ 303 int 304 mdi_get_component_type(dev_info_t *dip) 305 { 306 return (DEVI(dip)->devi_mdi_component); 307 } 308 309 /* 310 * mdi_vhci_register(): 311 * Register a vHCI module with the mpxio framework 312 * mdi_vhci_register() is called by vHCI drivers to register the 313 * 'class_driver' vHCI driver and its MDI entrypoints with the 314 * mpxio framework. The vHCI driver must call this interface as 315 * part of its attach(9e) handler. 316 * Competing threads may try to attach mdi_vhci_register() as 317 * the vHCI drivers are loaded and attached as a result of pHCI 318 * driver instance registration (mdi_phci_register()) with the 319 * framework. 
320 * Return Values: 321 * MDI_SUCCESS 322 * MDI_FAILURE 323 */ 324 325 /*ARGSUSED*/ 326 int 327 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 328 int flags) 329 { 330 mdi_vhci_t *vh = NULL; 331 332 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 333 334 i_mdi_init(); 335 336 mutex_enter(&mdi_mutex); 337 /* 338 * Scan for already registered vhci 339 */ 340 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 341 if (strcmp(vh->vh_class, class) == 0) { 342 /* 343 * vHCI has already been created. Check for valid 344 * vHCI ops registration. We only support one vHCI 345 * module per class 346 */ 347 if (vh->vh_ops != NULL) { 348 mutex_exit(&mdi_mutex); 349 cmn_err(CE_NOTE, vhci_greeting, class); 350 return (MDI_FAILURE); 351 } 352 break; 353 } 354 } 355 356 /* 357 * if not yet created, create the vHCI component 358 */ 359 if (vh == NULL) { 360 struct client_hash *hash = NULL; 361 char *load_balance; 362 363 /* 364 * Allocate and initialize the mdi extensions 365 */ 366 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 367 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 368 KM_SLEEP); 369 vh->vh_client_table = hash; 370 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 371 (void) strcpy(vh->vh_class, class); 372 vh->vh_lb = LOAD_BALANCE_RR; 373 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 374 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 375 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 376 vh->vh_lb = LOAD_BALANCE_NONE; 377 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 378 == 0) { 379 vh->vh_lb = LOAD_BALANCE_LBA; 380 } 381 ddi_prop_free(load_balance); 382 } 383 384 /* 385 * Store the vHCI ops vectors 386 */ 387 vh->vh_dip = vdip; 388 vh->vh_ops = vops; 389 390 setup_vhci_cache(vh); 391 392 if (mdi_vhci_head == NULL) { 393 mdi_vhci_head = vh; 394 } 395 if (mdi_vhci_tail) { 396 mdi_vhci_tail->vh_next = vh; 397 } 398 mdi_vhci_tail = vh; 399 mdi_vhci_count++; 400 } 401 402 /* 403 * Claim the 
devfs node as a vhci component 404 */ 405 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 406 407 /* 408 * Initialize our back reference from dev_info node 409 */ 410 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 411 mutex_exit(&mdi_mutex); 412 return (MDI_SUCCESS); 413 } 414 415 /* 416 * mdi_vhci_unregister(): 417 * Unregister a vHCI module from mpxio framework 418 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 419 * of a vhci to unregister it from the framework. 420 * Return Values: 421 * MDI_SUCCESS 422 * MDI_FAILURE 423 */ 424 425 /*ARGSUSED*/ 426 int 427 mdi_vhci_unregister(dev_info_t *vdip, int flags) 428 { 429 mdi_vhci_t *found, *vh, *prev = NULL; 430 431 /* 432 * Check for invalid VHCI 433 */ 434 if ((vh = i_devi_get_vhci(vdip)) == NULL) 435 return (MDI_FAILURE); 436 437 mutex_enter(&mdi_mutex); 438 439 /* 440 * Scan the list of registered vHCIs for a match 441 */ 442 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 443 if (found == vh) 444 break; 445 prev = found; 446 } 447 448 if (found == NULL) { 449 mutex_exit(&mdi_mutex); 450 return (MDI_FAILURE); 451 } 452 453 /* 454 * Check the vHCI, pHCI and client count. All the pHCIs and clients 455 * should have been unregistered, before a vHCI can be 456 * unregistered. 
457 */ 458 if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) { 459 mutex_exit(&mdi_mutex); 460 return (MDI_FAILURE); 461 } 462 463 /* 464 * Remove the vHCI from the global list 465 */ 466 if (vh == mdi_vhci_head) { 467 mdi_vhci_head = vh->vh_next; 468 } else { 469 prev->vh_next = vh->vh_next; 470 } 471 if (vh == mdi_vhci_tail) { 472 mdi_vhci_tail = prev; 473 } 474 475 mdi_vhci_count--; 476 mutex_exit(&mdi_mutex); 477 478 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 479 /* add vhci to the global list */ 480 mutex_enter(&mdi_mutex); 481 if (mdi_vhci_head == NULL) 482 mdi_vhci_head = vh; 483 else 484 mdi_vhci_tail->vh_next = vh; 485 mdi_vhci_tail = vh; 486 mdi_vhci_count++; 487 mutex_exit(&mdi_mutex); 488 return (MDI_FAILURE); 489 } 490 491 vh->vh_ops = NULL; 492 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 493 DEVI(vdip)->devi_mdi_xhci = NULL; 494 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 495 kmem_free(vh->vh_client_table, 496 mdi_client_table_size * sizeof (struct client_hash)); 497 kmem_free(vh, sizeof (mdi_vhci_t)); 498 return (MDI_SUCCESS); 499 } 500 501 /* 502 * i_mdi_vhci_class2vhci(): 503 * Look for a matching vHCI module given a vHCI class name 504 * Return Values: 505 * Handle to a vHCI component 506 * NULL 507 */ 508 static mdi_vhci_t * 509 i_mdi_vhci_class2vhci(char *class) 510 { 511 mdi_vhci_t *vh = NULL; 512 513 ASSERT(!MUTEX_HELD(&mdi_mutex)); 514 515 mutex_enter(&mdi_mutex); 516 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 517 if (strcmp(vh->vh_class, class) == 0) { 518 break; 519 } 520 } 521 mutex_exit(&mdi_mutex); 522 return (vh); 523 } 524 525 /* 526 * i_devi_get_vhci(): 527 * Utility function to get the handle to a vHCI component 528 * Return Values: 529 * Handle to a vHCI component 530 * NULL 531 */ 532 mdi_vhci_t * 533 i_devi_get_vhci(dev_info_t *vdip) 534 { 535 mdi_vhci_t *vh = NULL; 536 if (MDI_VHCI(vdip)) { 537 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 538 } 539 return (vh); 540 } 541 542 /* 543 * 
mdi_phci_register(): 544 * Register a pHCI module with mpxio framework 545 * mdi_phci_register() is called by pHCI drivers to register with 546 * the mpxio framework and a specific 'class_driver' vHCI. The 547 * pHCI driver must call this interface as part of its attach(9e) 548 * handler. 549 * Return Values: 550 * MDI_SUCCESS 551 * MDI_FAILURE 552 */ 553 554 /*ARGSUSED*/ 555 int 556 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 557 { 558 mdi_phci_t *ph; 559 mdi_vhci_t *vh; 560 char *data; 561 char *pathname; 562 563 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 564 (void) ddi_pathname(pdip, pathname); 565 566 /* 567 * Check for mpxio-disable property. Enable mpxio if the property is 568 * missing or not set to "yes". 569 * If the property is set to "yes" then emit a brief message. 570 */ 571 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 572 &data) == DDI_SUCCESS)) { 573 if (strcmp(data, "yes") == 0) { 574 MDI_DEBUG(1, (CE_CONT, pdip, 575 "?%s (%s%d) multipath capabilities " 576 "disabled via %s.conf.\n", pathname, 577 ddi_driver_name(pdip), ddi_get_instance(pdip), 578 ddi_driver_name(pdip))); 579 ddi_prop_free(data); 580 kmem_free(pathname, MAXPATHLEN); 581 return (MDI_FAILURE); 582 } 583 ddi_prop_free(data); 584 } 585 586 kmem_free(pathname, MAXPATHLEN); 587 588 /* 589 * Search for a matching vHCI 590 */ 591 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 592 if (vh == NULL) { 593 return (MDI_FAILURE); 594 } 595 596 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 597 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 598 ph->ph_dip = pdip; 599 ph->ph_vhci = vh; 600 ph->ph_next = NULL; 601 ph->ph_unstable = 0; 602 ph->ph_vprivate = 0; 603 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 604 cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL); 605 606 MDI_PHCI_SET_POWER_UP(ph); 607 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 608 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 609 610 
vhcache_phci_add(vh->vh_config, ph); 611 612 mutex_enter(&mdi_mutex); 613 if (vh->vh_phci_head == NULL) { 614 vh->vh_phci_head = ph; 615 } 616 if (vh->vh_phci_tail) { 617 vh->vh_phci_tail->ph_next = ph; 618 } 619 vh->vh_phci_tail = ph; 620 vh->vh_phci_count++; 621 mutex_exit(&mdi_mutex); 622 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 623 return (MDI_SUCCESS); 624 } 625 626 /* 627 * mdi_phci_unregister(): 628 * Unregister a pHCI module from mpxio framework 629 * mdi_phci_unregister() is called by the pHCI drivers from their 630 * detach(9E) handler to unregister their instances from the 631 * framework. 632 * Return Values: 633 * MDI_SUCCESS 634 * MDI_FAILURE 635 */ 636 637 /*ARGSUSED*/ 638 int 639 mdi_phci_unregister(dev_info_t *pdip, int flags) 640 { 641 mdi_vhci_t *vh; 642 mdi_phci_t *ph; 643 mdi_phci_t *tmp; 644 mdi_phci_t *prev = NULL; 645 646 ph = i_devi_get_phci(pdip); 647 if (ph == NULL) { 648 MDI_DEBUG(1, (CE_WARN, pdip, 649 "!pHCI unregister: Not a valid pHCI")); 650 return (MDI_FAILURE); 651 } 652 653 vh = ph->ph_vhci; 654 ASSERT(vh != NULL); 655 if (vh == NULL) { 656 MDI_DEBUG(1, (CE_WARN, pdip, 657 "!pHCI unregister: Not a valid vHCI")); 658 return (MDI_FAILURE); 659 } 660 661 mutex_enter(&mdi_mutex); 662 tmp = vh->vh_phci_head; 663 while (tmp) { 664 if (tmp == ph) { 665 break; 666 } 667 prev = tmp; 668 tmp = tmp->ph_next; 669 } 670 671 if (ph == vh->vh_phci_head) { 672 vh->vh_phci_head = ph->ph_next; 673 } else { 674 prev->ph_next = ph->ph_next; 675 } 676 677 if (ph == vh->vh_phci_tail) { 678 vh->vh_phci_tail = prev; 679 } 680 681 vh->vh_phci_count--; 682 683 mutex_exit(&mdi_mutex); 684 685 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 686 ESC_DDI_INITIATOR_UNREGISTER); 687 vhcache_phci_remove(vh->vh_config, ph); 688 cv_destroy(&ph->ph_unstable_cv); 689 cv_destroy(&ph->ph_powerchange_cv); 690 mutex_destroy(&ph->ph_mutex); 691 kmem_free(ph, sizeof (mdi_phci_t)); 692 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 693 
DEVI(pdip)->devi_mdi_xhci = NULL; 694 return (MDI_SUCCESS); 695 } 696 697 /* 698 * i_devi_get_phci(): 699 * Utility function to return the phci extensions. 700 */ 701 static mdi_phci_t * 702 i_devi_get_phci(dev_info_t *pdip) 703 { 704 mdi_phci_t *ph = NULL; 705 if (MDI_PHCI(pdip)) { 706 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 707 } 708 return (ph); 709 } 710 711 /* 712 * mdi_phci_path2devinfo(): 713 * Utility function to search for a valid phci device given 714 * the devfs pathname. 715 */ 716 717 dev_info_t * 718 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 719 { 720 char *temp_pathname; 721 mdi_vhci_t *vh; 722 mdi_phci_t *ph; 723 dev_info_t *pdip = NULL; 724 725 vh = i_devi_get_vhci(vdip); 726 ASSERT(vh != NULL); 727 728 if (vh == NULL) { 729 /* 730 * Invalid vHCI component, return failure 731 */ 732 return (NULL); 733 } 734 735 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 736 mutex_enter(&mdi_mutex); 737 ph = vh->vh_phci_head; 738 while (ph != NULL) { 739 pdip = ph->ph_dip; 740 ASSERT(pdip != NULL); 741 *temp_pathname = '\0'; 742 (void) ddi_pathname(pdip, temp_pathname); 743 if (strcmp(temp_pathname, pathname) == 0) { 744 break; 745 } 746 ph = ph->ph_next; 747 } 748 if (ph == NULL) { 749 pdip = NULL; 750 } 751 mutex_exit(&mdi_mutex); 752 kmem_free(temp_pathname, MAXPATHLEN); 753 return (pdip); 754 } 755 756 /* 757 * mdi_phci_get_path_count(): 758 * get number of path information nodes associated with a given 759 * pHCI device. 
760 */ 761 int 762 mdi_phci_get_path_count(dev_info_t *pdip) 763 { 764 mdi_phci_t *ph; 765 int count = 0; 766 767 ph = i_devi_get_phci(pdip); 768 if (ph != NULL) { 769 count = ph->ph_path_count; 770 } 771 return (count); 772 } 773 774 /* 775 * i_mdi_phci_lock(): 776 * Lock a pHCI device 777 * Return Values: 778 * None 779 * Note: 780 * The default locking order is: 781 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 782 * But there are number of situations where locks need to be 783 * grabbed in reverse order. This routine implements try and lock 784 * mechanism depending on the requested parameter option. 785 */ 786 static void 787 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 788 { 789 if (pip) { 790 /* Reverse locking is requested. */ 791 while (MDI_PHCI_TRYLOCK(ph) == 0) { 792 /* 793 * tryenter failed. Try to grab again 794 * after a small delay 795 */ 796 MDI_PI_HOLD(pip); 797 MDI_PI_UNLOCK(pip); 798 delay(1); 799 MDI_PI_LOCK(pip); 800 MDI_PI_RELE(pip); 801 } 802 } else { 803 MDI_PHCI_LOCK(ph); 804 } 805 } 806 807 /* 808 * i_mdi_phci_get_client_lock(): 809 * Lock a pHCI device 810 * Return Values: 811 * None 812 * Note: 813 * The default locking order is: 814 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 815 * But there are number of situations where locks need to be 816 * grabbed in reverse order. This routine implements try and lock 817 * mechanism depending on the requested parameter option. 818 */ 819 static void 820 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct) 821 { 822 if (ct) { 823 /* Reverse locking is requested. */ 824 while (MDI_PHCI_TRYLOCK(ph) == 0) { 825 /* 826 * tryenter failed. 
Try to grab again 827 * after a small delay 828 */ 829 MDI_CLIENT_UNLOCK(ct); 830 delay(1); 831 MDI_CLIENT_LOCK(ct); 832 } 833 } else { 834 MDI_PHCI_LOCK(ph); 835 } 836 } 837 838 /* 839 * i_mdi_phci_unlock(): 840 * Unlock the pHCI component 841 */ 842 static void 843 i_mdi_phci_unlock(mdi_phci_t *ph) 844 { 845 MDI_PHCI_UNLOCK(ph); 846 } 847 848 /* 849 * i_mdi_devinfo_create(): 850 * create client device's devinfo node 851 * Return Values: 852 * dev_info 853 * NULL 854 * Notes: 855 */ 856 static dev_info_t * 857 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 858 char **compatible, int ncompatible) 859 { 860 dev_info_t *cdip = NULL; 861 862 ASSERT(MUTEX_HELD(&mdi_mutex)); 863 864 /* Verify for duplicate entry */ 865 cdip = i_mdi_devinfo_find(vh, name, guid); 866 ASSERT(cdip == NULL); 867 if (cdip) { 868 cmn_err(CE_WARN, 869 "i_mdi_devinfo_create: client dip %p already exists", 870 (void *)cdip); 871 } 872 873 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 874 if (cdip == NULL) 875 goto fail; 876 877 /* 878 * Create component type and Global unique identifier 879 * properties 880 */ 881 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 882 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 883 goto fail; 884 } 885 886 /* Decorate the node with compatible property */ 887 if (compatible && 888 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 889 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 890 goto fail; 891 } 892 893 return (cdip); 894 895 fail: 896 if (cdip) { 897 (void) ndi_prop_remove_all(cdip); 898 (void) ndi_devi_free(cdip); 899 } 900 return (NULL); 901 } 902 903 /* 904 * i_mdi_devinfo_find(): 905 * Find a matching devinfo node for given client node name 906 * and its guid. 
907 * Return Values: 908 * Handle to a dev_info node or NULL 909 */ 910 911 static dev_info_t * 912 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 913 { 914 char *data; 915 dev_info_t *cdip = NULL; 916 dev_info_t *ndip = NULL; 917 int circular; 918 919 ndi_devi_enter(vh->vh_dip, &circular); 920 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 921 while ((cdip = ndip) != NULL) { 922 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 923 924 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 925 continue; 926 } 927 928 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 929 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 930 &data) != DDI_PROP_SUCCESS) { 931 continue; 932 } 933 934 if (strcmp(data, guid) != 0) { 935 ddi_prop_free(data); 936 continue; 937 } 938 ddi_prop_free(data); 939 break; 940 } 941 ndi_devi_exit(vh->vh_dip, circular); 942 return (cdip); 943 } 944 945 /* 946 * i_mdi_devinfo_remove(): 947 * Remove a client device node 948 */ 949 static int 950 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 951 { 952 int rv = MDI_SUCCESS; 953 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 954 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 955 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 956 if (rv != NDI_SUCCESS) { 957 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 958 " failed. 
cdip = %p\n", cdip)); 959 } 960 /* 961 * Convert to MDI error code 962 */ 963 switch (rv) { 964 case NDI_SUCCESS: 965 rv = MDI_SUCCESS; 966 break; 967 case NDI_BUSY: 968 rv = MDI_BUSY; 969 break; 970 default: 971 rv = MDI_FAILURE; 972 break; 973 } 974 } 975 return (rv); 976 } 977 978 /* 979 * i_devi_get_client() 980 * Utility function to get mpxio component extensions 981 */ 982 static mdi_client_t * 983 i_devi_get_client(dev_info_t *cdip) 984 { 985 mdi_client_t *ct = NULL; 986 if (MDI_CLIENT(cdip)) { 987 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 988 } 989 return (ct); 990 } 991 992 /* 993 * i_mdi_is_child_present(): 994 * Search for the presence of client device dev_info node 995 */ 996 997 static int 998 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 999 { 1000 int rv = MDI_FAILURE; 1001 struct dev_info *dip; 1002 int circular; 1003 1004 ndi_devi_enter(vdip, &circular); 1005 dip = DEVI(vdip)->devi_child; 1006 while (dip) { 1007 if (dip == DEVI(cdip)) { 1008 rv = MDI_SUCCESS; 1009 break; 1010 } 1011 dip = dip->devi_sibling; 1012 } 1013 ndi_devi_exit(vdip, circular); 1014 return (rv); 1015 } 1016 1017 1018 /* 1019 * i_mdi_client_lock(): 1020 * Grab client component lock 1021 * Return Values: 1022 * None 1023 * Note: 1024 * The default locking order is: 1025 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1026 * But there are number of situations where locks need to be 1027 * grabbed in reverse order. This routine implements try and lock 1028 * mechanism depending on the requested parameter option. 1029 */ 1030 1031 static void 1032 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1033 { 1034 if (pip) { 1035 /* 1036 * Reverse locking is requested. 1037 */ 1038 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1039 /* 1040 * tryenter failed. 
Try to grab again 1041 * after a small delay 1042 */ 1043 MDI_PI_HOLD(pip); 1044 MDI_PI_UNLOCK(pip); 1045 delay(1); 1046 MDI_PI_LOCK(pip); 1047 MDI_PI_RELE(pip); 1048 } 1049 } else { 1050 MDI_CLIENT_LOCK(ct); 1051 } 1052 } 1053 1054 /* 1055 * i_mdi_client_unlock(): 1056 * Unlock a client component 1057 */ 1058 1059 static void 1060 i_mdi_client_unlock(mdi_client_t *ct) 1061 { 1062 MDI_CLIENT_UNLOCK(ct); 1063 } 1064 1065 /* 1066 * i_mdi_client_alloc(): 1067 * Allocate and initialize a client structure. Caller should 1068 * hold the global mdi_mutex. 1069 * Return Values: 1070 * Handle to a client component 1071 */ 1072 /*ARGSUSED*/ 1073 static mdi_client_t * 1074 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1075 { 1076 mdi_client_t *ct; 1077 1078 ASSERT(MUTEX_HELD(&mdi_mutex)); 1079 1080 /* 1081 * Allocate and initialize a component structure. 1082 */ 1083 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1084 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1085 ct->ct_hnext = NULL; 1086 ct->ct_hprev = NULL; 1087 ct->ct_dip = NULL; 1088 ct->ct_vhci = vh; 1089 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1090 (void) strcpy(ct->ct_drvname, name); 1091 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1092 (void) strcpy(ct->ct_guid, lguid); 1093 ct->ct_cprivate = NULL; 1094 ct->ct_vprivate = NULL; 1095 ct->ct_flags = 0; 1096 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1097 MDI_CLIENT_SET_OFFLINE(ct); 1098 MDI_CLIENT_SET_DETACH(ct); 1099 MDI_CLIENT_SET_POWER_UP(ct); 1100 ct->ct_failover_flags = 0; 1101 ct->ct_failover_status = 0; 1102 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1103 ct->ct_unstable = 0; 1104 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1105 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1106 ct->ct_lb = vh->vh_lb; 1107 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1108 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1109 ct->ct_path_count = 0; 1110 ct->ct_path_head 
= NULL; 1111 ct->ct_path_tail = NULL; 1112 ct->ct_path_last = NULL; 1113 1114 /* 1115 * Add this client component to our client hash queue 1116 */ 1117 i_mdi_client_enlist_table(vh, ct); 1118 return (ct); 1119 } 1120 1121 /* 1122 * i_mdi_client_enlist_table(): 1123 * Attach the client device to the client hash table. Caller 1124 * should hold the mdi_mutex 1125 */ 1126 1127 static void 1128 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1129 { 1130 int index; 1131 struct client_hash *head; 1132 1133 ASSERT(MUTEX_HELD(&mdi_mutex)); 1134 index = i_mdi_get_hash_key(ct->ct_guid); 1135 head = &vh->vh_client_table[index]; 1136 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1137 head->ct_hash_head = ct; 1138 head->ct_hash_count++; 1139 vh->vh_client_count++; 1140 } 1141 1142 /* 1143 * i_mdi_client_delist_table(): 1144 * Attach the client device to the client hash table. 1145 * Caller should hold the mdi_mutex 1146 */ 1147 1148 static void 1149 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1150 { 1151 int index; 1152 char *guid; 1153 struct client_hash *head; 1154 mdi_client_t *next; 1155 mdi_client_t *last; 1156 1157 ASSERT(MUTEX_HELD(&mdi_mutex)); 1158 guid = ct->ct_guid; 1159 index = i_mdi_get_hash_key(guid); 1160 head = &vh->vh_client_table[index]; 1161 1162 last = NULL; 1163 next = (mdi_client_t *)head->ct_hash_head; 1164 while (next != NULL) { 1165 if (next == ct) { 1166 break; 1167 } 1168 last = next; 1169 next = next->ct_hnext; 1170 } 1171 1172 if (next) { 1173 head->ct_hash_count--; 1174 if (last == NULL) { 1175 head->ct_hash_head = ct->ct_hnext; 1176 } else { 1177 last->ct_hnext = ct->ct_hnext; 1178 } 1179 ct->ct_hnext = NULL; 1180 vh->vh_client_count--; 1181 } 1182 } 1183 1184 1185 /* 1186 * i_mdi_client_free(): 1187 * Free a client component 1188 */ 1189 static int 1190 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1191 { 1192 int rv = MDI_SUCCESS; 1193 int flags = ct->ct_flags; 1194 dev_info_t *cdip; 1195 dev_info_t *vdip; 
1196 1197 ASSERT(MUTEX_HELD(&mdi_mutex)); 1198 vdip = vh->vh_dip; 1199 cdip = ct->ct_dip; 1200 1201 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1202 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1203 DEVI(cdip)->devi_mdi_client = NULL; 1204 1205 /* 1206 * Clear out back ref. to dev_info_t node 1207 */ 1208 ct->ct_dip = NULL; 1209 1210 /* 1211 * Remove this client from our hash queue 1212 */ 1213 i_mdi_client_delist_table(vh, ct); 1214 1215 /* 1216 * Uninitialize and free the component 1217 */ 1218 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1219 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1220 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1221 cv_destroy(&ct->ct_failover_cv); 1222 cv_destroy(&ct->ct_unstable_cv); 1223 cv_destroy(&ct->ct_powerchange_cv); 1224 mutex_destroy(&ct->ct_mutex); 1225 kmem_free(ct, sizeof (*ct)); 1226 1227 if (cdip != NULL) { 1228 mutex_exit(&mdi_mutex); 1229 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1230 mutex_enter(&mdi_mutex); 1231 } 1232 return (rv); 1233 } 1234 1235 /* 1236 * i_mdi_client_find(): 1237 * Find the client structure corresponding to a given guid 1238 * Caller should hold the mdi_mutex 1239 */ 1240 static mdi_client_t * 1241 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1242 { 1243 int index; 1244 struct client_hash *head; 1245 mdi_client_t *ct; 1246 1247 ASSERT(MUTEX_HELD(&mdi_mutex)); 1248 index = i_mdi_get_hash_key(guid); 1249 head = &vh->vh_client_table[index]; 1250 1251 ct = head->ct_hash_head; 1252 while (ct != NULL) { 1253 if (strcmp(ct->ct_guid, guid) == 0 && 1254 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1255 break; 1256 } 1257 ct = ct->ct_hnext; 1258 } 1259 return (ct); 1260 } 1261 1262 1263 1264 /* 1265 * i_mdi_client_update_state(): 1266 * Compute and update client device state 1267 * Notes: 1268 * A client device can be in any of three possible states: 1269 * 1270 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state 
with more 1271 * one online/standby paths. Can tolerate failures. 1272 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1273 * no alternate paths available as standby. A failure on the online 1274 * would result in loss of access to device data. 1275 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1276 * no paths available to access the device. 1277 */ 1278 static void 1279 i_mdi_client_update_state(mdi_client_t *ct) 1280 { 1281 int state; 1282 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1283 state = i_mdi_client_compute_state(ct, NULL); 1284 MDI_CLIENT_SET_STATE(ct, state); 1285 } 1286 1287 /* 1288 * i_mdi_client_compute_state(): 1289 * Compute client device state 1290 * 1291 * mdi_phci_t * Pointer to pHCI structure which should 1292 * while computing the new value. Used by 1293 * i_mdi_phci_offline() to find the new 1294 * client state after DR of a pHCI. 1295 */ 1296 static int 1297 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1298 { 1299 int state; 1300 int online_count = 0; 1301 int standby_count = 0; 1302 mdi_pathinfo_t *pip, *next; 1303 1304 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1305 pip = ct->ct_path_head; 1306 while (pip != NULL) { 1307 MDI_PI_LOCK(pip); 1308 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1309 if (MDI_PI(pip)->pi_phci == ph) { 1310 MDI_PI_UNLOCK(pip); 1311 pip = next; 1312 continue; 1313 } 1314 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1315 == MDI_PATHINFO_STATE_ONLINE) 1316 online_count++; 1317 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1318 == MDI_PATHINFO_STATE_STANDBY) 1319 standby_count++; 1320 MDI_PI_UNLOCK(pip); 1321 pip = next; 1322 } 1323 1324 if (online_count == 0) { 1325 if (standby_count == 0) { 1326 state = MDI_CLIENT_STATE_FAILED; 1327 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1328 " ct = %p\n", ct)); 1329 } else if (standby_count == 1) { 1330 state = MDI_CLIENT_STATE_DEGRADED; 1331 } else { 1332 state = MDI_CLIENT_STATE_OPTIMAL; 1333 } 1334 } 
else if (online_count == 1) { 1335 if (standby_count == 0) { 1336 state = MDI_CLIENT_STATE_DEGRADED; 1337 } else { 1338 state = MDI_CLIENT_STATE_OPTIMAL; 1339 } 1340 } else { 1341 state = MDI_CLIENT_STATE_OPTIMAL; 1342 } 1343 return (state); 1344 } 1345 1346 /* 1347 * i_mdi_client2devinfo(): 1348 * Utility function 1349 */ 1350 dev_info_t * 1351 i_mdi_client2devinfo(mdi_client_t *ct) 1352 { 1353 return (ct->ct_dip); 1354 } 1355 1356 /* 1357 * mdi_client_path2_devinfo(): 1358 * Given the parent devinfo and child devfs pathname, search for 1359 * a valid devfs node handle. 1360 */ 1361 dev_info_t * 1362 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1363 { 1364 dev_info_t *cdip = NULL; 1365 dev_info_t *ndip = NULL; 1366 char *temp_pathname; 1367 int circular; 1368 1369 /* 1370 * Allocate temp buffer 1371 */ 1372 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1373 1374 /* 1375 * Lock parent against changes 1376 */ 1377 ndi_devi_enter(vdip, &circular); 1378 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1379 while ((cdip = ndip) != NULL) { 1380 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1381 1382 *temp_pathname = '\0'; 1383 (void) ddi_pathname(cdip, temp_pathname); 1384 if (strcmp(temp_pathname, pathname) == 0) { 1385 break; 1386 } 1387 } 1388 /* 1389 * Release devinfo lock 1390 */ 1391 ndi_devi_exit(vdip, circular); 1392 1393 /* 1394 * Free the temp buffer 1395 */ 1396 kmem_free(temp_pathname, MAXPATHLEN); 1397 return (cdip); 1398 } 1399 1400 1401 /* 1402 * mdi_client_get_path_count(): 1403 * Utility function to get number of path information nodes 1404 * associated with a given client device. 
1405 */ 1406 int 1407 mdi_client_get_path_count(dev_info_t *cdip) 1408 { 1409 mdi_client_t *ct; 1410 int count = 0; 1411 1412 ct = i_devi_get_client(cdip); 1413 if (ct != NULL) { 1414 count = ct->ct_path_count; 1415 } 1416 return (count); 1417 } 1418 1419 1420 /* 1421 * i_mdi_get_hash_key(): 1422 * Create a hash using strings as keys 1423 * 1424 */ 1425 static int 1426 i_mdi_get_hash_key(char *str) 1427 { 1428 uint32_t g, hash = 0; 1429 char *p; 1430 1431 for (p = str; *p != '\0'; p++) { 1432 g = *p; 1433 hash += g; 1434 } 1435 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1436 } 1437 1438 /* 1439 * mdi_get_lb_policy(): 1440 * Get current load balancing policy for a given client device 1441 */ 1442 client_lb_t 1443 mdi_get_lb_policy(dev_info_t *cdip) 1444 { 1445 client_lb_t lb = LOAD_BALANCE_NONE; 1446 mdi_client_t *ct; 1447 1448 ct = i_devi_get_client(cdip); 1449 if (ct != NULL) { 1450 lb = ct->ct_lb; 1451 } 1452 return (lb); 1453 } 1454 1455 /* 1456 * mdi_set_lb_region_size(): 1457 * Set current region size for the load-balance 1458 */ 1459 int 1460 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1461 { 1462 mdi_client_t *ct; 1463 int rv = MDI_FAILURE; 1464 1465 ct = i_devi_get_client(cdip); 1466 if (ct != NULL && ct->ct_lb_args != NULL) { 1467 ct->ct_lb_args->region_size = region_size; 1468 rv = MDI_SUCCESS; 1469 } 1470 return (rv); 1471 } 1472 1473 /* 1474 * mdi_Set_lb_policy(): 1475 * Set current load balancing policy for a given client device 1476 */ 1477 int 1478 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1479 { 1480 mdi_client_t *ct; 1481 int rv = MDI_FAILURE; 1482 1483 ct = i_devi_get_client(cdip); 1484 if (ct != NULL) { 1485 ct->ct_lb = lb; 1486 rv = MDI_SUCCESS; 1487 } 1488 return (rv); 1489 } 1490 1491 /* 1492 * mdi_failover(): 1493 * failover function called by the vHCI drivers to initiate 1494 * a failover operation. This is typically due to non-availability 1495 * of online paths to route I/O requests. 
Failover can be 1496 * triggered through user application also. 1497 * 1498 * The vHCI driver calls mdi_failover() to initiate a failover 1499 * operation. mdi_failover() calls back into the vHCI driver's 1500 * vo_failover() entry point to perform the actual failover 1501 * operation. The reason for requiring the vHCI driver to 1502 * initiate failover by calling mdi_failover(), instead of directly 1503 * executing vo_failover() itself, is to ensure that the mdi 1504 * framework can keep track of the client state properly. 1505 * Additionally, mdi_failover() provides as a convenience the 1506 * option of performing the failover operation synchronously or 1507 * asynchronously 1508 * 1509 * Upon successful completion of the failover operation, the 1510 * paths that were previously ONLINE will be in the STANDBY state, 1511 * and the newly activated paths will be in the ONLINE state. 1512 * 1513 * The flags modifier determines whether the activation is done 1514 * synchronously: MDI_FAILOVER_SYNC 1515 * Return Values: 1516 * MDI_SUCCESS 1517 * MDI_FAILURE 1518 * MDI_BUSY 1519 */ 1520 /*ARGSUSED*/ 1521 int 1522 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1523 { 1524 int rv; 1525 mdi_client_t *ct; 1526 1527 ct = i_devi_get_client(cdip); 1528 ASSERT(ct != NULL); 1529 if (ct == NULL) { 1530 /* cdip is not a valid client device. Nothing more to do. */ 1531 return (MDI_FAILURE); 1532 } 1533 1534 MDI_CLIENT_LOCK(ct); 1535 1536 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1537 /* A path to the client is being freed */ 1538 MDI_CLIENT_UNLOCK(ct); 1539 return (MDI_BUSY); 1540 } 1541 1542 1543 if (MDI_CLIENT_IS_FAILED(ct)) { 1544 /* 1545 * Client is in failed state. Nothing more to do. 
1546 */ 1547 MDI_CLIENT_UNLOCK(ct); 1548 return (MDI_FAILURE); 1549 } 1550 1551 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1552 /* 1553 * Failover is already in progress; return BUSY 1554 */ 1555 MDI_CLIENT_UNLOCK(ct); 1556 return (MDI_BUSY); 1557 } 1558 /* 1559 * Make sure that mdi_pathinfo node state changes are processed. 1560 * We do not allow failovers to progress while client path state 1561 * changes are in progress 1562 */ 1563 if (ct->ct_unstable) { 1564 if (flags == MDI_FAILOVER_ASYNC) { 1565 MDI_CLIENT_UNLOCK(ct); 1566 return (MDI_BUSY); 1567 } else { 1568 while (ct->ct_unstable) 1569 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1570 } 1571 } 1572 1573 /* 1574 * Client device is in stable state. Before proceeding, perform sanity 1575 * checks again. 1576 */ 1577 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1578 (i_ddi_node_state(ct->ct_dip) < DS_READY)) { 1579 /* 1580 * Client is in failed state. Nothing more to do. 1581 */ 1582 MDI_CLIENT_UNLOCK(ct); 1583 return (MDI_FAILURE); 1584 } 1585 1586 /* 1587 * Set the client state as failover in progress. 1588 */ 1589 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1590 ct->ct_failover_flags = flags; 1591 MDI_CLIENT_UNLOCK(ct); 1592 1593 if (flags == MDI_FAILOVER_ASYNC) { 1594 /* 1595 * Submit the initiate failover request via CPR safe 1596 * taskq threads. 1597 */ 1598 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1599 ct, KM_SLEEP); 1600 return (MDI_ACCEPT); 1601 } else { 1602 /* 1603 * Synchronous failover mode. Typically invoked from the user 1604 * land. 1605 */ 1606 rv = i_mdi_failover(ct); 1607 } 1608 return (rv); 1609 } 1610 1611 /* 1612 * i_mdi_failover(): 1613 * internal failover function. Invokes vHCI drivers failover 1614 * callback function and process the failover status 1615 * Return Values: 1616 * None 1617 * 1618 * Note: A client device in failover state can not be detached or freed. 
1619 */ 1620 static int 1621 i_mdi_failover(void *arg) 1622 { 1623 int rv = MDI_SUCCESS; 1624 mdi_client_t *ct = (mdi_client_t *)arg; 1625 mdi_vhci_t *vh = ct->ct_vhci; 1626 1627 ASSERT(!MUTEX_HELD(&ct->ct_mutex)); 1628 1629 if (vh->vh_ops->vo_failover != NULL) { 1630 /* 1631 * Call vHCI drivers callback routine 1632 */ 1633 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1634 ct->ct_failover_flags); 1635 } 1636 1637 MDI_CLIENT_LOCK(ct); 1638 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1639 1640 /* 1641 * Save the failover return status 1642 */ 1643 ct->ct_failover_status = rv; 1644 1645 /* 1646 * As a result of failover, client status would have been changed. 1647 * Update the client state and wake up anyone waiting on this client 1648 * device. 1649 */ 1650 i_mdi_client_update_state(ct); 1651 1652 cv_broadcast(&ct->ct_failover_cv); 1653 MDI_CLIENT_UNLOCK(ct); 1654 return (rv); 1655 } 1656 1657 /* 1658 * Load balancing is logical block. 1659 * IOs within the range described by region_size 1660 * would go on the same path. This would improve the 1661 * performance by cache-hit on some of the RAID devices. 1662 * Search only for online paths(At some point we 1663 * may want to balance across target ports). 1664 * If no paths are found then default to round-robin. 
1665 */ 1666 static int 1667 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1668 { 1669 int path_index = -1; 1670 int online_path_count = 0; 1671 int online_nonpref_path_count = 0; 1672 int region_size = ct->ct_lb_args->region_size; 1673 mdi_pathinfo_t *pip; 1674 mdi_pathinfo_t *next; 1675 int preferred, path_cnt; 1676 1677 pip = ct->ct_path_head; 1678 while (pip) { 1679 MDI_PI_LOCK(pip); 1680 if (MDI_PI(pip)->pi_state == 1681 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1682 online_path_count++; 1683 } else if (MDI_PI(pip)->pi_state == 1684 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1685 online_nonpref_path_count++; 1686 } 1687 next = (mdi_pathinfo_t *) 1688 MDI_PI(pip)->pi_client_link; 1689 MDI_PI_UNLOCK(pip); 1690 pip = next; 1691 } 1692 /* if found any online/preferred then use this type */ 1693 if (online_path_count > 0) { 1694 path_cnt = online_path_count; 1695 preferred = 1; 1696 } else if (online_nonpref_path_count > 0) { 1697 path_cnt = online_nonpref_path_count; 1698 preferred = 0; 1699 } else { 1700 path_cnt = 0; 1701 } 1702 if (path_cnt) { 1703 path_index = (bp->b_blkno >> region_size) % path_cnt; 1704 pip = ct->ct_path_head; 1705 while (pip && path_index != -1) { 1706 MDI_PI_LOCK(pip); 1707 if (path_index == 0 && 1708 (MDI_PI(pip)->pi_state == 1709 MDI_PATHINFO_STATE_ONLINE) && 1710 MDI_PI(pip)->pi_preferred == preferred) { 1711 MDI_PI_HOLD(pip); 1712 MDI_PI_UNLOCK(pip); 1713 *ret_pip = pip; 1714 return (MDI_SUCCESS); 1715 } 1716 path_index --; 1717 next = (mdi_pathinfo_t *) 1718 MDI_PI(pip)->pi_client_link; 1719 MDI_PI_UNLOCK(pip); 1720 pip = next; 1721 } 1722 if (pip == NULL) { 1723 MDI_DEBUG(4, (CE_NOTE, NULL, 1724 "!lba %p, no pip !!\n", 1725 bp->b_blkno)); 1726 } else { 1727 MDI_DEBUG(4, (CE_NOTE, NULL, 1728 "!lba %p, no pip for path_index, " 1729 "pip %p\n", pip)); 1730 } 1731 } 1732 return (MDI_FAILURE); 1733 } 1734 1735 /* 1736 * mdi_select_path(): 1737 * select a path to access a client 
device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is
 *		configured through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  If start_pip is specified (non NULL) it is
 *		used as start point to walk and find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a held state (ref_cnt).
 *		Caller should release the hold by calling mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path was selected and at least one of the
 *				  paths examined was in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	/*
	 * The client state checks below apply to the standard behavior
	 * only; callers that pass selection flags bypass them.
	 */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if ((MDI_CLIENT_IS_DETACHED(ct)) ||
		    i_ddi_node_state(cdip) < DS_READY) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.  Remember it so that MDI_BUSY rather
			 * than MDI_NOPATH is returned if nothing is selected.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path;
			 * the list is treated as circular.
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * Back at the starting path: after the preferred
			 * pass do a second pass for non-preferred paths,
			 * after the second pass give up.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Standard behavior resumes after the last selected path;
		 * otherwise resume after the caller supplied start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set to 1 when this path has the requested
			 * state(s) and the preference of the current pass.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					/* Unrecognized flags never match */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.  Remember it so that MDI_BUSY rather
			 * than MDI_NOPATH is returned if nothing is selected.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * The code from do_again to MDI_PI_UNLOCK() runs
			 * with pip locked.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip.  The lock is taken here because
					 * the code at do_again expects pip to
					 * be locked and unlocks it.
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report MDI_BUSY when a transient path was
	 * seen during the search, MDI_NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
2156 */ 2157 mdi_pathinfo_t * 2158 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2159 { 2160 mdi_client_t *ct; 2161 2162 if (!MDI_CLIENT(ct_dip)) 2163 return (NULL); 2164 2165 /* 2166 * Walk through client link 2167 */ 2168 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2169 ASSERT(ct != NULL); 2170 2171 if (pip == NULL) 2172 return ((mdi_pathinfo_t *)ct->ct_path_head); 2173 2174 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2175 } 2176 2177 /* 2178 * For a phci, return the next available path to any client 2179 * Note: ditto mdi_get_next_phci_path() 2180 */ 2181 mdi_pathinfo_t * 2182 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2183 { 2184 mdi_phci_t *ph; 2185 2186 if (!MDI_PHCI(ph_dip)) 2187 return (NULL); 2188 2189 /* 2190 * Walk through pHCI link 2191 */ 2192 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2193 ASSERT(ph != NULL); 2194 2195 if (pip == NULL) 2196 return ((mdi_pathinfo_t *)ph->ph_path_head); 2197 2198 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2199 } 2200 2201 /* 2202 * mdi_get_nextpath(): 2203 * mdi_pathinfo node walker function. Get the next node from the 2204 * client or pHCI device list. 2205 * 2206 * XXX This is wrapper function for compatibility purposes only. 2207 * 2208 * It doesn't work under Multi-level MPxIO, where a dip 2209 * is both client and phci (which link should next_path follow?). 2210 * Once Leadville is modified to call mdi_get_next_phci/client_path, 2211 * this interface should be removed. 2212 */ 2213 void 2214 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip, 2215 mdi_pathinfo_t **ret_pip) 2216 { 2217 if (MDI_CLIENT(dip)) { 2218 *ret_pip = mdi_get_next_phci_path(dip, pip); 2219 } else if (MDI_PHCI(dip)) { 2220 *ret_pip = mdi_get_next_client_path(dip, pip); 2221 } else { 2222 *ret_pip = NULL; 2223 } 2224 } 2225 2226 /* 2227 * mdi_hold_path(): 2228 * Hold the mdi_pathinfo node against unwanted unexpected free. 
2229 * Return Values: 2230 * None 2231 */ 2232 void 2233 mdi_hold_path(mdi_pathinfo_t *pip) 2234 { 2235 if (pip) { 2236 MDI_PI_LOCK(pip); 2237 MDI_PI_HOLD(pip); 2238 MDI_PI_UNLOCK(pip); 2239 } 2240 } 2241 2242 2243 /* 2244 * mdi_rele_path(): 2245 * Release the mdi_pathinfo node which was selected 2246 * through mdi_select_path() mechanism or manually held by 2247 * calling mdi_hold_path(). 2248 * Return Values: 2249 * None 2250 */ 2251 void 2252 mdi_rele_path(mdi_pathinfo_t *pip) 2253 { 2254 if (pip) { 2255 MDI_PI_LOCK(pip); 2256 MDI_PI_RELE(pip); 2257 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2258 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2259 } 2260 MDI_PI_UNLOCK(pip); 2261 } 2262 } 2263 2264 2265 /* 2266 * mdi_pi_lock(): 2267 * Lock the mdi_pathinfo node. 2268 * Note: 2269 * The caller should release the lock by calling mdi_pi_unlock() 2270 */ 2271 void 2272 mdi_pi_lock(mdi_pathinfo_t *pip) 2273 { 2274 ASSERT(pip != NULL); 2275 if (pip) { 2276 MDI_PI_LOCK(pip); 2277 } 2278 } 2279 2280 2281 /* 2282 * mdi_pi_unlock(): 2283 * Unlock the mdi_pathinfo node. 2284 * Note: 2285 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2286 */ 2287 void 2288 mdi_pi_unlock(mdi_pathinfo_t *pip) 2289 { 2290 ASSERT(pip != NULL); 2291 if (pip) { 2292 MDI_PI_UNLOCK(pip); 2293 } 2294 } 2295 2296 /* 2297 * mdi_pi_find(): 2298 * Search the list of mdi_pathinfo nodes attached to the 2299 * pHCI/Client device node whose path address matches "paddr". 2300 * Returns a pointer to the mdi_pathinfo node if a matching node is 2301 * found. 2302 * Return Values: 2303 * mdi_pathinfo node handle 2304 * NULL 2305 * Notes: 2306 * Caller need not hold any locks to call this function. 
2307 */ 2308 mdi_pathinfo_t * 2309 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2310 { 2311 mdi_phci_t *ph; 2312 mdi_vhci_t *vh; 2313 mdi_client_t *ct; 2314 mdi_pathinfo_t *pip = NULL; 2315 2316 if ((pdip == NULL) || (paddr == NULL)) { 2317 return (NULL); 2318 } 2319 ph = i_devi_get_phci(pdip); 2320 if (ph == NULL) { 2321 /* 2322 * Invalid pHCI device, Nothing more to do. 2323 */ 2324 MDI_DEBUG(2, (CE_WARN, NULL, 2325 "!mdi_pi_find: invalid phci")); 2326 return (NULL); 2327 } 2328 2329 vh = ph->ph_vhci; 2330 if (vh == NULL) { 2331 /* 2332 * Invalid vHCI device, Nothing more to do. 2333 */ 2334 MDI_DEBUG(2, (CE_WARN, NULL, 2335 "!mdi_pi_find: invalid phci")); 2336 return (NULL); 2337 } 2338 2339 /* 2340 * Look for client device identified by caddr (guid) 2341 */ 2342 if (caddr == NULL) { 2343 /* 2344 * Find a mdi_pathinfo node under pHCI list for a matching 2345 * unit address. 2346 */ 2347 mutex_enter(&ph->ph_mutex); 2348 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2349 2350 while (pip != NULL) { 2351 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2352 break; 2353 } 2354 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2355 } 2356 mutex_exit(&ph->ph_mutex); 2357 return (pip); 2358 } 2359 2360 /* 2361 * XXX - Is the rest of the code in this function really necessary? 2362 * The consumers of mdi_pi_find() can search for the desired pathinfo 2363 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2364 * whether the search is based on the pathinfo nodes attached to 2365 * the pHCI or the client node, the result will be the same. 2366 */ 2367 2368 /* 2369 * Find the client device corresponding to 'caddr' 2370 */ 2371 mutex_enter(&mdi_mutex); 2372 2373 /* 2374 * XXX - Passing NULL to the following function works as long as the 2375 * the client addresses (caddr) are unique per vhci basis. 
2376 */ 2377 ct = i_mdi_client_find(vh, NULL, caddr); 2378 if (ct == NULL) { 2379 /* 2380 * Client not found, Obviously mdi_pathinfo node has not been 2381 * created yet. 2382 */ 2383 mutex_exit(&mdi_mutex); 2384 return (pip); 2385 } 2386 2387 /* 2388 * Hold the client lock and look for a mdi_pathinfo node with matching 2389 * pHCI and paddr 2390 */ 2391 MDI_CLIENT_LOCK(ct); 2392 2393 /* 2394 * Release the global mutex as it is no more needed. Note: We always 2395 * respect the locking order while acquiring. 2396 */ 2397 mutex_exit(&mdi_mutex); 2398 2399 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2400 while (pip != NULL) { 2401 /* 2402 * Compare the unit address 2403 */ 2404 if ((MDI_PI(pip)->pi_phci == ph) && 2405 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2406 break; 2407 } 2408 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2409 } 2410 MDI_CLIENT_UNLOCK(ct); 2411 return (pip); 2412 } 2413 2414 /* 2415 * mdi_pi_alloc(): 2416 * Allocate and initialize a new instance of a mdi_pathinfo node. 2417 * The mdi_pathinfo node returned by this function identifies a 2418 * unique device path is capable of having properties attached 2419 * and passed to mdi_pi_online() to fully attach and online the 2420 * path and client device node. 2421 * The mdi_pathinfo node returned by this function must be 2422 * destroyed using mdi_pi_free() if the path is no longer 2423 * operational or if the caller fails to attach a client device 2424 * node when calling mdi_pi_online(). The framework will not free 2425 * the resources allocated. 2426 * This function can be called from both interrupt and kernel 2427 * contexts. DDI_NOSLEEP flag should be used while calling 2428 * from interrupt contexts. 
2429 * Return Values: 2430 * MDI_SUCCESS 2431 * MDI_FAILURE 2432 * MDI_NOMEM 2433 */ 2434 /*ARGSUSED*/ 2435 int 2436 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2437 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2438 { 2439 mdi_vhci_t *vh; 2440 mdi_phci_t *ph; 2441 mdi_client_t *ct; 2442 mdi_pathinfo_t *pip = NULL; 2443 dev_info_t *cdip; 2444 int rv = MDI_NOMEM; 2445 int path_allocated = 0; 2446 2447 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2448 ret_pip == NULL) { 2449 /* Nothing more to do */ 2450 return (MDI_FAILURE); 2451 } 2452 2453 *ret_pip = NULL; 2454 ph = i_devi_get_phci(pdip); 2455 ASSERT(ph != NULL); 2456 if (ph == NULL) { 2457 /* Invalid pHCI device, return failure */ 2458 MDI_DEBUG(1, (CE_WARN, NULL, 2459 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2460 return (MDI_FAILURE); 2461 } 2462 2463 MDI_PHCI_LOCK(ph); 2464 vh = ph->ph_vhci; 2465 if (vh == NULL) { 2466 /* Invalid vHCI device, return failure */ 2467 MDI_DEBUG(1, (CE_WARN, NULL, 2468 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2469 MDI_PHCI_UNLOCK(ph); 2470 return (MDI_FAILURE); 2471 } 2472 2473 if (MDI_PHCI_IS_READY(ph) == 0) { 2474 /* 2475 * Do not allow new node creation when pHCI is in 2476 * offline/suspended states 2477 */ 2478 MDI_DEBUG(1, (CE_WARN, NULL, 2479 "mdi_pi_alloc: pHCI=%p is not ready", ph)); 2480 MDI_PHCI_UNLOCK(ph); 2481 return (MDI_BUSY); 2482 } 2483 MDI_PHCI_UNSTABLE(ph); 2484 MDI_PHCI_UNLOCK(ph); 2485 2486 /* look for a matching client, create one if not found */ 2487 mutex_enter(&mdi_mutex); 2488 ct = i_mdi_client_find(vh, cname, caddr); 2489 if (ct == NULL) { 2490 ct = i_mdi_client_alloc(vh, cname, caddr); 2491 ASSERT(ct != NULL); 2492 } 2493 2494 if (ct->ct_dip == NULL) { 2495 /* 2496 * Allocate a devinfo node 2497 */ 2498 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2499 compatible, ncompatible); 2500 if (ct->ct_dip == NULL) { 2501 (void) i_mdi_client_free(vh, ct); 2502 goto 
fail; 2503 } 2504 } 2505 cdip = ct->ct_dip; 2506 2507 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2508 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2509 2510 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2511 while (pip != NULL) { 2512 /* 2513 * Compare the unit address 2514 */ 2515 if ((MDI_PI(pip)->pi_phci == ph) && 2516 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2517 break; 2518 } 2519 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2520 } 2521 2522 if (pip == NULL) { 2523 /* 2524 * This is a new path for this client device. Allocate and 2525 * initialize a new pathinfo node 2526 */ 2527 pip = i_mdi_pi_alloc(ph, paddr, ct); 2528 ASSERT(pip != NULL); 2529 path_allocated = 1; 2530 } 2531 rv = MDI_SUCCESS; 2532 2533 fail: 2534 /* 2535 * Release the global mutex. 2536 */ 2537 mutex_exit(&mdi_mutex); 2538 2539 /* 2540 * Mark the pHCI as stable 2541 */ 2542 MDI_PHCI_LOCK(ph); 2543 MDI_PHCI_STABLE(ph); 2544 MDI_PHCI_UNLOCK(ph); 2545 *ret_pip = pip; 2546 2547 if (path_allocated) 2548 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2549 2550 return (rv); 2551 } 2552 2553 /*ARGSUSED*/ 2554 int 2555 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2556 int flags, mdi_pathinfo_t **ret_pip) 2557 { 2558 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2559 flags, ret_pip)); 2560 } 2561 2562 /* 2563 * i_mdi_pi_alloc(): 2564 * Allocate a mdi_pathinfo node and add to the pHCI path list 2565 * Return Values: 2566 * mdi_pathinfo 2567 */ 2568 2569 /*ARGSUSED*/ 2570 static mdi_pathinfo_t * 2571 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2572 { 2573 mdi_pathinfo_t *pip; 2574 int ct_circular; 2575 int ph_circular; 2576 int se_flag; 2577 int kmem_flag; 2578 2579 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2580 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2581 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2582 MDI_PATHINFO_STATE_TRANSIENT; 2583 2584 if (MDI_PHCI_IS_USER_DISABLED(ph)) 
2585 MDI_PI_SET_USER_DISABLE(pip); 2586 2587 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2588 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2589 2590 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2591 MDI_PI_SET_DRV_DISABLE(pip); 2592 2593 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2594 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2595 MDI_PI(pip)->pi_client = ct; 2596 MDI_PI(pip)->pi_phci = ph; 2597 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2598 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2599 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2600 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2601 MDI_PI(pip)->pi_pprivate = NULL; 2602 MDI_PI(pip)->pi_cprivate = NULL; 2603 MDI_PI(pip)->pi_vprivate = NULL; 2604 MDI_PI(pip)->pi_client_link = NULL; 2605 MDI_PI(pip)->pi_phci_link = NULL; 2606 MDI_PI(pip)->pi_ref_cnt = 0; 2607 MDI_PI(pip)->pi_kstats = NULL; 2608 MDI_PI(pip)->pi_preferred = 1; 2609 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2610 2611 /* 2612 * Lock both dev_info nodes against changes in parallel. 2613 */ 2614 ndi_devi_enter(ct->ct_dip, &ct_circular); 2615 ndi_devi_enter(ph->ph_dip, &ph_circular); 2616 2617 i_mdi_phci_add_path(ph, pip); 2618 i_mdi_client_add_path(ct, pip); 2619 2620 ndi_devi_exit(ph->ph_dip, ph_circular); 2621 ndi_devi_exit(ct->ct_dip, ct_circular); 2622 2623 /* determine interrupt context */ 2624 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2625 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 2626 2627 i_ddi_di_cache_invalidate(kmem_flag); 2628 2629 return (pip); 2630 } 2631 2632 /* 2633 * i_mdi_phci_add_path(): 2634 * Add a mdi_pathinfo node to pHCI list. 
2635 * Notes: 2636 * Caller should per-pHCI mutex 2637 */ 2638 2639 static void 2640 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2641 { 2642 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2643 2644 if (ph->ph_path_head == NULL) { 2645 ph->ph_path_head = pip; 2646 } else { 2647 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 2648 } 2649 ph->ph_path_tail = pip; 2650 ph->ph_path_count++; 2651 } 2652 2653 /* 2654 * i_mdi_client_add_path(): 2655 * Add mdi_pathinfo node to client list 2656 */ 2657 2658 static void 2659 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2660 { 2661 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2662 2663 if (ct->ct_path_head == NULL) { 2664 ct->ct_path_head = pip; 2665 } else { 2666 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 2667 } 2668 ct->ct_path_tail = pip; 2669 ct->ct_path_count++; 2670 } 2671 2672 /* 2673 * mdi_pi_free(): 2674 * Free the mdi_pathinfo node and also client device node if this 2675 * is the last path to the device 2676 * Return Values: 2677 * MDI_SUCCESS 2678 * MDI_FAILURE 2679 * MDI_BUSY 2680 */ 2681 2682 /*ARGSUSED*/ 2683 int 2684 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 2685 { 2686 int rv = MDI_SUCCESS; 2687 mdi_vhci_t *vh; 2688 mdi_phci_t *ph; 2689 mdi_client_t *ct; 2690 int (*f)(); 2691 int client_held = 0; 2692 2693 MDI_PI_LOCK(pip); 2694 ph = MDI_PI(pip)->pi_phci; 2695 ASSERT(ph != NULL); 2696 if (ph == NULL) { 2697 /* 2698 * Invalid pHCI device, return failure 2699 */ 2700 MDI_DEBUG(1, (CE_WARN, NULL, 2701 "!mdi_pi_free: invalid pHCI")); 2702 MDI_PI_UNLOCK(pip); 2703 return (MDI_FAILURE); 2704 } 2705 2706 vh = ph->ph_vhci; 2707 ASSERT(vh != NULL); 2708 if (vh == NULL) { 2709 /* Invalid pHCI device, return failure */ 2710 MDI_DEBUG(1, (CE_WARN, NULL, 2711 "!mdi_pi_free: invalid vHCI")); 2712 MDI_PI_UNLOCK(pip); 2713 return (MDI_FAILURE); 2714 } 2715 2716 ct = MDI_PI(pip)->pi_client; 2717 ASSERT(ct != NULL); 2718 if (ct == NULL) { 2719 /* 2720 * Invalid Client device, return failure 2721 
*/ 2722 MDI_DEBUG(1, (CE_WARN, NULL, 2723 "!mdi_pi_free: invalid client")); 2724 MDI_PI_UNLOCK(pip); 2725 return (MDI_FAILURE); 2726 } 2727 2728 /* 2729 * Check to see for busy condition. A mdi_pathinfo can only be freed 2730 * if the node state is either offline or init and the reference count 2731 * is zero. 2732 */ 2733 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 2734 MDI_PI_IS_INITING(pip))) { 2735 /* 2736 * Node is busy 2737 */ 2738 MDI_DEBUG(1, (CE_WARN, NULL, 2739 "!mdi_pi_free: pathinfo node is busy pip=%p", pip)); 2740 MDI_PI_UNLOCK(pip); 2741 return (MDI_BUSY); 2742 } 2743 2744 while (MDI_PI(pip)->pi_ref_cnt != 0) { 2745 /* 2746 * Give a chance for pending I/Os to complete. 2747 */ 2748 MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: " 2749 "%d cmds still pending on path: %p\n", 2750 MDI_PI(pip)->pi_ref_cnt, pip)); 2751 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 2752 &MDI_PI(pip)->pi_mutex, 2753 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 2754 /* 2755 * The timeout time reached without ref_cnt being zero 2756 * being signaled. 2757 */ 2758 MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, 2759 "!mdi_pi_free: " 2760 "Timeout reached on path %p without the cond\n", 2761 pip)); 2762 MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, 2763 "!mdi_pi_free: " 2764 "%d cmds still pending on path: %p\n", 2765 MDI_PI(pip)->pi_ref_cnt, pip)); 2766 MDI_PI_UNLOCK(pip); 2767 return (MDI_BUSY); 2768 } 2769 } 2770 if (MDI_PI(pip)->pi_pm_held) { 2771 client_held = 1; 2772 } 2773 MDI_PI_UNLOCK(pip); 2774 2775 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 2776 2777 MDI_CLIENT_LOCK(ct); 2778 2779 /* Prevent further failovers till mdi_mutex is held */ 2780 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 2781 2782 /* 2783 * Wait till failover is complete before removing this node. 
2784 */ 2785 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 2786 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 2787 2788 MDI_CLIENT_UNLOCK(ct); 2789 mutex_enter(&mdi_mutex); 2790 MDI_CLIENT_LOCK(ct); 2791 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 2792 2793 if (!MDI_PI_IS_INITING(pip)) { 2794 f = vh->vh_ops->vo_pi_uninit; 2795 if (f != NULL) { 2796 rv = (*f)(vh->vh_dip, pip, 0); 2797 } 2798 } 2799 /* 2800 * If vo_pi_uninit() completed successfully. 2801 */ 2802 if (rv == MDI_SUCCESS) { 2803 if (client_held) { 2804 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 2805 "i_mdi_pm_rele_client\n")); 2806 i_mdi_pm_rele_client(ct, 1); 2807 } 2808 i_mdi_pi_free(ph, pip, ct); 2809 if (ct->ct_path_count == 0) { 2810 /* 2811 * Client lost its last path. 2812 * Clean up the client device 2813 */ 2814 MDI_CLIENT_UNLOCK(ct); 2815 (void) i_mdi_client_free(ct->ct_vhci, ct); 2816 mutex_exit(&mdi_mutex); 2817 return (rv); 2818 } 2819 } 2820 MDI_CLIENT_UNLOCK(ct); 2821 mutex_exit(&mdi_mutex); 2822 2823 if (rv == MDI_FAILURE) 2824 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2825 2826 return (rv); 2827 } 2828 2829 /* 2830 * i_mdi_pi_free(): 2831 * Free the mdi_pathinfo node 2832 */ 2833 static void 2834 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 2835 { 2836 int ct_circular; 2837 int ph_circular; 2838 int se_flag; 2839 int kmem_flag; 2840 2841 /* 2842 * remove any per-path kstats 2843 */ 2844 i_mdi_pi_kstat_destroy(pip); 2845 2846 ndi_devi_enter(ct->ct_dip, &ct_circular); 2847 ndi_devi_enter(ph->ph_dip, &ph_circular); 2848 2849 i_mdi_client_remove_path(ct, pip); 2850 i_mdi_phci_remove_path(ph, pip); 2851 2852 ndi_devi_exit(ph->ph_dip, ph_circular); 2853 ndi_devi_exit(ct->ct_dip, ct_circular); 2854 2855 /* determine interrupt context */ 2856 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2857 kmem_flag = (se_flag == SE_SLEEP) ? 
KM_SLEEP : KM_NOSLEEP; 2858 2859 i_ddi_di_cache_invalidate(kmem_flag); 2860 2861 mutex_destroy(&MDI_PI(pip)->pi_mutex); 2862 cv_destroy(&MDI_PI(pip)->pi_state_cv); 2863 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 2864 if (MDI_PI(pip)->pi_addr) { 2865 kmem_free(MDI_PI(pip)->pi_addr, 2866 strlen(MDI_PI(pip)->pi_addr) + 1); 2867 MDI_PI(pip)->pi_addr = NULL; 2868 } 2869 2870 if (MDI_PI(pip)->pi_prop) { 2871 (void) nvlist_free(MDI_PI(pip)->pi_prop); 2872 MDI_PI(pip)->pi_prop = NULL; 2873 } 2874 kmem_free(pip, sizeof (struct mdi_pathinfo)); 2875 } 2876 2877 2878 /* 2879 * i_mdi_phci_remove_path(): 2880 * Remove a mdi_pathinfo node from pHCI list. 2881 * Notes: 2882 * Caller should hold per-pHCI mutex 2883 */ 2884 2885 static void 2886 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2887 { 2888 mdi_pathinfo_t *prev = NULL; 2889 mdi_pathinfo_t *path = NULL; 2890 2891 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2892 2893 path = ph->ph_path_head; 2894 while (path != NULL) { 2895 if (path == pip) { 2896 break; 2897 } 2898 prev = path; 2899 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 2900 } 2901 2902 if (path) { 2903 ph->ph_path_count--; 2904 if (prev) { 2905 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 2906 } else { 2907 ph->ph_path_head = 2908 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 2909 } 2910 if (ph->ph_path_tail == path) { 2911 ph->ph_path_tail = prev; 2912 } 2913 } 2914 2915 /* 2916 * Clear the pHCI link 2917 */ 2918 MDI_PI(pip)->pi_phci_link = NULL; 2919 MDI_PI(pip)->pi_phci = NULL; 2920 } 2921 2922 /* 2923 * i_mdi_client_remove_path(): 2924 * Remove a mdi_pathinfo node from client path list. 
2925 */ 2926 2927 static void 2928 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2929 { 2930 mdi_pathinfo_t *prev = NULL; 2931 mdi_pathinfo_t *path; 2932 2933 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2934 2935 path = ct->ct_path_head; 2936 while (path != NULL) { 2937 if (path == pip) { 2938 break; 2939 } 2940 prev = path; 2941 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 2942 } 2943 2944 if (path) { 2945 ct->ct_path_count--; 2946 if (prev) { 2947 MDI_PI(prev)->pi_client_link = 2948 MDI_PI(path)->pi_client_link; 2949 } else { 2950 ct->ct_path_head = 2951 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 2952 } 2953 if (ct->ct_path_tail == path) { 2954 ct->ct_path_tail = prev; 2955 } 2956 if (ct->ct_path_last == path) { 2957 ct->ct_path_last = ct->ct_path_head; 2958 } 2959 } 2960 MDI_PI(pip)->pi_client_link = NULL; 2961 MDI_PI(pip)->pi_client = NULL; 2962 } 2963 2964 /* 2965 * i_mdi_pi_state_change(): 2966 * online a mdi_pathinfo node 2967 * 2968 * Return Values: 2969 * MDI_SUCCESS 2970 * MDI_FAILURE 2971 */ 2972 /*ARGSUSED*/ 2973 static int 2974 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 2975 { 2976 int rv = MDI_SUCCESS; 2977 mdi_vhci_t *vh; 2978 mdi_phci_t *ph; 2979 mdi_client_t *ct; 2980 int (*f)(); 2981 dev_info_t *cdip; 2982 2983 MDI_PI_LOCK(pip); 2984 2985 ph = MDI_PI(pip)->pi_phci; 2986 ASSERT(ph); 2987 if (ph == NULL) { 2988 /* 2989 * Invalid pHCI device, fail the request 2990 */ 2991 MDI_PI_UNLOCK(pip); 2992 MDI_DEBUG(1, (CE_WARN, NULL, 2993 "!mdi_pi_state_change: invalid phci")); 2994 return (MDI_FAILURE); 2995 } 2996 2997 vh = ph->ph_vhci; 2998 ASSERT(vh); 2999 if (vh == NULL) { 3000 /* 3001 * Invalid vHCI device, fail the request 3002 */ 3003 MDI_PI_UNLOCK(pip); 3004 MDI_DEBUG(1, (CE_WARN, NULL, 3005 "!mdi_pi_state_change: invalid vhci")); 3006 return (MDI_FAILURE); 3007 } 3008 3009 ct = MDI_PI(pip)->pi_client; 3010 ASSERT(ct != NULL); 3011 if (ct == NULL) { 3012 /* 3013 * Invalid client device, 
fail the request 3014 */ 3015 MDI_PI_UNLOCK(pip); 3016 MDI_DEBUG(1, (CE_WARN, NULL, 3017 "!mdi_pi_state_change: invalid client")); 3018 return (MDI_FAILURE); 3019 } 3020 3021 /* 3022 * If this path has not been initialized yet, Callback vHCI driver's 3023 * pathinfo node initialize entry point 3024 */ 3025 3026 if (MDI_PI_IS_INITING(pip)) { 3027 MDI_PI_UNLOCK(pip); 3028 f = vh->vh_ops->vo_pi_init; 3029 if (f != NULL) { 3030 rv = (*f)(vh->vh_dip, pip, 0); 3031 if (rv != MDI_SUCCESS) { 3032 MDI_DEBUG(1, (CE_WARN, vh->vh_dip, 3033 "!vo_pi_init: failed vHCI=0x%p, pip=0x%p", 3034 vh, pip)); 3035 return (MDI_FAILURE); 3036 } 3037 } 3038 MDI_PI_LOCK(pip); 3039 MDI_PI_CLEAR_TRANSIENT(pip); 3040 } 3041 3042 /* 3043 * Do not allow state transition when pHCI is in offline/suspended 3044 * states 3045 */ 3046 i_mdi_phci_lock(ph, pip); 3047 if (MDI_PHCI_IS_READY(ph) == 0) { 3048 MDI_DEBUG(1, (CE_WARN, NULL, 3049 "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph)); 3050 MDI_PI_UNLOCK(pip); 3051 i_mdi_phci_unlock(ph); 3052 return (MDI_BUSY); 3053 } 3054 MDI_PHCI_UNSTABLE(ph); 3055 i_mdi_phci_unlock(ph); 3056 3057 /* 3058 * Check if mdi_pathinfo state is in transient state. 3059 * If yes, offlining is in progress and wait till transient state is 3060 * cleared. 3061 */ 3062 if (MDI_PI_IS_TRANSIENT(pip)) { 3063 while (MDI_PI_IS_TRANSIENT(pip)) { 3064 cv_wait(&MDI_PI(pip)->pi_state_cv, 3065 &MDI_PI(pip)->pi_mutex); 3066 } 3067 } 3068 3069 /* 3070 * Grab the client lock in reverse order sequence and release the 3071 * mdi_pathinfo mutex. 
3072 */ 3073 i_mdi_client_lock(ct, pip); 3074 MDI_PI_UNLOCK(pip); 3075 3076 /* 3077 * Wait till failover state is cleared 3078 */ 3079 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3080 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3081 3082 /* 3083 * Mark the mdi_pathinfo node state as transient 3084 */ 3085 MDI_PI_LOCK(pip); 3086 switch (state) { 3087 case MDI_PATHINFO_STATE_ONLINE: 3088 MDI_PI_SET_ONLINING(pip); 3089 break; 3090 3091 case MDI_PATHINFO_STATE_STANDBY: 3092 MDI_PI_SET_STANDBYING(pip); 3093 break; 3094 3095 case MDI_PATHINFO_STATE_FAULT: 3096 /* 3097 * Mark the pathinfo state as FAULTED 3098 */ 3099 MDI_PI_SET_FAULTING(pip); 3100 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3101 break; 3102 3103 case MDI_PATHINFO_STATE_OFFLINE: 3104 /* 3105 * ndi_devi_offline() cannot hold pip or ct locks. 3106 */ 3107 MDI_PI_UNLOCK(pip); 3108 /* 3109 * Do not offline if path will become last path and path 3110 * is busy for user initiated events. 3111 */ 3112 cdip = ct->ct_dip; 3113 if ((flag & NDI_DEVI_REMOVE) && 3114 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3115 i_mdi_client_unlock(ct); 3116 rv = ndi_devi_offline(cdip, 0); 3117 if (rv != NDI_SUCCESS) { 3118 /* 3119 * Convert to MDI error code 3120 */ 3121 switch (rv) { 3122 case NDI_BUSY: 3123 rv = MDI_BUSY; 3124 break; 3125 default: 3126 rv = MDI_FAILURE; 3127 break; 3128 } 3129 goto state_change_exit; 3130 } else { 3131 i_mdi_client_lock(ct, NULL); 3132 } 3133 } 3134 /* 3135 * Mark the mdi_pathinfo node state as transient 3136 */ 3137 MDI_PI_LOCK(pip); 3138 MDI_PI_SET_OFFLINING(pip); 3139 break; 3140 } 3141 MDI_PI_UNLOCK(pip); 3142 MDI_CLIENT_UNSTABLE(ct); 3143 i_mdi_client_unlock(ct); 3144 3145 f = vh->vh_ops->vo_pi_state_change; 3146 if (f != NULL) { 3147 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3148 if (rv == MDI_NOT_SUPPORTED) { 3149 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3150 } 3151 if (rv != MDI_SUCCESS) { 3152 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 3153 "!vo_pi_state_change: failed rv = %x", rv)); 
3154 } 3155 } 3156 MDI_CLIENT_LOCK(ct); 3157 MDI_PI_LOCK(pip); 3158 if (MDI_PI_IS_TRANSIENT(pip)) { 3159 if (rv == MDI_SUCCESS) { 3160 MDI_PI_CLEAR_TRANSIENT(pip); 3161 } else { 3162 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3163 } 3164 } 3165 3166 /* 3167 * Wake anyone waiting for this mdi_pathinfo node 3168 */ 3169 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3170 MDI_PI_UNLOCK(pip); 3171 3172 /* 3173 * Mark the client device as stable 3174 */ 3175 MDI_CLIENT_STABLE(ct); 3176 if (rv == MDI_SUCCESS) { 3177 if (ct->ct_unstable == 0) { 3178 cdip = ct->ct_dip; 3179 3180 /* 3181 * Onlining the mdi_pathinfo node will impact the 3182 * client state Update the client and dev_info node 3183 * state accordingly 3184 */ 3185 rv = NDI_SUCCESS; 3186 i_mdi_client_update_state(ct); 3187 switch (MDI_CLIENT_STATE(ct)) { 3188 case MDI_CLIENT_STATE_OPTIMAL: 3189 case MDI_CLIENT_STATE_DEGRADED: 3190 if (cdip && 3191 (i_ddi_node_state(cdip) < DS_READY) && 3192 ((state == MDI_PATHINFO_STATE_ONLINE) || 3193 (state == MDI_PATHINFO_STATE_STANDBY))) { 3194 3195 i_mdi_client_unlock(ct); 3196 /* 3197 * Must do ndi_devi_online() through 3198 * hotplug thread for deferred 3199 * attach mechanism to work 3200 */ 3201 rv = ndi_devi_online(cdip, 0); 3202 i_mdi_client_lock(ct, NULL); 3203 if ((rv != NDI_SUCCESS) && 3204 (MDI_CLIENT_STATE(ct) == 3205 MDI_CLIENT_STATE_DEGRADED)) { 3206 /* 3207 * ndi_devi_online failed. 3208 * Reset client flags to 3209 * offline. 3210 */ 3211 MDI_DEBUG(1, (CE_WARN, cdip, 3212 "!ndi_devi_online: failed " 3213 " Error: %x", rv)); 3214 MDI_CLIENT_SET_OFFLINE(ct); 3215 } 3216 if (rv != NDI_SUCCESS) { 3217 /* Reset the path state */ 3218 MDI_PI_LOCK(pip); 3219 MDI_PI(pip)->pi_state = 3220 MDI_PI_OLD_STATE(pip); 3221 MDI_PI_UNLOCK(pip); 3222 } 3223 } 3224 break; 3225 3226 case MDI_CLIENT_STATE_FAILED: 3227 /* 3228 * This is the last path case for 3229 * non-user initiated events. 
3230 */ 3231 if (((flag & NDI_DEVI_REMOVE) == 0) && 3232 cdip && (i_ddi_node_state(cdip) >= 3233 DS_INITIALIZED)) { 3234 i_mdi_client_unlock(ct); 3235 rv = ndi_devi_offline(cdip, 0); 3236 i_mdi_client_lock(ct, NULL); 3237 3238 if (rv != NDI_SUCCESS) { 3239 /* 3240 * ndi_devi_offline failed. 3241 * Reset client flags to 3242 * online as the path could not 3243 * be offlined. 3244 */ 3245 MDI_DEBUG(1, (CE_WARN, cdip, 3246 "!ndi_devi_offline: failed " 3247 " Error: %x", rv)); 3248 MDI_CLIENT_SET_ONLINE(ct); 3249 } 3250 } 3251 break; 3252 } 3253 /* 3254 * Convert to MDI error code 3255 */ 3256 switch (rv) { 3257 case NDI_SUCCESS: 3258 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3259 i_mdi_report_path_state(ct, pip); 3260 rv = MDI_SUCCESS; 3261 break; 3262 case NDI_BUSY: 3263 rv = MDI_BUSY; 3264 break; 3265 default: 3266 rv = MDI_FAILURE; 3267 break; 3268 } 3269 } 3270 } 3271 MDI_CLIENT_UNLOCK(ct); 3272 3273 state_change_exit: 3274 /* 3275 * Mark the pHCI as stable again. 3276 */ 3277 MDI_PHCI_LOCK(ph); 3278 MDI_PHCI_STABLE(ph); 3279 MDI_PHCI_UNLOCK(ph); 3280 return (rv); 3281 } 3282 3283 /* 3284 * mdi_pi_online(): 3285 * Place the path_info node in the online state. The path is 3286 * now available to be selected by mdi_select_path() for 3287 * transporting I/O requests to client devices. 
3288 * Return Values: 3289 * MDI_SUCCESS 3290 * MDI_FAILURE 3291 */ 3292 int 3293 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3294 { 3295 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3296 dev_info_t *cdip; 3297 int client_held = 0; 3298 int rv; 3299 3300 ASSERT(ct != NULL); 3301 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3302 if (rv != MDI_SUCCESS) 3303 return (rv); 3304 3305 MDI_PI_LOCK(pip); 3306 if (MDI_PI(pip)->pi_pm_held == 0) { 3307 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3308 "i_mdi_pm_hold_pip\n")); 3309 i_mdi_pm_hold_pip(pip); 3310 client_held = 1; 3311 } 3312 MDI_PI_UNLOCK(pip); 3313 3314 if (client_held) { 3315 MDI_CLIENT_LOCK(ct); 3316 if (ct->ct_power_cnt == 0) { 3317 rv = i_mdi_power_all_phci(ct); 3318 } 3319 3320 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3321 "i_mdi_pm_hold_client\n")); 3322 i_mdi_pm_hold_client(ct, 1); 3323 MDI_CLIENT_UNLOCK(ct); 3324 } 3325 3326 /* 3327 * Create the per-path (pathinfo) IO and error kstats which 3328 * are reported via iostat(1m). 3329 * 3330 * Defer creating the per-path kstats if device is not yet 3331 * attached; the names of the kstats are constructed in part 3332 * using the devices instance number which is assigned during 3333 * process of attaching the client device. 3334 * 3335 * The framework post_attach handler, mdi_post_attach(), is 3336 * is responsible for initializing the client's pathinfo list 3337 * once successfully attached. 
3338 */ 3339 cdip = ct->ct_dip; 3340 ASSERT(cdip); 3341 if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED)) 3342 return (rv); 3343 3344 MDI_CLIENT_LOCK(ct); 3345 rv = i_mdi_pi_kstat_create(pip); 3346 MDI_CLIENT_UNLOCK(ct); 3347 return (rv); 3348 } 3349 3350 /* 3351 * mdi_pi_standby(): 3352 * Place the mdi_pathinfo node in standby state 3353 * 3354 * Return Values: 3355 * MDI_SUCCESS 3356 * MDI_FAILURE 3357 */ 3358 int 3359 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3360 { 3361 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3362 } 3363 3364 /* 3365 * mdi_pi_fault(): 3366 * Place the mdi_pathinfo node in fault'ed state 3367 * Return Values: 3368 * MDI_SUCCESS 3369 * MDI_FAILURE 3370 */ 3371 int 3372 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3373 { 3374 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3375 } 3376 3377 /* 3378 * mdi_pi_offline(): 3379 * Offline a mdi_pathinfo node. 3380 * Return Values: 3381 * MDI_SUCCESS 3382 * MDI_FAILURE 3383 */ 3384 int 3385 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3386 { 3387 int ret, client_held = 0; 3388 mdi_client_t *ct; 3389 3390 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3391 3392 if (ret == MDI_SUCCESS) { 3393 MDI_PI_LOCK(pip); 3394 if (MDI_PI(pip)->pi_pm_held) { 3395 client_held = 1; 3396 } 3397 MDI_PI_UNLOCK(pip); 3398 3399 if (client_held) { 3400 ct = MDI_PI(pip)->pi_client; 3401 MDI_CLIENT_LOCK(ct); 3402 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3403 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3404 i_mdi_pm_rele_client(ct, 1); 3405 MDI_CLIENT_UNLOCK(ct); 3406 } 3407 } 3408 3409 return (ret); 3410 } 3411 3412 /* 3413 * i_mdi_pi_offline(): 3414 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3415 */ 3416 static int 3417 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3418 { 3419 dev_info_t *vdip = NULL; 3420 mdi_vhci_t *vh = NULL; 3421 mdi_client_t *ct = NULL; 3422 int (*f)(); 3423 int rv; 3424 3425 MDI_PI_LOCK(pip); 
3426 ct = MDI_PI(pip)->pi_client; 3427 ASSERT(ct != NULL); 3428 3429 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3430 /* 3431 * Give a chance for pending I/Os to complete. 3432 */ 3433 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3434 "%d cmds still pending on path: %p\n", 3435 MDI_PI(pip)->pi_ref_cnt, pip)); 3436 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3437 &MDI_PI(pip)->pi_mutex, 3438 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3439 /* 3440 * The timeout time reached without ref_cnt being zero 3441 * being signaled. 3442 */ 3443 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3444 "Timeout reached on path %p without the cond\n", 3445 pip)); 3446 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3447 "%d cmds still pending on path: %p\n", 3448 MDI_PI(pip)->pi_ref_cnt, pip)); 3449 } 3450 } 3451 vh = ct->ct_vhci; 3452 vdip = vh->vh_dip; 3453 3454 /* 3455 * Notify vHCI that has registered this event 3456 */ 3457 ASSERT(vh->vh_ops); 3458 f = vh->vh_ops->vo_pi_state_change; 3459 3460 if (f != NULL) { 3461 MDI_PI_UNLOCK(pip); 3462 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3463 flags)) != MDI_SUCCESS) { 3464 MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed " 3465 "vdip 0x%x, pip 0x%x", vdip, pip)); 3466 } 3467 MDI_PI_LOCK(pip); 3468 } 3469 3470 /* 3471 * Set the mdi_pathinfo node state and clear the transient condition 3472 */ 3473 MDI_PI_SET_OFFLINE(pip); 3474 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3475 MDI_PI_UNLOCK(pip); 3476 3477 MDI_CLIENT_LOCK(ct); 3478 if (rv == MDI_SUCCESS) { 3479 if (ct->ct_unstable == 0) { 3480 dev_info_t *cdip = ct->ct_dip; 3481 3482 /* 3483 * Onlining the mdi_pathinfo node will impact the 3484 * client state Update the client and dev_info node 3485 * state accordingly 3486 */ 3487 i_mdi_client_update_state(ct); 3488 rv = NDI_SUCCESS; 3489 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3490 if (cdip && 3491 (i_ddi_node_state(cdip) >= 3492 DS_INITIALIZED)) { 3493 MDI_CLIENT_UNLOCK(ct); 3494 rv = 
ndi_devi_offline(cdip, 0); 3495 MDI_CLIENT_LOCK(ct); 3496 if (rv != NDI_SUCCESS) { 3497 /* 3498 * ndi_devi_offline failed. 3499 * Reset client flags to 3500 * online. 3501 */ 3502 MDI_DEBUG(4, (CE_WARN, cdip, 3503 "!ndi_devi_offline: failed " 3504 " Error: %x", rv)); 3505 MDI_CLIENT_SET_ONLINE(ct); 3506 } 3507 } 3508 } 3509 /* 3510 * Convert to MDI error code 3511 */ 3512 switch (rv) { 3513 case NDI_SUCCESS: 3514 rv = MDI_SUCCESS; 3515 break; 3516 case NDI_BUSY: 3517 rv = MDI_BUSY; 3518 break; 3519 default: 3520 rv = MDI_FAILURE; 3521 break; 3522 } 3523 } 3524 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3525 i_mdi_report_path_state(ct, pip); 3526 } 3527 3528 MDI_CLIENT_UNLOCK(ct); 3529 3530 /* 3531 * Change in the mdi_pathinfo node state will impact the client state 3532 */ 3533 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3534 ct, pip)); 3535 return (rv); 3536 } 3537 3538 3539 /* 3540 * mdi_pi_get_addr(): 3541 * Get the unit address associated with a mdi_pathinfo node 3542 * 3543 * Return Values: 3544 * char * 3545 */ 3546 char * 3547 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3548 { 3549 if (pip == NULL) 3550 return (NULL); 3551 3552 return (MDI_PI(pip)->pi_addr); 3553 } 3554 3555 /* 3556 * mdi_pi_get_client(): 3557 * Get the client devinfo associated with a mdi_pathinfo node 3558 * 3559 * Return Values: 3560 * Handle to client device dev_info node 3561 */ 3562 dev_info_t * 3563 mdi_pi_get_client(mdi_pathinfo_t *pip) 3564 { 3565 dev_info_t *dip = NULL; 3566 if (pip) { 3567 dip = MDI_PI(pip)->pi_client->ct_dip; 3568 } 3569 return (dip); 3570 } 3571 3572 /* 3573 * mdi_pi_get_phci(): 3574 * Get the pHCI devinfo associated with the mdi_pathinfo node 3575 * Return Values: 3576 * Handle to dev_info node 3577 */ 3578 dev_info_t * 3579 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3580 { 3581 dev_info_t *dip = NULL; 3582 if (pip) { 3583 dip = MDI_PI(pip)->pi_phci->ph_dip; 3584 } 3585 return (dip); 3586 } 3587 3588 /* 3589 * mdi_pi_get_client_private(): 3590 * Get the 
client private information associated with the 3591 * mdi_pathinfo node 3592 */ 3593 void * 3594 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3595 { 3596 void *cprivate = NULL; 3597 if (pip) { 3598 cprivate = MDI_PI(pip)->pi_cprivate; 3599 } 3600 return (cprivate); 3601 } 3602 3603 /* 3604 * mdi_pi_set_client_private(): 3605 * Set the client private information in the mdi_pathinfo node 3606 */ 3607 void 3608 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3609 { 3610 if (pip) { 3611 MDI_PI(pip)->pi_cprivate = priv; 3612 } 3613 } 3614 3615 /* 3616 * mdi_pi_get_phci_private(): 3617 * Get the pHCI private information associated with the 3618 * mdi_pathinfo node 3619 */ 3620 caddr_t 3621 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3622 { 3623 caddr_t pprivate = NULL; 3624 if (pip) { 3625 pprivate = MDI_PI(pip)->pi_pprivate; 3626 } 3627 return (pprivate); 3628 } 3629 3630 /* 3631 * mdi_pi_set_phci_private(): 3632 * Set the pHCI private information in the mdi_pathinfo node 3633 */ 3634 void 3635 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3636 { 3637 if (pip) { 3638 MDI_PI(pip)->pi_pprivate = priv; 3639 } 3640 } 3641 3642 /* 3643 * mdi_pi_get_state(): 3644 * Get the mdi_pathinfo node state. Transient states are internal 3645 * and not provided to the users 3646 */ 3647 mdi_pathinfo_state_t 3648 mdi_pi_get_state(mdi_pathinfo_t *pip) 3649 { 3650 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3651 3652 if (pip) { 3653 if (MDI_PI_IS_TRANSIENT(pip)) { 3654 /* 3655 * mdi_pathinfo is in state transition. Return the 3656 * last good state. 3657 */ 3658 state = MDI_PI_OLD_STATE(pip); 3659 } else { 3660 state = MDI_PI_STATE(pip); 3661 } 3662 } 3663 return (state); 3664 } 3665 3666 /* 3667 * Note that the following function needs to be the new interface for 3668 * mdi_pi_get_state when mpxio gets integrated to ON. 
3669 */ 3670 int 3671 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3672 uint32_t *ext_state) 3673 { 3674 *state = MDI_PATHINFO_STATE_INIT; 3675 3676 if (pip) { 3677 if (MDI_PI_IS_TRANSIENT(pip)) { 3678 /* 3679 * mdi_pathinfo is in state transition. Return the 3680 * last good state. 3681 */ 3682 *state = MDI_PI_OLD_STATE(pip); 3683 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3684 } else { 3685 *state = MDI_PI_STATE(pip); 3686 *ext_state = MDI_PI_EXT_STATE(pip); 3687 } 3688 } 3689 return (MDI_SUCCESS); 3690 } 3691 3692 /* 3693 * mdi_pi_get_preferred: 3694 * Get the preferred path flag 3695 */ 3696 int 3697 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3698 { 3699 if (pip) { 3700 return (MDI_PI(pip)->pi_preferred); 3701 } 3702 return (0); 3703 } 3704 3705 /* 3706 * mdi_pi_set_preferred: 3707 * Set the preferred path flag 3708 */ 3709 void 3710 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3711 { 3712 if (pip) { 3713 MDI_PI(pip)->pi_preferred = preferred; 3714 } 3715 } 3716 3717 3718 /* 3719 * mdi_pi_set_state(): 3720 * Set the mdi_pathinfo node state 3721 */ 3722 void 3723 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3724 { 3725 uint32_t ext_state; 3726 3727 if (pip) { 3728 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3729 MDI_PI(pip)->pi_state = state; 3730 MDI_PI(pip)->pi_state |= ext_state; 3731 } 3732 } 3733 3734 /* 3735 * Property functions: 3736 */ 3737 3738 int 3739 i_map_nvlist_error_to_mdi(int val) 3740 { 3741 int rv; 3742 3743 switch (val) { 3744 case 0: 3745 rv = DDI_PROP_SUCCESS; 3746 break; 3747 case EINVAL: 3748 case ENOTSUP: 3749 rv = DDI_PROP_INVAL_ARG; 3750 break; 3751 case ENOMEM: 3752 rv = DDI_PROP_NO_MEMORY; 3753 break; 3754 default: 3755 rv = DDI_PROP_NOT_FOUND; 3756 break; 3757 } 3758 return (rv); 3759 } 3760 3761 /* 3762 * mdi_pi_get_next_prop(): 3763 * Property walk function. 
The caller should hold mdi_pi_lock() 3764 * and release by calling mdi_pi_unlock() at the end of walk to 3765 * get a consistent value. 3766 */ 3767 3768 nvpair_t * 3769 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3770 { 3771 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3772 return (NULL); 3773 } 3774 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3775 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3776 } 3777 3778 /* 3779 * mdi_prop_remove(): 3780 * Remove the named property from the named list. 3781 */ 3782 3783 int 3784 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3785 { 3786 if (pip == NULL) { 3787 return (DDI_PROP_NOT_FOUND); 3788 } 3789 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3790 MDI_PI_LOCK(pip); 3791 if (MDI_PI(pip)->pi_prop == NULL) { 3792 MDI_PI_UNLOCK(pip); 3793 return (DDI_PROP_NOT_FOUND); 3794 } 3795 if (name) { 3796 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3797 } else { 3798 char nvp_name[MAXNAMELEN]; 3799 nvpair_t *nvp; 3800 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3801 while (nvp) { 3802 nvpair_t *next; 3803 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3804 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3805 nvpair_name(nvp)); 3806 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3807 nvp_name); 3808 nvp = next; 3809 } 3810 } 3811 MDI_PI_UNLOCK(pip); 3812 return (DDI_PROP_SUCCESS); 3813 } 3814 3815 /* 3816 * mdi_prop_size(): 3817 * Get buffer size needed to pack the property data. 3818 * Caller should hold the mdi_pathinfo_t lock to get a consistent 3819 * buffer size. 
3820 */ 3821 3822 int 3823 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3824 { 3825 int rv; 3826 size_t bufsize; 3827 3828 *buflenp = 0; 3829 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3830 return (DDI_PROP_NOT_FOUND); 3831 } 3832 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3833 rv = nvlist_size(MDI_PI(pip)->pi_prop, 3834 &bufsize, NV_ENCODE_NATIVE); 3835 *buflenp = bufsize; 3836 return (i_map_nvlist_error_to_mdi(rv)); 3837 } 3838 3839 /* 3840 * mdi_prop_pack(): 3841 * pack the property list. The caller should hold the 3842 * mdi_pathinfo_t node to get a consistent data 3843 */ 3844 3845 int 3846 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 3847 { 3848 int rv; 3849 size_t bufsize; 3850 3851 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 3852 return (DDI_PROP_NOT_FOUND); 3853 } 3854 3855 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3856 3857 bufsize = buflen; 3858 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 3859 NV_ENCODE_NATIVE, KM_SLEEP); 3860 3861 return (i_map_nvlist_error_to_mdi(rv)); 3862 } 3863 3864 /* 3865 * mdi_prop_update_byte(): 3866 * Create/Update a byte property 3867 */ 3868 int 3869 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 3870 { 3871 int rv; 3872 3873 if (pip == NULL) { 3874 return (DDI_PROP_INVAL_ARG); 3875 } 3876 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3877 MDI_PI_LOCK(pip); 3878 if (MDI_PI(pip)->pi_prop == NULL) { 3879 MDI_PI_UNLOCK(pip); 3880 return (DDI_PROP_NOT_FOUND); 3881 } 3882 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 3883 MDI_PI_UNLOCK(pip); 3884 return (i_map_nvlist_error_to_mdi(rv)); 3885 } 3886 3887 /* 3888 * mdi_prop_update_byte_array(): 3889 * Create/Update a byte array property 3890 */ 3891 int 3892 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 3893 uint_t nelements) 3894 { 3895 int rv; 3896 3897 if (pip == NULL) { 3898 return (DDI_PROP_INVAL_ARG); 3899 } 3900 
ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3901 MDI_PI_LOCK(pip); 3902 if (MDI_PI(pip)->pi_prop == NULL) { 3903 MDI_PI_UNLOCK(pip); 3904 return (DDI_PROP_NOT_FOUND); 3905 } 3906 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 3907 MDI_PI_UNLOCK(pip); 3908 return (i_map_nvlist_error_to_mdi(rv)); 3909 } 3910 3911 /* 3912 * mdi_prop_update_int(): 3913 * Create/Update a 32 bit integer property 3914 */ 3915 int 3916 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 3917 { 3918 int rv; 3919 3920 if (pip == NULL) { 3921 return (DDI_PROP_INVAL_ARG); 3922 } 3923 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3924 MDI_PI_LOCK(pip); 3925 if (MDI_PI(pip)->pi_prop == NULL) { 3926 MDI_PI_UNLOCK(pip); 3927 return (DDI_PROP_NOT_FOUND); 3928 } 3929 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 3930 MDI_PI_UNLOCK(pip); 3931 return (i_map_nvlist_error_to_mdi(rv)); 3932 } 3933 3934 /* 3935 * mdi_prop_update_int64(): 3936 * Create/Update a 64 bit integer property 3937 */ 3938 int 3939 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 3940 { 3941 int rv; 3942 3943 if (pip == NULL) { 3944 return (DDI_PROP_INVAL_ARG); 3945 } 3946 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3947 MDI_PI_LOCK(pip); 3948 if (MDI_PI(pip)->pi_prop == NULL) { 3949 MDI_PI_UNLOCK(pip); 3950 return (DDI_PROP_NOT_FOUND); 3951 } 3952 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 3953 MDI_PI_UNLOCK(pip); 3954 return (i_map_nvlist_error_to_mdi(rv)); 3955 } 3956 3957 /* 3958 * mdi_prop_update_int_array(): 3959 * Create/Update a int array property 3960 */ 3961 int 3962 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 3963 uint_t nelements) 3964 { 3965 int rv; 3966 3967 if (pip == NULL) { 3968 return (DDI_PROP_INVAL_ARG); 3969 } 3970 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3971 MDI_PI_LOCK(pip); 3972 if (MDI_PI(pip)->pi_prop == NULL) { 3973 MDI_PI_UNLOCK(pip); 3974 return (DDI_PROP_NOT_FOUND); 3975 } 3976 rv 
= nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 3977 nelements); 3978 MDI_PI_UNLOCK(pip); 3979 return (i_map_nvlist_error_to_mdi(rv)); 3980 } 3981 3982 /* 3983 * mdi_prop_update_string(): 3984 * Create/Update a string property 3985 */ 3986 int 3987 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 3988 { 3989 int rv; 3990 3991 if (pip == NULL) { 3992 return (DDI_PROP_INVAL_ARG); 3993 } 3994 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3995 MDI_PI_LOCK(pip); 3996 if (MDI_PI(pip)->pi_prop == NULL) { 3997 MDI_PI_UNLOCK(pip); 3998 return (DDI_PROP_NOT_FOUND); 3999 } 4000 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4001 MDI_PI_UNLOCK(pip); 4002 return (i_map_nvlist_error_to_mdi(rv)); 4003 } 4004 4005 /* 4006 * mdi_prop_update_string_array(): 4007 * Create/Update a string array property 4008 */ 4009 int 4010 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4011 uint_t nelements) 4012 { 4013 int rv; 4014 4015 if (pip == NULL) { 4016 return (DDI_PROP_INVAL_ARG); 4017 } 4018 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4019 MDI_PI_LOCK(pip); 4020 if (MDI_PI(pip)->pi_prop == NULL) { 4021 MDI_PI_UNLOCK(pip); 4022 return (DDI_PROP_NOT_FOUND); 4023 } 4024 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4025 nelements); 4026 MDI_PI_UNLOCK(pip); 4027 return (i_map_nvlist_error_to_mdi(rv)); 4028 } 4029 4030 /* 4031 * mdi_prop_lookup_byte(): 4032 * Look for byte property identified by name. The data returned 4033 * is the actual property and valid as long as mdi_pathinfo_t node 4034 * is alive. 
4035 */ 4036 int 4037 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4038 { 4039 int rv; 4040 4041 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4042 return (DDI_PROP_NOT_FOUND); 4043 } 4044 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4045 return (i_map_nvlist_error_to_mdi(rv)); 4046 } 4047 4048 4049 /* 4050 * mdi_prop_lookup_byte_array(): 4051 * Look for byte array property identified by name. The data 4052 * returned is the actual property and valid as long as 4053 * mdi_pathinfo_t node is alive. 4054 */ 4055 int 4056 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4057 uint_t *nelements) 4058 { 4059 int rv; 4060 4061 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4062 return (DDI_PROP_NOT_FOUND); 4063 } 4064 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4065 nelements); 4066 return (i_map_nvlist_error_to_mdi(rv)); 4067 } 4068 4069 /* 4070 * mdi_prop_lookup_int(): 4071 * Look for int property identified by name. The data returned 4072 * is the actual property and valid as long as mdi_pathinfo_t 4073 * node is alive. 4074 */ 4075 int 4076 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4077 { 4078 int rv; 4079 4080 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4081 return (DDI_PROP_NOT_FOUND); 4082 } 4083 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4084 return (i_map_nvlist_error_to_mdi(rv)); 4085 } 4086 4087 /* 4088 * mdi_prop_lookup_int64(): 4089 * Look for int64 property identified by name. The data returned 4090 * is the actual property and valid as long as mdi_pathinfo_t node 4091 * is alive. 
4092 */ 4093 int 4094 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4095 { 4096 int rv; 4097 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4098 return (DDI_PROP_NOT_FOUND); 4099 } 4100 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4101 return (i_map_nvlist_error_to_mdi(rv)); 4102 } 4103 4104 /* 4105 * mdi_prop_lookup_int_array(): 4106 * Look for int array property identified by name. The data 4107 * returned is the actual property and valid as long as 4108 * mdi_pathinfo_t node is alive. 4109 */ 4110 int 4111 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4112 uint_t *nelements) 4113 { 4114 int rv; 4115 4116 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4117 return (DDI_PROP_NOT_FOUND); 4118 } 4119 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4120 (int32_t **)data, nelements); 4121 return (i_map_nvlist_error_to_mdi(rv)); 4122 } 4123 4124 /* 4125 * mdi_prop_lookup_string(): 4126 * Look for string property identified by name. The data 4127 * returned is the actual property and valid as long as 4128 * mdi_pathinfo_t node is alive. 4129 */ 4130 int 4131 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4132 { 4133 int rv; 4134 4135 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4136 return (DDI_PROP_NOT_FOUND); 4137 } 4138 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4139 return (i_map_nvlist_error_to_mdi(rv)); 4140 } 4141 4142 /* 4143 * mdi_prop_lookup_string_array(): 4144 * Look for string array property identified by name. The data 4145 * returned is the actual property and valid as long as 4146 * mdi_pathinfo_t node is alive. 
4147 */ 4148 4149 int 4150 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4151 uint_t *nelements) 4152 { 4153 int rv; 4154 4155 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4156 return (DDI_PROP_NOT_FOUND); 4157 } 4158 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4159 nelements); 4160 return (i_map_nvlist_error_to_mdi(rv)); 4161 } 4162 4163 /* 4164 * mdi_prop_free(): 4165 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4166 * functions return the pointer to actual property data and not a 4167 * copy of it. So the data returned is valid as long as 4168 * mdi_pathinfo_t node is valid. 4169 */ 4170 4171 /*ARGSUSED*/ 4172 int 4173 mdi_prop_free(void *data) 4174 { 4175 return (DDI_PROP_SUCCESS); 4176 } 4177 4178 /*ARGSUSED*/ 4179 static void 4180 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4181 { 4182 char *phci_path, *ct_path; 4183 char *ct_status; 4184 char *status; 4185 dev_info_t *dip = ct->ct_dip; 4186 char lb_buf[64]; 4187 4188 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 4189 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4190 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4191 return; 4192 } 4193 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4194 ct_status = "optimal"; 4195 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4196 ct_status = "degraded"; 4197 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4198 ct_status = "failed"; 4199 } else { 4200 ct_status = "unknown"; 4201 } 4202 4203 if (MDI_PI_IS_OFFLINE(pip)) { 4204 status = "offline"; 4205 } else if (MDI_PI_IS_ONLINE(pip)) { 4206 status = "online"; 4207 } else if (MDI_PI_IS_STANDBY(pip)) { 4208 status = "standby"; 4209 } else if (MDI_PI_IS_FAULT(pip)) { 4210 status = "faulted"; 4211 } else { 4212 status = "unknown"; 4213 } 4214 4215 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4216 (void) snprintf(lb_buf, sizeof (lb_buf), 4217 "%s, region-size: %d", mdi_load_balance_lba, 4218 
ct->ct_lb_args->region_size); 4219 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4220 (void) snprintf(lb_buf, sizeof (lb_buf), 4221 "%s", mdi_load_balance_none); 4222 } else { 4223 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4224 mdi_load_balance_rr); 4225 } 4226 4227 if (dip) { 4228 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4229 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4230 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4231 "path %s (%s%d) to target address: %s is %s" 4232 " Load balancing: %s\n", 4233 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4234 ddi_get_instance(dip), ct_status, 4235 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4236 ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4237 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4238 MDI_PI(pip)->pi_addr, status, lb_buf); 4239 kmem_free(phci_path, MAXPATHLEN); 4240 kmem_free(ct_path, MAXPATHLEN); 4241 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4242 } 4243 } 4244 4245 #ifdef DEBUG 4246 /* 4247 * i_mdi_log(): 4248 * Utility function for error message management 4249 * 4250 */ 4251 4252 /*VARARGS3*/ 4253 static void 4254 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 
4255 { 4256 char buf[MAXNAMELEN]; 4257 char name[MAXNAMELEN]; 4258 va_list ap; 4259 int log_only = 0; 4260 int boot_only = 0; 4261 int console_only = 0; 4262 4263 if (dip) { 4264 if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) { 4265 (void) snprintf(name, MAXNAMELEN, "%s%d:\n", 4266 ddi_node_name(dip), ddi_get_instance(dip)); 4267 } else { 4268 (void) snprintf(name, MAXNAMELEN, "%s%d:", 4269 ddi_node_name(dip), ddi_get_instance(dip)); 4270 } 4271 } else { 4272 name[0] = '\0'; 4273 } 4274 4275 va_start(ap, fmt); 4276 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4277 va_end(ap); 4278 4279 switch (buf[0]) { 4280 case '!': 4281 log_only = 1; 4282 break; 4283 case '?': 4284 boot_only = 1; 4285 break; 4286 case '^': 4287 console_only = 1; 4288 break; 4289 } 4290 4291 switch (level) { 4292 case CE_NOTE: 4293 level = CE_CONT; 4294 /* FALLTHROUGH */ 4295 case CE_CONT: 4296 case CE_WARN: 4297 case CE_PANIC: 4298 if (boot_only) { 4299 cmn_err(level, "?%s\t%s", name, &buf[1]); 4300 } else if (console_only) { 4301 cmn_err(level, "^%s\t%s", name, &buf[1]); 4302 } else if (log_only) { 4303 cmn_err(level, "!%s\t%s", name, &buf[1]); 4304 } else { 4305 cmn_err(level, "%s\t%s", name, buf); 4306 } 4307 break; 4308 default: 4309 cmn_err(level, "%s\t%s", name, buf); 4310 break; 4311 } 4312 } 4313 #endif /* DEBUG */ 4314 4315 void 4316 i_mdi_client_online(dev_info_t *ct_dip) 4317 { 4318 mdi_client_t *ct; 4319 4320 /* 4321 * Client online notification. 
Mark client state as online 4322 * restore our binding with dev_info node 4323 */ 4324 ct = i_devi_get_client(ct_dip); 4325 ASSERT(ct != NULL); 4326 MDI_CLIENT_LOCK(ct); 4327 MDI_CLIENT_SET_ONLINE(ct); 4328 /* catch for any memory leaks */ 4329 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4330 ct->ct_dip = ct_dip; 4331 4332 if (ct->ct_power_cnt == 0) 4333 (void) i_mdi_power_all_phci(ct); 4334 4335 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4336 "i_mdi_pm_hold_client\n")); 4337 i_mdi_pm_hold_client(ct, 1); 4338 4339 MDI_CLIENT_UNLOCK(ct); 4340 } 4341 4342 void 4343 i_mdi_phci_online(dev_info_t *ph_dip) 4344 { 4345 mdi_phci_t *ph; 4346 4347 /* pHCI online notification. Mark state accordingly */ 4348 ph = i_devi_get_phci(ph_dip); 4349 ASSERT(ph != NULL); 4350 MDI_PHCI_LOCK(ph); 4351 MDI_PHCI_SET_ONLINE(ph); 4352 MDI_PHCI_UNLOCK(ph); 4353 } 4354 4355 /* 4356 * mdi_devi_online(): 4357 * Online notification from NDI framework on pHCI/client 4358 * device online. 4359 * Return Values: 4360 * NDI_SUCCESS 4361 * MDI_FAILURE 4362 */ 4363 4364 /*ARGSUSED*/ 4365 int 4366 mdi_devi_online(dev_info_t *dip, uint_t flags) 4367 { 4368 if (MDI_PHCI(dip)) { 4369 i_mdi_phci_online(dip); 4370 } 4371 4372 if (MDI_CLIENT(dip)) { 4373 i_mdi_client_online(dip); 4374 } 4375 return (NDI_SUCCESS); 4376 } 4377 4378 /* 4379 * mdi_devi_offline(): 4380 * Offline notification from NDI framework on pHCI/Client device 4381 * offline. 
4382 * 4383 * Return Values: 4384 * NDI_SUCCESS 4385 * NDI_FAILURE 4386 */ 4387 4388 /*ARGSUSED*/ 4389 int 4390 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4391 { 4392 int rv = NDI_SUCCESS; 4393 4394 if (MDI_CLIENT(dip)) { 4395 rv = i_mdi_client_offline(dip, flags); 4396 if (rv != NDI_SUCCESS) 4397 return (rv); 4398 } 4399 4400 if (MDI_PHCI(dip)) { 4401 rv = i_mdi_phci_offline(dip, flags); 4402 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4403 /* set client back online */ 4404 i_mdi_client_online(dip); 4405 } 4406 } 4407 4408 return (rv); 4409 } 4410 4411 /*ARGSUSED*/ 4412 static int 4413 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4414 { 4415 int rv = NDI_SUCCESS; 4416 mdi_phci_t *ph; 4417 mdi_client_t *ct; 4418 mdi_pathinfo_t *pip; 4419 mdi_pathinfo_t *next; 4420 mdi_pathinfo_t *failed_pip = NULL; 4421 dev_info_t *cdip; 4422 4423 /* 4424 * pHCI component offline notification 4425 * Make sure that this pHCI instance is free to be offlined. 4426 * If it is OK to proceed, Offline and remove all the child 4427 * mdi_pathinfo nodes. This process automatically offlines 4428 * corresponding client devices, for which this pHCI provides 4429 * critical services. 4430 */ 4431 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n", 4432 dip)); 4433 4434 ph = i_devi_get_phci(dip); 4435 if (ph == NULL) { 4436 return (rv); 4437 } 4438 4439 MDI_PHCI_LOCK(ph); 4440 4441 if (MDI_PHCI_IS_OFFLINE(ph)) { 4442 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph)); 4443 MDI_PHCI_UNLOCK(ph); 4444 return (NDI_SUCCESS); 4445 } 4446 4447 /* 4448 * Check to see if the pHCI can be offlined 4449 */ 4450 if (ph->ph_unstable) { 4451 MDI_DEBUG(1, (CE_WARN, dip, 4452 "!One or more target devices are in transient " 4453 "state. This device can not be removed at " 4454 "this moment. 
Please try again later.")); 4455 MDI_PHCI_UNLOCK(ph); 4456 return (NDI_BUSY); 4457 } 4458 4459 pip = ph->ph_path_head; 4460 while (pip != NULL) { 4461 MDI_PI_LOCK(pip); 4462 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4463 /* 4464 * The mdi_pathinfo state is OK. Check the client state. 4465 * If failover in progress fail the pHCI from offlining 4466 */ 4467 ct = MDI_PI(pip)->pi_client; 4468 i_mdi_client_lock(ct, pip); 4469 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4470 (ct->ct_unstable)) { 4471 /* 4472 * Failover is in progress, Fail the DR 4473 */ 4474 MDI_DEBUG(1, (CE_WARN, dip, 4475 "!pHCI device (%s%d) is Busy. %s", 4476 ddi_driver_name(dip), ddi_get_instance(dip), 4477 "This device can not be removed at " 4478 "this moment. Please try again later.")); 4479 MDI_PI_UNLOCK(pip); 4480 MDI_CLIENT_UNLOCK(ct); 4481 MDI_PHCI_UNLOCK(ph); 4482 return (NDI_BUSY); 4483 } 4484 MDI_PI_UNLOCK(pip); 4485 4486 /* 4487 * Check to see of we are removing the last path of this 4488 * client device... 4489 */ 4490 cdip = ct->ct_dip; 4491 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4492 (i_mdi_client_compute_state(ct, ph) == 4493 MDI_CLIENT_STATE_FAILED)) { 4494 i_mdi_client_unlock(ct); 4495 MDI_PHCI_UNLOCK(ph); 4496 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4497 /* 4498 * ndi_devi_offline() failed. 4499 * This pHCI provides the critical path 4500 * to one or more client devices. 4501 * Return busy. 4502 */ 4503 MDI_PHCI_LOCK(ph); 4504 MDI_DEBUG(1, (CE_WARN, dip, 4505 "!pHCI device (%s%d) is Busy. %s", 4506 ddi_driver_name(dip), ddi_get_instance(dip), 4507 "This device can not be removed at " 4508 "this moment. 
Please try again later.")); 4509 failed_pip = pip; 4510 break; 4511 } else { 4512 MDI_PHCI_LOCK(ph); 4513 pip = next; 4514 } 4515 } else { 4516 i_mdi_client_unlock(ct); 4517 pip = next; 4518 } 4519 } 4520 4521 if (failed_pip) { 4522 pip = ph->ph_path_head; 4523 while (pip != failed_pip) { 4524 MDI_PI_LOCK(pip); 4525 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4526 ct = MDI_PI(pip)->pi_client; 4527 i_mdi_client_lock(ct, pip); 4528 cdip = ct->ct_dip; 4529 switch (MDI_CLIENT_STATE(ct)) { 4530 case MDI_CLIENT_STATE_OPTIMAL: 4531 case MDI_CLIENT_STATE_DEGRADED: 4532 if (cdip) { 4533 MDI_PI_UNLOCK(pip); 4534 i_mdi_client_unlock(ct); 4535 MDI_PHCI_UNLOCK(ph); 4536 (void) ndi_devi_online(cdip, 0); 4537 MDI_PHCI_LOCK(ph); 4538 pip = next; 4539 continue; 4540 } 4541 break; 4542 4543 case MDI_CLIENT_STATE_FAILED: 4544 if (cdip) { 4545 MDI_PI_UNLOCK(pip); 4546 i_mdi_client_unlock(ct); 4547 MDI_PHCI_UNLOCK(ph); 4548 (void) ndi_devi_offline(cdip, 0); 4549 MDI_PHCI_LOCK(ph); 4550 pip = next; 4551 continue; 4552 } 4553 break; 4554 } 4555 MDI_PI_UNLOCK(pip); 4556 i_mdi_client_unlock(ct); 4557 pip = next; 4558 } 4559 MDI_PHCI_UNLOCK(ph); 4560 return (NDI_BUSY); 4561 } 4562 4563 /* 4564 * Mark the pHCI as offline 4565 */ 4566 MDI_PHCI_SET_OFFLINE(ph); 4567 4568 /* 4569 * Mark the child mdi_pathinfo nodes as transient 4570 */ 4571 pip = ph->ph_path_head; 4572 while (pip != NULL) { 4573 MDI_PI_LOCK(pip); 4574 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4575 MDI_PI_SET_OFFLINING(pip); 4576 MDI_PI_UNLOCK(pip); 4577 pip = next; 4578 } 4579 MDI_PHCI_UNLOCK(ph); 4580 /* 4581 * Give a chance for any pending commands to execute 4582 */ 4583 delay(1); 4584 MDI_PHCI_LOCK(ph); 4585 pip = ph->ph_path_head; 4586 while (pip != NULL) { 4587 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4588 (void) i_mdi_pi_offline(pip, flags); 4589 MDI_PI_LOCK(pip); 4590 ct = MDI_PI(pip)->pi_client; 4591 if (!MDI_PI_IS_OFFLINE(pip)) { 4592 MDI_DEBUG(1, (CE_WARN, dip, 4593 "!pHCI device (%s%d) 
is Busy. %s", 4594 ddi_driver_name(dip), ddi_get_instance(dip), 4595 "This device can not be removed at " 4596 "this moment. Please try again later.")); 4597 MDI_PI_UNLOCK(pip); 4598 MDI_PHCI_SET_ONLINE(ph); 4599 MDI_PHCI_UNLOCK(ph); 4600 return (NDI_BUSY); 4601 } 4602 MDI_PI_UNLOCK(pip); 4603 pip = next; 4604 } 4605 MDI_PHCI_UNLOCK(ph); 4606 4607 return (rv); 4608 } 4609 4610 /*ARGSUSED*/ 4611 static int 4612 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4613 { 4614 int rv = NDI_SUCCESS; 4615 mdi_client_t *ct; 4616 4617 /* 4618 * Client component to go offline. Make sure that we are 4619 * not in failing over state and update client state 4620 * accordingly 4621 */ 4622 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n", 4623 dip)); 4624 ct = i_devi_get_client(dip); 4625 if (ct != NULL) { 4626 MDI_CLIENT_LOCK(ct); 4627 if (ct->ct_unstable) { 4628 /* 4629 * One or more paths are in transient state, 4630 * Dont allow offline of a client device 4631 */ 4632 MDI_DEBUG(1, (CE_WARN, dip, 4633 "!One or more paths to this device is " 4634 "in transient state. This device can not " 4635 "be removed at this moment. " 4636 "Please try again later.")); 4637 MDI_CLIENT_UNLOCK(ct); 4638 return (NDI_BUSY); 4639 } 4640 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4641 /* 4642 * Failover is in progress, Dont allow DR of 4643 * a client device 4644 */ 4645 MDI_DEBUG(1, (CE_WARN, dip, 4646 "!Client device (%s%d) is Busy. %s", 4647 ddi_driver_name(dip), ddi_get_instance(dip), 4648 "This device can not be removed at " 4649 "this moment. 
Please try again later.")); 4650 MDI_CLIENT_UNLOCK(ct); 4651 return (NDI_BUSY); 4652 } 4653 MDI_CLIENT_SET_OFFLINE(ct); 4654 4655 /* 4656 * Unbind our relationship with the dev_info node 4657 */ 4658 if (flags & NDI_DEVI_REMOVE) { 4659 ct->ct_dip = NULL; 4660 } 4661 MDI_CLIENT_UNLOCK(ct); 4662 } 4663 return (rv); 4664 } 4665 4666 /* 4667 * mdi_pre_attach(): 4668 * Pre attach() notification handler 4669 */ 4670 4671 /*ARGSUSED*/ 4672 int 4673 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4674 { 4675 /* don't support old DDI_PM_RESUME */ 4676 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4677 (cmd == DDI_PM_RESUME)) 4678 return (DDI_FAILURE); 4679 4680 return (DDI_SUCCESS); 4681 } 4682 4683 /* 4684 * mdi_post_attach(): 4685 * Post attach() notification handler 4686 */ 4687 4688 /*ARGSUSED*/ 4689 void 4690 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4691 { 4692 mdi_phci_t *ph; 4693 mdi_client_t *ct; 4694 mdi_pathinfo_t *pip; 4695 4696 if (MDI_PHCI(dip)) { 4697 ph = i_devi_get_phci(dip); 4698 ASSERT(ph != NULL); 4699 4700 MDI_PHCI_LOCK(ph); 4701 switch (cmd) { 4702 case DDI_ATTACH: 4703 MDI_DEBUG(2, (CE_NOTE, dip, 4704 "!pHCI post_attach: called %p\n", ph)); 4705 if (error == DDI_SUCCESS) { 4706 MDI_PHCI_SET_ATTACH(ph); 4707 } else { 4708 MDI_DEBUG(1, (CE_NOTE, dip, 4709 "!pHCI post_attach: failed error=%d\n", 4710 error)); 4711 MDI_PHCI_SET_DETACH(ph); 4712 } 4713 break; 4714 4715 case DDI_RESUME: 4716 MDI_DEBUG(2, (CE_NOTE, dip, 4717 "!pHCI post_resume: called %p\n", ph)); 4718 if (error == DDI_SUCCESS) { 4719 MDI_PHCI_SET_RESUME(ph); 4720 } else { 4721 MDI_DEBUG(1, (CE_NOTE, dip, 4722 "!pHCI post_resume: failed error=%d\n", 4723 error)); 4724 MDI_PHCI_SET_SUSPEND(ph); 4725 } 4726 break; 4727 } 4728 MDI_PHCI_UNLOCK(ph); 4729 } 4730 4731 if (MDI_CLIENT(dip)) { 4732 ct = i_devi_get_client(dip); 4733 ASSERT(ct != NULL); 4734 4735 MDI_CLIENT_LOCK(ct); 4736 switch (cmd) { 4737 case DDI_ATTACH: 4738 MDI_DEBUG(2, (CE_NOTE, dip, 
4739 "!Client post_attach: called %p\n", ct)); 4740 if (error != DDI_SUCCESS) { 4741 MDI_DEBUG(1, (CE_NOTE, dip, 4742 "!Client post_attach: failed error=%d\n", 4743 error)); 4744 MDI_CLIENT_SET_DETACH(ct); 4745 MDI_DEBUG(4, (CE_WARN, dip, 4746 "mdi_post_attach i_mdi_pm_reset_client\n")); 4747 i_mdi_pm_reset_client(ct); 4748 break; 4749 } 4750 4751 /* 4752 * Client device has successfully attached. 4753 * Create kstats for any pathinfo structures 4754 * initially associated with this client. 4755 */ 4756 for (pip = ct->ct_path_head; pip != NULL; 4757 pip = (mdi_pathinfo_t *) 4758 MDI_PI(pip)->pi_client_link) { 4759 (void) i_mdi_pi_kstat_create(pip); 4760 i_mdi_report_path_state(ct, pip); 4761 } 4762 MDI_CLIENT_SET_ATTACH(ct); 4763 break; 4764 4765 case DDI_RESUME: 4766 MDI_DEBUG(2, (CE_NOTE, dip, 4767 "!Client post_attach: called %p\n", ct)); 4768 if (error == DDI_SUCCESS) { 4769 MDI_CLIENT_SET_RESUME(ct); 4770 } else { 4771 MDI_DEBUG(1, (CE_NOTE, dip, 4772 "!Client post_resume: failed error=%d\n", 4773 error)); 4774 MDI_CLIENT_SET_SUSPEND(ct); 4775 } 4776 break; 4777 } 4778 MDI_CLIENT_UNLOCK(ct); 4779 } 4780 } 4781 4782 /* 4783 * mdi_pre_detach(): 4784 * Pre detach notification handler 4785 */ 4786 4787 /*ARGSUSED*/ 4788 int 4789 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4790 { 4791 int rv = DDI_SUCCESS; 4792 4793 if (MDI_CLIENT(dip)) { 4794 (void) i_mdi_client_pre_detach(dip, cmd); 4795 } 4796 4797 if (MDI_PHCI(dip)) { 4798 rv = i_mdi_phci_pre_detach(dip, cmd); 4799 } 4800 4801 return (rv); 4802 } 4803 4804 /*ARGSUSED*/ 4805 static int 4806 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4807 { 4808 int rv = DDI_SUCCESS; 4809 mdi_phci_t *ph; 4810 mdi_client_t *ct; 4811 mdi_pathinfo_t *pip; 4812 mdi_pathinfo_t *failed_pip = NULL; 4813 mdi_pathinfo_t *next; 4814 4815 ph = i_devi_get_phci(dip); 4816 if (ph == NULL) { 4817 return (rv); 4818 } 4819 4820 MDI_PHCI_LOCK(ph); 4821 switch (cmd) { 4822 case DDI_DETACH: 4823 MDI_DEBUG(2, (CE_NOTE, 
dip, 4824 "!pHCI pre_detach: called %p\n", ph)); 4825 if (!MDI_PHCI_IS_OFFLINE(ph)) { 4826 /* 4827 * mdi_pathinfo nodes are still attached to 4828 * this pHCI. Fail the detach for this pHCI. 4829 */ 4830 MDI_DEBUG(2, (CE_WARN, dip, 4831 "!pHCI pre_detach: " 4832 "mdi_pathinfo nodes are still attached " 4833 "%p\n", ph)); 4834 rv = DDI_FAILURE; 4835 break; 4836 } 4837 MDI_PHCI_SET_DETACH(ph); 4838 break; 4839 4840 case DDI_SUSPEND: 4841 /* 4842 * pHCI is getting suspended. Since mpxio client 4843 * devices may not be suspended at this point, to avoid 4844 * a potential stack overflow, it is important to suspend 4845 * client devices before pHCI can be suspended. 4846 */ 4847 4848 MDI_DEBUG(2, (CE_NOTE, dip, 4849 "!pHCI pre_suspend: called %p\n", ph)); 4850 /* 4851 * Suspend all the client devices accessible through this pHCI 4852 */ 4853 pip = ph->ph_path_head; 4854 while (pip != NULL && rv == DDI_SUCCESS) { 4855 dev_info_t *cdip; 4856 MDI_PI_LOCK(pip); 4857 next = 4858 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4859 ct = MDI_PI(pip)->pi_client; 4860 i_mdi_client_lock(ct, pip); 4861 cdip = ct->ct_dip; 4862 MDI_PI_UNLOCK(pip); 4863 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 4864 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 4865 i_mdi_client_unlock(ct); 4866 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 4867 DDI_SUCCESS) { 4868 /* 4869 * Suspend of one of the client 4870 * device has failed. 4871 */ 4872 MDI_DEBUG(1, (CE_WARN, dip, 4873 "!Suspend of device (%s%d) failed.", 4874 ddi_driver_name(cdip), 4875 ddi_get_instance(cdip))); 4876 failed_pip = pip; 4877 break; 4878 } 4879 } else { 4880 i_mdi_client_unlock(ct); 4881 } 4882 pip = next; 4883 } 4884 4885 if (rv == DDI_SUCCESS) { 4886 /* 4887 * Suspend of client devices is complete. Proceed 4888 * with pHCI suspend. 4889 */ 4890 MDI_PHCI_SET_SUSPEND(ph); 4891 } else { 4892 /* 4893 * Revert back all the suspended client device states 4894 * to converse. 
4895 */ 4896 pip = ph->ph_path_head; 4897 while (pip != failed_pip) { 4898 dev_info_t *cdip; 4899 MDI_PI_LOCK(pip); 4900 next = 4901 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4902 ct = MDI_PI(pip)->pi_client; 4903 i_mdi_client_lock(ct, pip); 4904 cdip = ct->ct_dip; 4905 MDI_PI_UNLOCK(pip); 4906 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 4907 i_mdi_client_unlock(ct); 4908 (void) devi_attach(cdip, DDI_RESUME); 4909 } else { 4910 i_mdi_client_unlock(ct); 4911 } 4912 pip = next; 4913 } 4914 } 4915 break; 4916 4917 default: 4918 rv = DDI_FAILURE; 4919 break; 4920 } 4921 MDI_PHCI_UNLOCK(ph); 4922 return (rv); 4923 } 4924 4925 /*ARGSUSED*/ 4926 static int 4927 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4928 { 4929 int rv = DDI_SUCCESS; 4930 mdi_client_t *ct; 4931 4932 ct = i_devi_get_client(dip); 4933 if (ct == NULL) { 4934 return (rv); 4935 } 4936 4937 MDI_CLIENT_LOCK(ct); 4938 switch (cmd) { 4939 case DDI_DETACH: 4940 MDI_DEBUG(2, (CE_NOTE, dip, 4941 "!Client pre_detach: called %p\n", ct)); 4942 MDI_CLIENT_SET_DETACH(ct); 4943 break; 4944 4945 case DDI_SUSPEND: 4946 MDI_DEBUG(2, (CE_NOTE, dip, 4947 "!Client pre_suspend: called %p\n", ct)); 4948 MDI_CLIENT_SET_SUSPEND(ct); 4949 break; 4950 4951 default: 4952 rv = DDI_FAILURE; 4953 break; 4954 } 4955 MDI_CLIENT_UNLOCK(ct); 4956 return (rv); 4957 } 4958 4959 /* 4960 * mdi_post_detach(): 4961 * Post detach notification handler 4962 */ 4963 4964 /*ARGSUSED*/ 4965 void 4966 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 4967 { 4968 /* 4969 * Detach/Suspend of mpxio component failed. Update our state 4970 * too 4971 */ 4972 if (MDI_PHCI(dip)) 4973 i_mdi_phci_post_detach(dip, cmd, error); 4974 4975 if (MDI_CLIENT(dip)) 4976 i_mdi_client_post_detach(dip, cmd, error); 4977 } 4978 4979 /*ARGSUSED*/ 4980 static void 4981 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 4982 { 4983 mdi_phci_t *ph; 4984 4985 /* 4986 * Detach/Suspend of phci component failed. 
Update our state 4987 * too 4988 */ 4989 ph = i_devi_get_phci(dip); 4990 if (ph == NULL) { 4991 return; 4992 } 4993 4994 MDI_PHCI_LOCK(ph); 4995 /* 4996 * Detach of pHCI failed. Restore back converse 4997 * state 4998 */ 4999 switch (cmd) { 5000 case DDI_DETACH: 5001 MDI_DEBUG(2, (CE_NOTE, dip, 5002 "!pHCI post_detach: called %p\n", ph)); 5003 if (error != DDI_SUCCESS) 5004 MDI_PHCI_SET_ATTACH(ph); 5005 break; 5006 5007 case DDI_SUSPEND: 5008 MDI_DEBUG(2, (CE_NOTE, dip, 5009 "!pHCI post_suspend: called %p\n", ph)); 5010 if (error != DDI_SUCCESS) 5011 MDI_PHCI_SET_RESUME(ph); 5012 break; 5013 } 5014 MDI_PHCI_UNLOCK(ph); 5015 } 5016 5017 /*ARGSUSED*/ 5018 static void 5019 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5020 { 5021 mdi_client_t *ct; 5022 5023 ct = i_devi_get_client(dip); 5024 if (ct == NULL) { 5025 return; 5026 } 5027 MDI_CLIENT_LOCK(ct); 5028 /* 5029 * Detach of Client failed. Restore back converse 5030 * state 5031 */ 5032 switch (cmd) { 5033 case DDI_DETACH: 5034 MDI_DEBUG(2, (CE_NOTE, dip, 5035 "!Client post_detach: called %p\n", ct)); 5036 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5037 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5038 "i_mdi_pm_rele_client\n")); 5039 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5040 } else { 5041 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5042 "i_mdi_pm_reset_client\n")); 5043 i_mdi_pm_reset_client(ct); 5044 } 5045 if (error != DDI_SUCCESS) 5046 MDI_CLIENT_SET_ATTACH(ct); 5047 break; 5048 5049 case DDI_SUSPEND: 5050 MDI_DEBUG(2, (CE_NOTE, dip, 5051 "!Client post_suspend: called %p\n", ct)); 5052 if (error != DDI_SUCCESS) 5053 MDI_CLIENT_SET_RESUME(ct); 5054 break; 5055 } 5056 MDI_CLIENT_UNLOCK(ct); 5057 } 5058 5059 /* 5060 * create and install per-path (client - pHCI) statistics 5061 * I/O stats supported: nread, nwritten, reads, and writes 5062 * Error stats - hard errors, soft errors, & transport errors 5063 */ 5064 static int 5065 
i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
{
	/*
	 * Returns MDI_SUCCESS when pip already has kstats, when it can share
	 * the kstats of a sibling path to the same pHCI, or when a new pair
	 * of kstats was created and installed.  Returns MDI_FAILURE when the
	 * kstat name does not fit or a kstat cannot be created.
	 * The caller must hold the client's ct_mutex.
	 */
	dev_info_t	*client = MDI_PI(pip)->pi_client->ct_dip;
	dev_info_t	*ppath = MDI_PI(pip)->pi_phci->ph_dip;
	char		ksname[KSTAT_STRLEN];
	mdi_pathinfo_t	*cpip;
	const char	*err_postfix = ",err";
	kstat_t		*kiosp, *kerrsp;
	struct pi_errs	*nsp;
	struct mdi_pi_kstats *mdi_statp;

	ASSERT(client != NULL && ppath != NULL);

	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));

	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
		if (cpip == pip)
			continue;
		/*
		 * We have found a different path with same parent
		 * kstats for a given client-pHCI are common
		 */
		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
		    (MDI_PI(cpip)->pi_kstats != NULL)) {
			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
			return (MDI_SUCCESS);
		}
	}

	/*
	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
	 * clamp length of name against max length of error kstat name
	 *
	 * ksname must later hold the base name, err_postfix and the
	 * terminating NUL, so the base name may be at most
	 * KSTAT_STRLEN - strlen(err_postfix) - 1 characters long.  The
	 * comparison is therefore ">=": with ">" a base name of exactly
	 * KSTAT_STRLEN - strlen(err_postfix) characters slipped through and
	 * the strcat() below wrote one byte past the end of ksname.
	 */
	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
	    ddi_driver_name(client), ddi_get_instance(client),
	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >=
	    (KSTAT_STRLEN - strlen(err_postfix))) {
		return (MDI_FAILURE);
	}
	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
		return (MDI_FAILURE);
	}

	/* Safe: the length check above left room for the postfix and NUL. */
	(void) strcat(ksname, err_postfix);
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);

	if (kerrsp == NULL) {
		/* do not leave the half of the pair that was created */
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	/* pip holds the first reference on the newly created kstat pair */
	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}

/*
 * destroy per-path
properties 5156 */ 5157 static void 5158 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5159 { 5160 5161 struct mdi_pi_kstats *mdi_statp; 5162 5163 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5164 return; 5165 5166 MDI_PI(pip)->pi_kstats = NULL; 5167 5168 /* 5169 * the kstat may be shared between multiple pathinfo nodes 5170 * decrement this pathinfo's usage, removing the kstats 5171 * themselves when the last pathinfo reference is removed. 5172 */ 5173 ASSERT(mdi_statp->pi_kstat_ref > 0); 5174 if (--mdi_statp->pi_kstat_ref != 0) 5175 return; 5176 5177 kstat_delete(mdi_statp->pi_kstat_iostats); 5178 kstat_delete(mdi_statp->pi_kstat_errstats); 5179 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5180 } 5181 5182 /* 5183 * update I/O paths KSTATS 5184 */ 5185 void 5186 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5187 { 5188 kstat_t *iostatp; 5189 size_t xfer_cnt; 5190 5191 ASSERT(pip != NULL); 5192 5193 /* 5194 * I/O can be driven across a path prior to having path 5195 * statistics available, i.e. probe(9e). 5196 */ 5197 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5198 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5199 xfer_cnt = bp->b_bcount - bp->b_resid; 5200 if (bp->b_flags & B_READ) { 5201 KSTAT_IO_PTR(iostatp)->reads++; 5202 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5203 } else { 5204 KSTAT_IO_PTR(iostatp)->writes++; 5205 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5206 } 5207 } 5208 } 5209 5210 /* 5211 * disable the path to a particular pHCI (pHCI specified in the phci_path 5212 * argument) for a particular client (specified in the client_path argument). 5213 * Disabling a path means that MPxIO will not select the disabled path for 5214 * routing any new I/O requests. 
5215 */ 5216 int 5217 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5218 { 5219 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5220 } 5221 5222 /* 5223 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5224 * argument) for a particular client (specified in the client_path argument). 5225 * Enabling a path means that MPxIO may select the enabled path for routing 5226 * future I/O requests, subject to other path state constraints. 5227 */ 5228 5229 int 5230 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5231 { 5232 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5233 } 5234 5235 5236 /* 5237 * Common routine for doing enable/disable. 5238 */ 5239 int 5240 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5241 { 5242 5243 mdi_phci_t *ph; 5244 mdi_vhci_t *vh = NULL; 5245 mdi_client_t *ct; 5246 mdi_pathinfo_t *next, *pip; 5247 int found_it; 5248 int (*f)() = NULL; 5249 int rv; 5250 int sync_flag = 0; 5251 5252 ph = i_devi_get_phci(pdip); 5253 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5254 " Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip)); 5255 if (ph == NULL) { 5256 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5257 " failed. ph = NULL operation = %d\n", op)); 5258 return (MDI_FAILURE); 5259 } 5260 5261 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5262 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5263 " Invalid operation = %d\n", op)); 5264 return (MDI_FAILURE); 5265 } 5266 5267 sync_flag = (flags << 8) & 0xf00; 5268 5269 vh = ph->ph_vhci; 5270 f = vh->vh_ops->vo_pi_state_change; 5271 5272 if (cdip == NULL) { 5273 /* 5274 * Need to mark the Phci as enabled/disabled. 
5275 */ 5276 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5277 "Operation %d for the phci\n", op)); 5278 MDI_PHCI_LOCK(ph); 5279 switch (flags) { 5280 case USER_DISABLE: 5281 if (op == MDI_DISABLE_OP) 5282 MDI_PHCI_SET_USER_DISABLE(ph); 5283 else 5284 MDI_PHCI_SET_USER_ENABLE(ph); 5285 break; 5286 case DRIVER_DISABLE: 5287 if (op == MDI_DISABLE_OP) 5288 MDI_PHCI_SET_DRV_DISABLE(ph); 5289 else 5290 MDI_PHCI_SET_DRV_ENABLE(ph); 5291 break; 5292 case DRIVER_DISABLE_TRANSIENT: 5293 if (op == MDI_DISABLE_OP) 5294 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5295 else 5296 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5297 break; 5298 default: 5299 MDI_PHCI_UNLOCK(ph); 5300 MDI_DEBUG(1, (CE_NOTE, NULL, 5301 "!i_mdi_pi_enable_disable:" 5302 " Invalid flag argument= %d\n", flags)); 5303 } 5304 5305 /* 5306 * Phci has been disabled. Now try to enable/disable 5307 * path info's to each client. 5308 */ 5309 pip = ph->ph_path_head; 5310 while (pip != NULL) { 5311 /* 5312 * Do a callback into the mdi consumer to let it 5313 * know that path is about to be enabled/disabled. 
5314 */ 5315 if (f != NULL) { 5316 rv = (*f)(vh->vh_dip, pip, 0, 5317 MDI_PI_EXT_STATE(pip), 5318 MDI_EXT_STATE_CHANGE | sync_flag | 5319 op | MDI_BEFORE_STATE_CHANGE); 5320 if (rv != MDI_SUCCESS) { 5321 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5322 "!vo_pi_state_change: failed rv = %x", rv)); 5323 } 5324 } 5325 5326 MDI_PI_LOCK(pip); 5327 next = 5328 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5329 switch (flags) { 5330 case USER_DISABLE: 5331 if (op == MDI_DISABLE_OP) 5332 MDI_PI_SET_USER_DISABLE(pip); 5333 else 5334 MDI_PI_SET_USER_ENABLE(pip); 5335 break; 5336 case DRIVER_DISABLE: 5337 if (op == MDI_DISABLE_OP) 5338 MDI_PI_SET_DRV_DISABLE(pip); 5339 else 5340 MDI_PI_SET_DRV_ENABLE(pip); 5341 break; 5342 case DRIVER_DISABLE_TRANSIENT: 5343 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5344 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5345 else 5346 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5347 break; 5348 } 5349 MDI_PI_UNLOCK(pip); 5350 /* 5351 * Do a callback into the mdi consumer to let it 5352 * know that path is now enabled/disabled. 5353 */ 5354 if (f != NULL) { 5355 rv = (*f)(vh->vh_dip, pip, 0, 5356 MDI_PI_EXT_STATE(pip), 5357 MDI_EXT_STATE_CHANGE | sync_flag | 5358 op | MDI_AFTER_STATE_CHANGE); 5359 if (rv != MDI_SUCCESS) { 5360 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5361 "!vo_pi_state_change: failed rv = %x", rv)); 5362 } 5363 } 5364 pip = next; 5365 } 5366 MDI_PHCI_UNLOCK(ph); 5367 } else { 5368 5369 /* 5370 * Disable a specific client. 5371 */ 5372 ct = i_devi_get_client(cdip); 5373 if (ct == NULL) { 5374 MDI_DEBUG(1, (CE_NOTE, NULL, 5375 "!i_mdi_pi_enable_disable:" 5376 " failed. 
ct = NULL operation = %d\n", op)); 5377 return (MDI_FAILURE); 5378 } 5379 5380 MDI_CLIENT_LOCK(ct); 5381 pip = ct->ct_path_head; 5382 found_it = 0; 5383 while (pip != NULL) { 5384 MDI_PI_LOCK(pip); 5385 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5386 if (MDI_PI(pip)->pi_phci == ph) { 5387 MDI_PI_UNLOCK(pip); 5388 found_it = 1; 5389 break; 5390 } 5391 MDI_PI_UNLOCK(pip); 5392 pip = next; 5393 } 5394 5395 MDI_CLIENT_UNLOCK(ct); 5396 if (found_it == 0) { 5397 MDI_DEBUG(1, (CE_NOTE, NULL, 5398 "!i_mdi_pi_enable_disable:" 5399 " failed. Could not find corresponding pip\n")); 5400 return (MDI_FAILURE); 5401 } 5402 /* 5403 * Do a callback into the mdi consumer to let it 5404 * know that path is about to get enabled/disabled. 5405 */ 5406 if (f != NULL) { 5407 rv = (*f)(vh->vh_dip, pip, 0, 5408 MDI_PI_EXT_STATE(pip), 5409 MDI_EXT_STATE_CHANGE | sync_flag | 5410 op | MDI_BEFORE_STATE_CHANGE); 5411 if (rv != MDI_SUCCESS) { 5412 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5413 "!vo_pi_state_change: failed rv = %x", rv)); 5414 } 5415 } 5416 MDI_PI_LOCK(pip); 5417 switch (flags) { 5418 case USER_DISABLE: 5419 if (op == MDI_DISABLE_OP) 5420 MDI_PI_SET_USER_DISABLE(pip); 5421 else 5422 MDI_PI_SET_USER_ENABLE(pip); 5423 break; 5424 case DRIVER_DISABLE: 5425 if (op == MDI_DISABLE_OP) 5426 MDI_PI_SET_DRV_DISABLE(pip); 5427 else 5428 MDI_PI_SET_DRV_ENABLE(pip); 5429 break; 5430 case DRIVER_DISABLE_TRANSIENT: 5431 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5432 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5433 else 5434 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5435 break; 5436 } 5437 MDI_PI_UNLOCK(pip); 5438 /* 5439 * Do a callback into the mdi consumer to let it 5440 * know that path is now enabled/disabled. 
5441 */ 5442 if (f != NULL) { 5443 rv = (*f)(vh->vh_dip, pip, 0, 5444 MDI_PI_EXT_STATE(pip), 5445 MDI_EXT_STATE_CHANGE | sync_flag | 5446 op | MDI_AFTER_STATE_CHANGE); 5447 if (rv != MDI_SUCCESS) { 5448 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5449 "!vo_pi_state_change: failed rv = %x", rv)); 5450 } 5451 } 5452 } 5453 5454 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5455 " Returning success pdip = %p cdip = %p\n", op, pdip, cdip)); 5456 return (MDI_SUCCESS); 5457 } 5458 5459 /*ARGSUSED3*/ 5460 int 5461 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp, 5462 int flags, clock_t timeout) 5463 { 5464 mdi_pathinfo_t *pip; 5465 dev_info_t *dip; 5466 clock_t interval = drv_usectohz(100000); /* 0.1 sec */ 5467 char *paddr; 5468 5469 MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm)); 5470 5471 if (!MDI_PHCI(pdip)) 5472 return (MDI_FAILURE); 5473 5474 paddr = strchr(devnm, '@'); 5475 if (paddr == NULL) 5476 return (MDI_FAILURE); 5477 5478 paddr++; /* skip '@' */ 5479 pip = mdi_pi_find(pdip, NULL, paddr); 5480 while (pip == NULL && timeout > 0) { 5481 if (interval > timeout) 5482 interval = timeout; 5483 if (flags & NDI_DEVI_DEBUG) { 5484 cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n", 5485 ddi_driver_name(pdip), ddi_get_instance(pdip), 5486 paddr, interval, timeout); 5487 } 5488 delay(interval); 5489 timeout -= interval; 5490 interval += interval; 5491 pip = mdi_pi_find(pdip, NULL, paddr); 5492 } 5493 5494 if (pip == NULL) 5495 return (MDI_FAILURE); 5496 dip = mdi_pi_get_client(pip); 5497 if (ndi_devi_online(dip, flags) != NDI_SUCCESS) 5498 return (MDI_FAILURE); 5499 *cdipp = dip; 5500 5501 /* TODO: holding should happen inside search functions */ 5502 ndi_hold_devi(dip); 5503 return (MDI_SUCCESS); 5504 } 5505 5506 /* 5507 * Ensure phci powered up 5508 */ 5509 static void 5510 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5511 { 5512 dev_info_t *ph_dip; 5513 5514 ASSERT(pip != NULL); 5515 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5516 5517 if 
(MDI_PI(pip)->pi_pm_held) { 5518 return; 5519 } 5520 5521 ph_dip = mdi_pi_get_phci(pip); 5522 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n", 5523 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5524 if (ph_dip == NULL) { 5525 return; 5526 } 5527 5528 MDI_PI_UNLOCK(pip); 5529 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5530 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5531 pm_hold_power(ph_dip); 5532 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5533 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5534 MDI_PI_LOCK(pip); 5535 5536 MDI_PI(pip)->pi_pm_held = 1; 5537 } 5538 5539 /* 5540 * Allow phci powered down 5541 */ 5542 static void 5543 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5544 { 5545 dev_info_t *ph_dip = NULL; 5546 5547 ASSERT(pip != NULL); 5548 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5549 5550 if (MDI_PI(pip)->pi_pm_held == 0) { 5551 return; 5552 } 5553 5554 ph_dip = mdi_pi_get_phci(pip); 5555 ASSERT(ph_dip != NULL); 5556 5557 MDI_PI_UNLOCK(pip); 5558 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n", 5559 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5560 5561 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5562 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5563 pm_rele_power(ph_dip); 5564 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5565 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5566 5567 MDI_PI_LOCK(pip); 5568 MDI_PI(pip)->pi_pm_held = 0; 5569 } 5570 5571 static void 5572 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5573 { 5574 ASSERT(ct); 5575 5576 ct->ct_power_cnt += incr; 5577 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client " 5578 "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr)); 5579 ASSERT(ct->ct_power_cnt >= 0); 5580 } 5581 5582 static void 5583 i_mdi_rele_all_phci(mdi_client_t *ct) 5584 { 5585 mdi_pathinfo_t *pip; 5586 5587 ASSERT(mutex_owned(&ct->ct_mutex)); 5588 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5589 while (pip != NULL) { 5590 mdi_hold_path(pip); 5591 MDI_PI_LOCK(pip); 5592 
i_mdi_pm_rele_pip(pip); 5593 MDI_PI_UNLOCK(pip); 5594 mdi_rele_path(pip); 5595 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5596 } 5597 } 5598 5599 static void 5600 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5601 { 5602 ASSERT(ct); 5603 5604 if (i_ddi_node_state(ct->ct_dip) >= DS_READY) { 5605 ct->ct_power_cnt -= decr; 5606 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client " 5607 "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr)); 5608 } 5609 5610 ASSERT(ct->ct_power_cnt >= 0); 5611 if (ct->ct_power_cnt == 0) { 5612 i_mdi_rele_all_phci(ct); 5613 return; 5614 } 5615 } 5616 5617 static void 5618 i_mdi_pm_reset_client(mdi_client_t *ct) 5619 { 5620 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client " 5621 "ct_power_cnt = %d\n", ct->ct_power_cnt)); 5622 ct->ct_power_cnt = 0; 5623 i_mdi_rele_all_phci(ct); 5624 ct->ct_powercnt_reset = 1; 5625 ct->ct_powercnt_held = 0; 5626 } 5627 5628 static void 5629 i_mdi_pm_hold_all_phci(mdi_client_t *ct) 5630 { 5631 mdi_pathinfo_t *pip; 5632 ASSERT(mutex_owned(&ct->ct_mutex)); 5633 5634 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5635 while (pip != NULL) { 5636 mdi_hold_path(pip); 5637 MDI_PI_LOCK(pip); 5638 i_mdi_pm_hold_pip(pip); 5639 MDI_PI_UNLOCK(pip); 5640 mdi_rele_path(pip); 5641 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5642 } 5643 } 5644 5645 static int 5646 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5647 { 5648 int ret; 5649 dev_info_t *ph_dip; 5650 5651 MDI_PI_LOCK(pip); 5652 i_mdi_pm_hold_pip(pip); 5653 5654 ph_dip = mdi_pi_get_phci(pip); 5655 MDI_PI_UNLOCK(pip); 5656 5657 /* bring all components of phci to full power */ 5658 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5659 "pm_powerup for %s%d\n", ddi_get_name(ph_dip), 5660 ddi_get_instance(ph_dip))); 5661 5662 ret = pm_powerup(ph_dip); 5663 5664 if (ret == DDI_FAILURE) { 5665 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5666 "pm_powerup FAILED for %s%d\n", 5667 ddi_get_name(ph_dip), 
ddi_get_instance(ph_dip))); 5668 5669 MDI_PI_LOCK(pip); 5670 i_mdi_pm_rele_pip(pip); 5671 MDI_PI_UNLOCK(pip); 5672 return (MDI_FAILURE); 5673 } 5674 5675 return (MDI_SUCCESS); 5676 } 5677 5678 static int 5679 i_mdi_power_all_phci(mdi_client_t *ct) 5680 { 5681 mdi_pathinfo_t *pip; 5682 int succeeded = 0; 5683 5684 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5685 while (pip != NULL) { 5686 mdi_hold_path(pip); 5687 MDI_CLIENT_UNLOCK(ct); 5688 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5689 succeeded = 1; 5690 5691 ASSERT(ct == MDI_PI(pip)->pi_client); 5692 MDI_CLIENT_LOCK(ct); 5693 mdi_rele_path(pip); 5694 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5695 } 5696 5697 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5698 } 5699 5700 /* 5701 * mdi_bus_power(): 5702 * 1. Place the phci(s) into powered up state so that 5703 * client can do power management 5704 * 2. Ensure phci powered up as client power managing 5705 * Return Values: 5706 * MDI_SUCCESS 5707 * MDI_FAILURE 5708 */ 5709 int 5710 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5711 void *arg, void *result) 5712 { 5713 int ret = MDI_SUCCESS; 5714 pm_bp_child_pwrchg_t *bpc; 5715 mdi_client_t *ct; 5716 dev_info_t *cdip; 5717 pm_bp_has_changed_t *bphc; 5718 5719 /* 5720 * BUS_POWER_NOINVOL not supported 5721 */ 5722 if (op == BUS_POWER_NOINVOL) 5723 return (MDI_FAILURE); 5724 5725 /* 5726 * ignore other OPs. 
5727 * return quickly to save cou cycles on the ct processing 5728 */ 5729 switch (op) { 5730 case BUS_POWER_PRE_NOTIFICATION: 5731 case BUS_POWER_POST_NOTIFICATION: 5732 bpc = (pm_bp_child_pwrchg_t *)arg; 5733 cdip = bpc->bpc_dip; 5734 break; 5735 case BUS_POWER_HAS_CHANGED: 5736 bphc = (pm_bp_has_changed_t *)arg; 5737 cdip = bphc->bphc_dip; 5738 break; 5739 default: 5740 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5741 } 5742 5743 ASSERT(MDI_CLIENT(cdip)); 5744 5745 ct = i_devi_get_client(cdip); 5746 if (ct == NULL) 5747 return (MDI_FAILURE); 5748 5749 /* 5750 * wait till the mdi_pathinfo node state change are processed 5751 */ 5752 MDI_CLIENT_LOCK(ct); 5753 switch (op) { 5754 case BUS_POWER_PRE_NOTIFICATION: 5755 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5756 "BUS_POWER_PRE_NOTIFICATION:" 5757 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5758 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5759 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5760 5761 /* serialize power level change per client */ 5762 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5763 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5764 5765 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5766 5767 if (ct->ct_power_cnt == 0) { 5768 ret = i_mdi_power_all_phci(ct); 5769 } 5770 5771 /* 5772 * if new_level > 0: 5773 * - hold phci(s) 5774 * - power up phci(s) if not already 5775 * ignore power down 5776 */ 5777 if (bpc->bpc_nlevel > 0) { 5778 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5779 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5780 "mdi_bus_power i_mdi_pm_hold_client\n")); 5781 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5782 } 5783 } 5784 break; 5785 case BUS_POWER_POST_NOTIFICATION: 5786 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5787 "BUS_POWER_POST_NOTIFICATION:" 5788 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5789 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5790 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5791 *(int *)result)); 5792 5793 if (*(int *)result 
== DDI_SUCCESS) { 5794 if (bpc->bpc_nlevel > 0) { 5795 MDI_CLIENT_SET_POWER_UP(ct); 5796 } else { 5797 MDI_CLIENT_SET_POWER_DOWN(ct); 5798 } 5799 } 5800 5801 /* release the hold we did in pre-notification */ 5802 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5803 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5804 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5805 "mdi_bus_power i_mdi_pm_rele_client\n")); 5806 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5807 } 5808 5809 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5810 /* another thread might started attaching */ 5811 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5812 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5813 "mdi_bus_power i_mdi_pm_rele_client\n")); 5814 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5815 /* detaching has been taken care in pm_post_unconfig */ 5816 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5817 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5818 "mdi_bus_power i_mdi_pm_reset_client\n")); 5819 i_mdi_pm_reset_client(ct); 5820 } 5821 } 5822 5823 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5824 cv_broadcast(&ct->ct_powerchange_cv); 5825 5826 break; 5827 5828 /* need to do more */ 5829 case BUS_POWER_HAS_CHANGED: 5830 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 5831 "BUS_POWER_HAS_CHANGED:" 5832 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5833 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 5834 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 5835 5836 if (bphc->bphc_nlevel > 0 && 5837 bphc->bphc_nlevel > bphc->bphc_olevel) { 5838 if (ct->ct_power_cnt == 0) { 5839 ret = i_mdi_power_all_phci(ct); 5840 } 5841 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5842 "mdi_bus_power i_mdi_pm_hold_client\n")); 5843 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5844 } 5845 5846 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 5847 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5848 "mdi_bus_power i_mdi_pm_rele_client\n")); 5849 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5850 } 5851 break; 5852 } 5853 5854 
MDI_CLIENT_UNLOCK(ct); 5855 return (ret); 5856 } 5857 5858 static int 5859 i_mdi_pm_pre_config_one(dev_info_t *child) 5860 { 5861 int ret = MDI_SUCCESS; 5862 mdi_client_t *ct; 5863 5864 ct = i_devi_get_client(child); 5865 if (ct == NULL) 5866 return (MDI_FAILURE); 5867 5868 MDI_CLIENT_LOCK(ct); 5869 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5870 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5871 5872 if (!MDI_CLIENT_IS_FAILED(ct)) { 5873 MDI_CLIENT_UNLOCK(ct); 5874 MDI_DEBUG(4, (CE_NOTE, child, 5875 "i_mdi_pm_pre_config_one already configured\n")); 5876 return (MDI_SUCCESS); 5877 } 5878 5879 if (ct->ct_powercnt_held) { 5880 MDI_CLIENT_UNLOCK(ct); 5881 MDI_DEBUG(4, (CE_NOTE, child, 5882 "i_mdi_pm_pre_config_one ALREADY held\n")); 5883 return (MDI_SUCCESS); 5884 } 5885 5886 if (ct->ct_power_cnt == 0) { 5887 ret = i_mdi_power_all_phci(ct); 5888 } 5889 MDI_DEBUG(4, (CE_NOTE, child, 5890 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 5891 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5892 ct->ct_powercnt_held = 1; 5893 ct->ct_powercnt_reset = 0; 5894 MDI_CLIENT_UNLOCK(ct); 5895 return (ret); 5896 } 5897 5898 static int 5899 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child) 5900 { 5901 int ret = MDI_SUCCESS; 5902 dev_info_t *cdip; 5903 int circ; 5904 5905 ASSERT(MDI_VHCI(parent)); 5906 5907 /* ndi_devi_config_one */ 5908 if (child) { 5909 return (i_mdi_pm_pre_config_one(child)); 5910 } 5911 5912 /* devi_config_common */ 5913 ndi_devi_enter(parent, &circ); 5914 cdip = ddi_get_child(parent); 5915 while (cdip) { 5916 dev_info_t *next = ddi_get_next_sibling(cdip); 5917 5918 ret = i_mdi_pm_pre_config_one(cdip); 5919 if (ret != MDI_SUCCESS) 5920 break; 5921 cdip = next; 5922 } 5923 ndi_devi_exit(parent, circ); 5924 return (ret); 5925 } 5926 5927 static int 5928 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 5929 { 5930 int ret = MDI_SUCCESS; 5931 mdi_client_t *ct; 5932 5933 ct = i_devi_get_client(child); 5934 if (ct == NULL) 5935 return 
(MDI_FAILURE); 5936 5937 MDI_CLIENT_LOCK(ct); 5938 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5939 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5940 5941 if (i_ddi_node_state(ct->ct_dip) < DS_READY) { 5942 MDI_DEBUG(4, (CE_NOTE, child, 5943 "i_mdi_pm_pre_unconfig node detached already\n")); 5944 MDI_CLIENT_UNLOCK(ct); 5945 return (MDI_SUCCESS); 5946 } 5947 5948 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 5949 (flags & NDI_AUTODETACH)) { 5950 MDI_DEBUG(4, (CE_NOTE, child, 5951 "i_mdi_pm_pre_unconfig auto-modunload\n")); 5952 MDI_CLIENT_UNLOCK(ct); 5953 return (MDI_FAILURE); 5954 } 5955 5956 if (ct->ct_powercnt_held) { 5957 MDI_DEBUG(4, (CE_NOTE, child, 5958 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 5959 MDI_CLIENT_UNLOCK(ct); 5960 *held = 1; 5961 return (MDI_SUCCESS); 5962 } 5963 5964 if (ct->ct_power_cnt == 0) { 5965 ret = i_mdi_power_all_phci(ct); 5966 } 5967 MDI_DEBUG(4, (CE_NOTE, child, 5968 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 5969 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5970 ct->ct_powercnt_held = 1; 5971 ct->ct_powercnt_reset = 0; 5972 MDI_CLIENT_UNLOCK(ct); 5973 if (ret == MDI_SUCCESS) 5974 *held = 1; 5975 return (ret); 5976 } 5977 5978 static int 5979 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held, 5980 int flags) 5981 { 5982 int ret = MDI_SUCCESS; 5983 dev_info_t *cdip; 5984 int circ; 5985 5986 ASSERT(MDI_VHCI(parent)); 5987 *held = 0; 5988 5989 /* ndi_devi_unconfig_one */ 5990 if (child) { 5991 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 5992 } 5993 5994 /* devi_unconfig_common */ 5995 ndi_devi_enter(parent, &circ); 5996 cdip = ddi_get_child(parent); 5997 while (cdip) { 5998 dev_info_t *next = ddi_get_next_sibling(cdip); 5999 6000 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6001 cdip = next; 6002 } 6003 ndi_devi_exit(parent, circ); 6004 6005 if (*held) 6006 ret = MDI_SUCCESS; 6007 6008 return (ret); 6009 } 6010 6011 static void 6012 i_mdi_pm_post_config_one(dev_info_t *child) 6013 { 6014 
mdi_client_t *ct; 6015 6016 ct = i_devi_get_client(child); 6017 if (ct == NULL) 6018 return; 6019 6020 MDI_CLIENT_LOCK(ct); 6021 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6022 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6023 6024 if (ct->ct_powercnt_reset || !ct->ct_powercnt_held) { 6025 MDI_DEBUG(4, (CE_NOTE, child, 6026 "i_mdi_pm_post_config_one NOT held\n")); 6027 MDI_CLIENT_UNLOCK(ct); 6028 return; 6029 } 6030 6031 /* client has not been updated */ 6032 if (MDI_CLIENT_IS_FAILED(ct)) { 6033 MDI_DEBUG(4, (CE_NOTE, child, 6034 "i_mdi_pm_post_config_one NOT configured\n")); 6035 MDI_CLIENT_UNLOCK(ct); 6036 return; 6037 } 6038 6039 /* another thread might have powered it down or detached it */ 6040 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6041 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6042 (i_ddi_node_state(ct->ct_dip) < DS_READY && 6043 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6044 MDI_DEBUG(4, (CE_NOTE, child, 6045 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6046 i_mdi_pm_reset_client(ct); 6047 } else { 6048 mdi_pathinfo_t *pip, *next; 6049 int valid_path_count = 0; 6050 6051 MDI_DEBUG(4, (CE_NOTE, child, 6052 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6053 pip = ct->ct_path_head; 6054 while (pip != NULL) { 6055 MDI_PI_LOCK(pip); 6056 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6057 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 6058 == MDI_PATHINFO_STATE_ONLINE || 6059 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 6060 == MDI_PATHINFO_STATE_STANDBY) 6061 valid_path_count ++; 6062 MDI_PI_UNLOCK(pip); 6063 pip = next; 6064 } 6065 i_mdi_pm_rele_client(ct, valid_path_count); 6066 } 6067 ct->ct_powercnt_held = 0; 6068 MDI_CLIENT_UNLOCK(ct); 6069 } 6070 6071 static void 6072 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child) 6073 { 6074 int circ; 6075 dev_info_t *cdip; 6076 ASSERT(MDI_VHCI(parent)); 6077 6078 /* ndi_devi_config_one */ 6079 if (child) { 6080 i_mdi_pm_post_config_one(child); 6081 return; 6082 } 6083 6084 /* 
devi_config_common */ 6085 ndi_devi_enter(parent, &circ); 6086 cdip = ddi_get_child(parent); 6087 while (cdip) { 6088 dev_info_t *next = ddi_get_next_sibling(cdip); 6089 6090 i_mdi_pm_post_config_one(cdip); 6091 cdip = next; 6092 } 6093 ndi_devi_exit(parent, circ); 6094 } 6095 6096 static void 6097 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6098 { 6099 mdi_client_t *ct; 6100 6101 ct = i_devi_get_client(child); 6102 if (ct == NULL) 6103 return; 6104 6105 MDI_CLIENT_LOCK(ct); 6106 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6107 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6108 6109 if (!ct->ct_powercnt_held) { 6110 MDI_DEBUG(4, (CE_NOTE, child, 6111 "i_mdi_pm_post_unconfig NOT held\n")); 6112 MDI_CLIENT_UNLOCK(ct); 6113 return; 6114 } 6115 6116 /* failure detaching or another thread just attached it */ 6117 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6118 i_ddi_node_state(ct->ct_dip) == DS_READY) || 6119 (i_ddi_node_state(ct->ct_dip) != DS_READY && 6120 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6121 MDI_DEBUG(4, (CE_NOTE, child, 6122 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6123 i_mdi_pm_reset_client(ct); 6124 } 6125 6126 MDI_DEBUG(4, (CE_NOTE, child, 6127 "i_mdi_pm_post_unconfig not changed\n")); 6128 MDI_CLIENT_UNLOCK(ct); 6129 } 6130 6131 static void 6132 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held) 6133 { 6134 int circ; 6135 dev_info_t *cdip; 6136 6137 ASSERT(MDI_VHCI(parent)); 6138 6139 if (!held) { 6140 MDI_DEBUG(4, (CE_NOTE, parent, 6141 "i_mdi_pm_post_unconfig held = %d\n", held)); 6142 return; 6143 } 6144 6145 if (child) { 6146 i_mdi_pm_post_unconfig_one(child); 6147 return; 6148 } 6149 6150 ndi_devi_enter(parent, &circ); 6151 cdip = ddi_get_child(parent); 6152 while (cdip) { 6153 dev_info_t *next = ddi_get_next_sibling(cdip); 6154 6155 i_mdi_pm_post_unconfig_one(cdip); 6156 cdip = next; 6157 } 6158 ndi_devi_exit(parent, circ); 6159 } 6160 6161 int 6162 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int 
flags) 6163 { 6164 int circ, ret = MDI_SUCCESS; 6165 dev_info_t *client_dip = NULL; 6166 mdi_client_t *ct; 6167 6168 /* 6169 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6170 * Power up pHCI for the named client device. 6171 * Note: Before the client is enumerated under vhci by phci, 6172 * client_dip can be NULL. Then proceed to power up all the 6173 * pHCIs. 6174 */ 6175 if (devnm != NULL) { 6176 ndi_devi_enter(vdip, &circ); 6177 client_dip = ndi_devi_findchild(vdip, devnm); 6178 ndi_devi_exit(vdip, circ); 6179 } 6180 6181 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op)); 6182 6183 switch (op) { 6184 case MDI_PM_PRE_CONFIG: 6185 ret = i_mdi_pm_pre_config(vdip, client_dip); 6186 6187 break; 6188 case MDI_PM_PRE_UNCONFIG: 6189 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6190 flags); 6191 6192 break; 6193 case MDI_PM_POST_CONFIG: 6194 i_mdi_pm_post_config(vdip, client_dip); 6195 6196 break; 6197 case MDI_PM_POST_UNCONFIG: 6198 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6199 6200 break; 6201 case MDI_PM_HOLD_POWER: 6202 case MDI_PM_RELE_POWER: 6203 ASSERT(args); 6204 6205 client_dip = (dev_info_t *)args; 6206 ASSERT(MDI_CLIENT(client_dip)); 6207 6208 ct = i_devi_get_client(client_dip); 6209 MDI_CLIENT_LOCK(ct); 6210 6211 if (op == MDI_PM_HOLD_POWER) { 6212 if (ct->ct_power_cnt == 0) { 6213 (void) i_mdi_power_all_phci(ct); 6214 MDI_DEBUG(4, (CE_NOTE, client_dip, 6215 "mdi_power i_mdi_pm_hold_client\n")); 6216 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6217 } 6218 } else { 6219 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6220 MDI_DEBUG(4, (CE_NOTE, client_dip, 6221 "mdi_power i_mdi_pm_rele_client\n")); 6222 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6223 } else { 6224 MDI_DEBUG(4, (CE_NOTE, client_dip, 6225 "mdi_power i_mdi_pm_reset_client\n")); 6226 i_mdi_pm_reset_client(ct); 6227 } 6228 } 6229 6230 MDI_CLIENT_UNLOCK(ct); 6231 break; 6232 default: 6233 break; 6234 } 6235 6236 return (ret); 6237 } 6238 6239 int 6240 
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6241 { 6242 mdi_vhci_t *vhci; 6243 6244 if (!MDI_VHCI(dip)) 6245 return (MDI_FAILURE); 6246 6247 if (mdi_class) { 6248 vhci = DEVI(dip)->devi_mdi_xhci; 6249 ASSERT(vhci); 6250 *mdi_class = vhci->vh_class; 6251 } 6252 6253 return (MDI_SUCCESS); 6254 } 6255 6256 int 6257 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6258 { 6259 mdi_phci_t *phci; 6260 6261 if (!MDI_PHCI(dip)) 6262 return (MDI_FAILURE); 6263 6264 if (mdi_class) { 6265 phci = DEVI(dip)->devi_mdi_xhci; 6266 ASSERT(phci); 6267 *mdi_class = phci->ph_vhci->vh_class; 6268 } 6269 6270 return (MDI_SUCCESS); 6271 } 6272 6273 int 6274 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6275 { 6276 mdi_client_t *client; 6277 6278 if (!MDI_CLIENT(dip)) 6279 return (MDI_FAILURE); 6280 6281 if (mdi_class) { 6282 client = DEVI(dip)->devi_mdi_client; 6283 ASSERT(client); 6284 *mdi_class = client->ct_vhci->vh_class; 6285 } 6286 6287 return (MDI_SUCCESS); 6288 } 6289 6290 void * 6291 mdi_client_get_vhci_private(dev_info_t *dip) 6292 { 6293 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6294 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6295 mdi_client_t *ct; 6296 ct = i_devi_get_client(dip); 6297 return (ct->ct_vprivate); 6298 } 6299 return (NULL); 6300 } 6301 6302 void 6303 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6304 { 6305 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6306 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6307 mdi_client_t *ct; 6308 ct = i_devi_get_client(dip); 6309 ct->ct_vprivate = data; 6310 } 6311 } 6312 /* 6313 * mdi_pi_get_vhci_private(): 6314 * Get the vhci private information associated with the 6315 * mdi_pathinfo node 6316 */ 6317 void * 6318 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6319 { 6320 caddr_t vprivate = NULL; 6321 if (pip) { 6322 vprivate = MDI_PI(pip)->pi_vprivate; 6323 } 6324 return (vprivate); 6325 } 6326 6327 /* 6328 * 
mdi_pi_set_vhci_private(): 6329 * Set the vhci private information in the mdi_pathinfo node 6330 */ 6331 void 6332 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6333 { 6334 if (pip) { 6335 MDI_PI(pip)->pi_vprivate = priv; 6336 } 6337 } 6338 6339 /* 6340 * mdi_phci_get_vhci_private(): 6341 * Get the vhci private information associated with the 6342 * mdi_phci node 6343 */ 6344 void * 6345 mdi_phci_get_vhci_private(dev_info_t *dip) 6346 { 6347 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6348 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6349 mdi_phci_t *ph; 6350 ph = i_devi_get_phci(dip); 6351 return (ph->ph_vprivate); 6352 } 6353 return (NULL); 6354 } 6355 6356 /* 6357 * mdi_phci_set_vhci_private(): 6358 * Set the vhci private information in the mdi_phci node 6359 */ 6360 void 6361 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6362 { 6363 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6364 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6365 mdi_phci_t *ph; 6366 ph = i_devi_get_phci(dip); 6367 ph->ph_vprivate = priv; 6368 } 6369 } 6370 6371 /* 6372 * List of vhci class names: 6373 * A vhci class name must be in this list only if the corresponding vhci 6374 * driver intends to use the mdi provided bus config implementation 6375 * (i.e., mdi_vhci_bus_config()). 6376 */ 6377 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6378 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6379 6380 /* 6381 * Built-in list of phci drivers for every vhci class. 6382 * All phci drivers expect iscsi have root device support. 6383 */ 6384 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 6385 { "fp", 1 }, 6386 { "iscsi", 0 }, 6387 { "ibsrp", 1 } 6388 }; 6389 6390 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 6391 6392 /* 6393 * During boot time, the on-disk vhci cache for every vhci class is read 6394 * in the form of an nvlist and stored here. 
6395 */ 6396 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6397 6398 /* nvpair names in vhci cache nvlist */ 6399 #define MDI_VHCI_CACHE_VERSION 1 6400 #define MDI_NVPNAME_VERSION "version" 6401 #define MDI_NVPNAME_PHCIS "phcis" 6402 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6403 6404 typedef enum { 6405 VHCACHE_NOT_REBUILT, 6406 VHCACHE_PARTIALLY_BUILT, 6407 VHCACHE_FULLY_BUILT 6408 } vhcache_build_status_t; 6409 6410 /* 6411 * Given vhci class name, return its on-disk vhci cache filename. 6412 * Memory for the returned filename which includes the full path is allocated 6413 * by this function. 6414 */ 6415 static char * 6416 vhclass2vhcache_filename(char *vhclass) 6417 { 6418 char *filename; 6419 int len; 6420 static char *fmt = "/etc/devices/mdi_%s_cache"; 6421 6422 /* 6423 * fmt contains the on-disk vhci cache file name format; 6424 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 6425 */ 6426 6427 /* the -1 below is to account for "%s" in the format string */ 6428 len = strlen(fmt) + strlen(vhclass) - 1; 6429 filename = kmem_alloc(len, KM_SLEEP); 6430 (void) snprintf(filename, len, fmt, vhclass); 6431 ASSERT(len == (strlen(filename) + 1)); 6432 return (filename); 6433 } 6434 6435 /* 6436 * initialize the vhci cache related data structures and read the on-disk 6437 * vhci cached data into memory. 
6438 */ 6439 static void 6440 setup_vhci_cache(mdi_vhci_t *vh) 6441 { 6442 mdi_vhci_config_t *vhc; 6443 mdi_vhci_cache_t *vhcache; 6444 int i; 6445 nvlist_t *nvl = NULL; 6446 6447 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6448 vh->vh_config = vhc; 6449 vhcache = &vhc->vhc_vhcache; 6450 6451 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6452 6453 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6454 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6455 6456 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6457 6458 /* 6459 * Create string hash; same as mod_hash_create_strhash() except that 6460 * we use NULL key destructor. 6461 */ 6462 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6463 mdi_bus_config_cache_hash_size, 6464 mod_hash_null_keydtor, mod_hash_null_valdtor, 6465 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6466 6467 setup_phci_driver_list(vh); 6468 6469 /* 6470 * The on-disk vhci cache is read during booting prior to the 6471 * lights-out period by mdi_read_devices_files(). 6472 */ 6473 for (i = 0; i < N_VHCI_CLASSES; i++) { 6474 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6475 nvl = vhcache_nvl[i]; 6476 vhcache_nvl[i] = NULL; 6477 break; 6478 } 6479 } 6480 6481 /* 6482 * this is to cover the case of some one manually causing unloading 6483 * (or detaching) and reloading (or attaching) of a vhci driver. 
6484 */ 6485 if (nvl == NULL && modrootloaded) 6486 nvl = read_on_disk_vhci_cache(vh->vh_class); 6487 6488 if (nvl != NULL) { 6489 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6490 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6491 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6492 else { 6493 cmn_err(CE_WARN, 6494 "%s: data file corrupted, will recreate\n", 6495 vhc->vhc_vhcache_filename); 6496 } 6497 rw_exit(&vhcache->vhcache_lock); 6498 nvlist_free(nvl); 6499 } 6500 6501 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6502 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6503 } 6504 6505 /* 6506 * free all vhci cache related resources 6507 */ 6508 static int 6509 destroy_vhci_cache(mdi_vhci_t *vh) 6510 { 6511 mdi_vhci_config_t *vhc = vh->vh_config; 6512 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6513 mdi_vhcache_phci_t *cphci, *cphci_next; 6514 mdi_vhcache_client_t *cct, *cct_next; 6515 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6516 6517 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6518 return (MDI_FAILURE); 6519 6520 kmem_free(vhc->vhc_vhcache_filename, 6521 strlen(vhc->vhc_vhcache_filename) + 1); 6522 6523 if (vhc->vhc_phci_driver_list) 6524 free_phci_driver_list(vhc); 6525 6526 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6527 6528 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6529 cphci = cphci_next) { 6530 cphci_next = cphci->cphci_next; 6531 free_vhcache_phci(cphci); 6532 } 6533 6534 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 6535 cct_next = cct->cct_next; 6536 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6537 cpi_next = cpi->cpi_next; 6538 free_vhcache_pathinfo(cpi); 6539 } 6540 free_vhcache_client(cct); 6541 } 6542 6543 rw_destroy(&vhcache->vhcache_lock); 6544 6545 mutex_destroy(&vhc->vhc_lock); 6546 cv_destroy(&vhc->vhc_cv); 6547 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6548 return (MDI_SUCCESS); 6549 } 6550 6551 /* 6552 * Setup the list of phci drivers 
associated with the specified vhci class. 6553 * MDI uses this information to rebuild bus config cache if in case the 6554 * cache is not available or corrupted. 6555 */ 6556 static void 6557 setup_phci_driver_list(mdi_vhci_t *vh) 6558 { 6559 mdi_vhci_config_t *vhc = vh->vh_config; 6560 mdi_phci_driver_info_t *driver_list; 6561 char **driver_list1; 6562 uint_t ndrivers, ndrivers1; 6563 int i, j; 6564 6565 if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) { 6566 driver_list = scsi_phci_driver_list; 6567 ndrivers = sizeof (scsi_phci_driver_list) / 6568 sizeof (mdi_phci_driver_info_t); 6569 } else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) { 6570 driver_list = ib_phci_driver_list; 6571 ndrivers = sizeof (ib_phci_driver_list) / 6572 sizeof (mdi_phci_driver_info_t); 6573 } else { 6574 driver_list = NULL; 6575 ndrivers = 0; 6576 } 6577 6578 /* 6579 * The driver.conf file of a vhci driver can specify additional 6580 * phci drivers using a project private "phci-drivers" property. 6581 */ 6582 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip, 6583 DDI_PROP_DONTPASS, "phci-drivers", &driver_list1, 6584 &ndrivers1) != DDI_PROP_SUCCESS) 6585 ndrivers1 = 0; 6586 6587 vhc->vhc_nphci_drivers = ndrivers + ndrivers1; 6588 if (vhc->vhc_nphci_drivers == 0) 6589 return; 6590 6591 vhc->vhc_phci_driver_list = kmem_alloc( 6592 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP); 6593 6594 for (i = 0; i < ndrivers; i++) { 6595 vhc->vhc_phci_driver_list[i].phdriver_name = 6596 i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP); 6597 vhc->vhc_phci_driver_list[i].phdriver_root_support = 6598 driver_list[i].phdriver_root_support; 6599 } 6600 6601 for (j = 0; j < ndrivers1; j++, i++) { 6602 vhc->vhc_phci_driver_list[i].phdriver_name = 6603 i_ddi_strdup(driver_list1[j], KM_SLEEP); 6604 vhc->vhc_phci_driver_list[i].phdriver_root_support = 1; 6605 } 6606 6607 if (ndrivers1) 6608 ddi_prop_free(driver_list1); 6609 } 6610 6611 /* 6612 * Free the memory allocated 
for the phci driver list 6613 */ 6614 static void 6615 free_phci_driver_list(mdi_vhci_config_t *vhc) 6616 { 6617 int i; 6618 6619 if (vhc->vhc_phci_driver_list == NULL) 6620 return; 6621 6622 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 6623 kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name, 6624 strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1); 6625 } 6626 6627 kmem_free(vhc->vhc_phci_driver_list, 6628 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers); 6629 } 6630 6631 /* 6632 * Stop all vhci cache related async threads and free their resources. 6633 */ 6634 static int 6635 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6636 { 6637 mdi_async_client_config_t *acc, *acc_next; 6638 6639 mutex_enter(&vhc->vhc_lock); 6640 vhc->vhc_flags |= MDI_VHC_EXIT; 6641 ASSERT(vhc->vhc_acc_thrcount >= 0); 6642 cv_broadcast(&vhc->vhc_cv); 6643 6644 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6645 (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) || 6646 vhc->vhc_acc_thrcount != 0) { 6647 mutex_exit(&vhc->vhc_lock); 6648 delay(1); 6649 mutex_enter(&vhc->vhc_lock); 6650 } 6651 6652 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6653 6654 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6655 acc_next = acc->acc_next; 6656 free_async_client_config(acc); 6657 } 6658 vhc->vhc_acc_list_head = NULL; 6659 vhc->vhc_acc_list_tail = NULL; 6660 vhc->vhc_acc_count = 0; 6661 6662 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6663 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6664 mutex_exit(&vhc->vhc_lock); 6665 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6666 vhcache_dirty(vhc); 6667 return (MDI_FAILURE); 6668 } 6669 } else 6670 mutex_exit(&vhc->vhc_lock); 6671 6672 if (callb_delete(vhc->vhc_cbid) != 0) 6673 return (MDI_FAILURE); 6674 6675 return (MDI_SUCCESS); 6676 } 6677 6678 /* 6679 * Stop vhci cache flush thread 6680 */ 6681 /* ARGSUSED */ 6682 static boolean_t 6683 stop_vhcache_flush_thread(void *arg, int code) 6684 { 6685 mdi_vhci_config_t *vhc = 
(mdi_vhci_config_t *)arg; 6686 6687 mutex_enter(&vhc->vhc_lock); 6688 vhc->vhc_flags |= MDI_VHC_EXIT; 6689 cv_broadcast(&vhc->vhc_cv); 6690 6691 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6692 mutex_exit(&vhc->vhc_lock); 6693 delay(1); 6694 mutex_enter(&vhc->vhc_lock); 6695 } 6696 6697 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6698 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6699 mutex_exit(&vhc->vhc_lock); 6700 (void) flush_vhcache(vhc, 1); 6701 } else 6702 mutex_exit(&vhc->vhc_lock); 6703 6704 return (B_TRUE); 6705 } 6706 6707 /* 6708 * Enqueue the vhcache phci (cphci) at the tail of the list 6709 */ 6710 static void 6711 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6712 { 6713 cphci->cphci_next = NULL; 6714 if (vhcache->vhcache_phci_head == NULL) 6715 vhcache->vhcache_phci_head = cphci; 6716 else 6717 vhcache->vhcache_phci_tail->cphci_next = cphci; 6718 vhcache->vhcache_phci_tail = cphci; 6719 } 6720 6721 /* 6722 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6723 */ 6724 static void 6725 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6726 mdi_vhcache_pathinfo_t *cpi) 6727 { 6728 cpi->cpi_next = NULL; 6729 if (cct->cct_cpi_head == NULL) 6730 cct->cct_cpi_head = cpi; 6731 else 6732 cct->cct_cpi_tail->cpi_next = cpi; 6733 cct->cct_cpi_tail = cpi; 6734 } 6735 6736 /* 6737 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6738 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6739 * flag set come at the beginning of the list. All cpis which have this 6740 * flag set come at the end of the list. 
6741 */ 6742 static void 6743 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6744 mdi_vhcache_pathinfo_t *newcpi) 6745 { 6746 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6747 6748 if (cct->cct_cpi_head == NULL || 6749 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6750 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6751 else { 6752 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6753 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6754 prev_cpi = cpi, cpi = cpi->cpi_next) 6755 ; 6756 6757 if (prev_cpi == NULL) 6758 cct->cct_cpi_head = newcpi; 6759 else 6760 prev_cpi->cpi_next = newcpi; 6761 6762 newcpi->cpi_next = cpi; 6763 6764 if (cpi == NULL) 6765 cct->cct_cpi_tail = newcpi; 6766 } 6767 } 6768 6769 /* 6770 * Enqueue the vhcache client (cct) at the tail of the list 6771 */ 6772 static void 6773 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6774 mdi_vhcache_client_t *cct) 6775 { 6776 cct->cct_next = NULL; 6777 if (vhcache->vhcache_client_head == NULL) 6778 vhcache->vhcache_client_head = cct; 6779 else 6780 vhcache->vhcache_client_tail->cct_next = cct; 6781 vhcache->vhcache_client_tail = cct; 6782 } 6783 6784 static void 6785 free_string_array(char **str, int nelem) 6786 { 6787 int i; 6788 6789 if (str) { 6790 for (i = 0; i < nelem; i++) { 6791 if (str[i]) 6792 kmem_free(str[i], strlen(str[i]) + 1); 6793 } 6794 kmem_free(str, sizeof (char *) * nelem); 6795 } 6796 } 6797 6798 static void 6799 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6800 { 6801 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6802 kmem_free(cphci, sizeof (*cphci)); 6803 } 6804 6805 static void 6806 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6807 { 6808 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6809 kmem_free(cpi, sizeof (*cpi)); 6810 } 6811 6812 static void 6813 free_vhcache_client(mdi_vhcache_client_t *cct) 6814 { 6815 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6816 kmem_free(cct, sizeof (*cct)); 6817 } 6818 6819 
static char * 6820 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6821 { 6822 char *name_addr; 6823 int len; 6824 6825 len = strlen(ct_name) + strlen(ct_addr) + 2; 6826 name_addr = kmem_alloc(len, KM_SLEEP); 6827 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6828 6829 if (ret_len) 6830 *ret_len = len; 6831 return (name_addr); 6832 } 6833 6834 /* 6835 * Copy the contents of paddrnvl to vhci cache. 6836 * paddrnvl nvlist contains path information for a vhci client. 6837 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 6838 */ 6839 static void 6840 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6841 mdi_vhcache_client_t *cct) 6842 { 6843 nvpair_t *nvp = NULL; 6844 mdi_vhcache_pathinfo_t *cpi; 6845 uint_t nelem; 6846 uint32_t *val; 6847 6848 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6849 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6850 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6851 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6852 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6853 ASSERT(nelem == 2); 6854 cpi->cpi_cphci = cphci_list[val[0]]; 6855 cpi->cpi_flags = val[1]; 6856 enqueue_tail_vhcache_pathinfo(cct, cpi); 6857 } 6858 } 6859 6860 /* 6861 * Copy the contents of caddrmapnvl to vhci cache. 6862 * caddrmapnvl nvlist contains vhci client address to phci client address 6863 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6864 * this nvlist. 
6865 */ 6866 static void 6867 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6868 mdi_vhcache_phci_t *cphci_list[]) 6869 { 6870 nvpair_t *nvp = NULL; 6871 nvlist_t *paddrnvl; 6872 mdi_vhcache_client_t *cct; 6873 6874 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6875 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6876 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6877 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6878 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6879 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6880 /* the client must contain at least one path */ 6881 ASSERT(cct->cct_cpi_head != NULL); 6882 6883 enqueue_vhcache_client(vhcache, cct); 6884 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6885 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6886 } 6887 } 6888 6889 /* 6890 * Copy the contents of the main nvlist to vhci cache. 6891 * 6892 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 6893 * The nvlist contains the mappings between the vhci client addresses and 6894 * their corresponding phci client addresses. 6895 * 6896 * The structure of the nvlist is as follows: 6897 * 6898 * Main nvlist: 6899 * NAME TYPE DATA 6900 * version int32 version number 6901 * phcis string array array of phci paths 6902 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 6903 * 6904 * structure of c2paddrs_nvl: 6905 * NAME TYPE DATA 6906 * caddr1 nvlist_t paddrs_nvl1 6907 * caddr2 nvlist_t paddrs_nvl2 6908 * ... 6909 * where caddr1, caddr2, ... are vhci client name and addresses in the 6910 * form of "<clientname>@<clientaddress>". 6911 * (for example: "ssd@2000002037cd9f72"); 6912 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 6913 * 6914 * structure of paddrs_nvl: 6915 * NAME TYPE DATA 6916 * pi_addr1 uint32_array (phci-id, cpi_flags) 6917 * pi_addr2 uint32_array (phci-id, cpi_flags) 6918 * ... 6919 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 6920 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 6921 * phci-ids are integers that identify PHCIs to which the 6922 * the bus specific address belongs to. These integers are used as an index 6923 * into to the phcis string array in the main nvlist to get the PHCI path. 6924 */ 6925 static int 6926 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 6927 { 6928 char **phcis, **phci_namep; 6929 uint_t nphcis; 6930 mdi_vhcache_phci_t *cphci, **cphci_list; 6931 nvlist_t *caddrmapnvl; 6932 int32_t ver; 6933 int i; 6934 size_t cphci_list_size; 6935 6936 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 6937 6938 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 6939 ver != MDI_VHCI_CACHE_VERSION) 6940 return (MDI_FAILURE); 6941 6942 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 6943 &nphcis) != 0) 6944 return (MDI_SUCCESS); 6945 6946 ASSERT(nphcis > 0); 6947 6948 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 6949 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 6950 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 6951 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 6952 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 6953 enqueue_vhcache_phci(vhcache, cphci); 6954 cphci_list[i] = cphci; 6955 } 6956 6957 ASSERT(vhcache->vhcache_phci_head != NULL); 6958 6959 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 6960 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 6961 6962 kmem_free(cphci_list, cphci_list_size); 6963 return (MDI_SUCCESS); 6964 } 6965 6966 /* 6967 * Build paddrnvl for the specified client using the information in the 6968 * vhci cache and add it to the caddrmapnnvl. 6969 * Returns 0 on success, errno on failure. 
6970 */ 6971 static int 6972 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 6973 nvlist_t *caddrmapnvl) 6974 { 6975 mdi_vhcache_pathinfo_t *cpi; 6976 nvlist_t *nvl; 6977 int err; 6978 uint32_t val[2]; 6979 6980 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 6981 6982 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 6983 return (err); 6984 6985 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 6986 val[0] = cpi->cpi_cphci->cphci_id; 6987 val[1] = cpi->cpi_flags; 6988 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 6989 != 0) 6990 goto out; 6991 } 6992 6993 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 6994 out: 6995 nvlist_free(nvl); 6996 return (err); 6997 } 6998 6999 /* 7000 * Build caddrmapnvl using the information in the vhci cache 7001 * and add it to the mainnvl. 7002 * Returns 0 on success, errno on failure. 7003 */ 7004 static int 7005 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7006 { 7007 mdi_vhcache_client_t *cct; 7008 nvlist_t *nvl; 7009 int err; 7010 7011 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7012 7013 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7014 return (err); 7015 7016 for (cct = vhcache->vhcache_client_head; cct != NULL; 7017 cct = cct->cct_next) { 7018 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7019 goto out; 7020 } 7021 7022 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7023 out: 7024 nvlist_free(nvl); 7025 return (err); 7026 } 7027 7028 /* 7029 * Build nvlist using the information in the vhci cache. 7030 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7031 * Returns nvl on success, NULL on failure. 
7032 */ 7033 static nvlist_t * 7034 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7035 { 7036 mdi_vhcache_phci_t *cphci; 7037 uint_t phci_count; 7038 char **phcis; 7039 nvlist_t *nvl; 7040 int err, i; 7041 7042 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7043 nvl = NULL; 7044 goto out; 7045 } 7046 7047 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7048 MDI_VHCI_CACHE_VERSION)) != 0) 7049 goto out; 7050 7051 rw_enter(&vhcache->vhcache_lock, RW_READER); 7052 if (vhcache->vhcache_phci_head == NULL) { 7053 rw_exit(&vhcache->vhcache_lock); 7054 return (nvl); 7055 } 7056 7057 phci_count = 0; 7058 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7059 cphci = cphci->cphci_next) 7060 cphci->cphci_id = phci_count++; 7061 7062 /* build phci pathname list */ 7063 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7064 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7065 cphci = cphci->cphci_next, i++) 7066 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7067 7068 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7069 phci_count); 7070 free_string_array(phcis, phci_count); 7071 7072 if (err == 0 && 7073 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7074 rw_exit(&vhcache->vhcache_lock); 7075 return (nvl); 7076 } 7077 7078 rw_exit(&vhcache->vhcache_lock); 7079 out: 7080 if (nvl) 7081 nvlist_free(nvl); 7082 return (NULL); 7083 } 7084 7085 /* 7086 * Lookup vhcache phci structure for the specified phci path. 7087 */ 7088 static mdi_vhcache_phci_t * 7089 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7090 { 7091 mdi_vhcache_phci_t *cphci; 7092 7093 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7094 7095 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7096 cphci = cphci->cphci_next) { 7097 if (strcmp(cphci->cphci_path, phci_path) == 0) 7098 return (cphci); 7099 } 7100 7101 return (NULL); 7102 } 7103 7104 /* 7105 * Lookup vhcache phci structure for the specified phci. 
7106 */ 7107 static mdi_vhcache_phci_t * 7108 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7109 { 7110 mdi_vhcache_phci_t *cphci; 7111 7112 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7113 7114 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7115 cphci = cphci->cphci_next) { 7116 if (cphci->cphci_phci == ph) 7117 return (cphci); 7118 } 7119 7120 return (NULL); 7121 } 7122 7123 /* 7124 * Add the specified phci to the vhci cache if not already present. 7125 */ 7126 static void 7127 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7128 { 7129 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7130 mdi_vhcache_phci_t *cphci; 7131 char *pathname; 7132 int cache_updated; 7133 7134 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7135 7136 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7137 (void) ddi_pathname(ph->ph_dip, pathname); 7138 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7139 != NULL) { 7140 cphci->cphci_phci = ph; 7141 cache_updated = 0; 7142 } else { 7143 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7144 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7145 cphci->cphci_phci = ph; 7146 enqueue_vhcache_phci(vhcache, cphci); 7147 cache_updated = 1; 7148 } 7149 rw_exit(&vhcache->vhcache_lock); 7150 7151 kmem_free(pathname, MAXPATHLEN); 7152 if (cache_updated) 7153 vhcache_dirty(vhc); 7154 } 7155 7156 /* 7157 * Remove the reference to the specified phci from the vhci cache. 
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
		/* do not remove the actual mdi_vhcache_phci structure */
		cphci->cphci_phci = NULL;
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Initialize the lookup token dst.
 * If src is NULL, dst is reset to the empty state (NULL cct, lookup time 0);
 * otherwise dst becomes a copy of src.
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
	if (src == NULL) {
		dst->lt_cct = NULL;
		dst->lt_cct_lookup_time = 0;
	} else {
		dst->lt_cct = src->lt_cct;
		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
	}
}

/*
 * Look up vhcache client for the specified client.
 *
 * The caller must hold vhcache_lock (asserted below). token may be NULL;
 * when it is not, it is used as a one-entry lookup cache and is updated
 * with the result of a hash lookup (or reset when the lookup fails).
 * Returns the vhcache client, or NULL if there is none.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mod_hash_val_t hv;
	char *name_addr;
	int len;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	/* name_addr is a temporary hash key; it is freed before returning */
	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)name_addr, &hv) == 0) {
		if (token) {
			token->lt_cct = (mdi_vhcache_client_t *)hv;
			token->lt_cct_lookup_time = lbolt64;
		}
	} else {
		if (token) {
			token->lt_cct = NULL;
			token->lt_cct_lookup_time = 0;
		}
		hv = NULL;
	}
	kmem_free(name_addr, len);
	return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 *
 * The cache is marked dirty (after vhcache_lock has been dropped) only if
 * something in it actually changed.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phci_t *ph = pip->pi_phci;
	mdi_client_t *ct = pip->pi_client;
	int cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/*
	 * Look for an existing cached path with the same phci and address.
	 * If found, attach pip to it and clear a stale "does not exist" hint.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	/* cpi is NULL only if the loop above found no matching entry */
	if (cpi == NULL) {
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 * The cached pathinfo entry itself is kept; only its cpi_pip pointer is
 * cleared.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_client_t *ct = pip->pi_client;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) != NULL) {
		for (cpi = cct->cct_cpi_head; cpi != NULL;
		    cpi = cpi->cpi_next) {
			if (cpi->cpi_pip == pip) {
				cpi->cpi_pip = NULL;
				break;
			}
		}
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 *
 * An EROFS write error is not reported as a failure: the vhci is flagged
 * MDI_VHC_READONLY_FS and the dirty/error flags are cleared instead.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* log only the first failure of a run of failures */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		/* first success after an earlier logged failure */
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* number of seconds to sleep idle before exiting */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/*
		 * While the cache is dirty: sleep until the scheduled flush
		 * time, then flush with vhc_lock dropped.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				/*
				 * Clear the dirty flag before dropping the
				 * lock; a failed flush calls vhcache_dirty()
				 * to schedule another attempt.
				 */
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* idle wait: until exit is requested, dirty again, or timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* keep looping only if the cache became dirty again */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	/* vhc_lock is held here on every path */
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7422 */ 7423 static void 7424 vhcache_dirty(mdi_vhci_config_t *vhc) 7425 { 7426 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7427 int create_thread; 7428 7429 rw_enter(&vhcache->vhcache_lock, RW_READER); 7430 /* do not flush cache until the cache is fully built */ 7431 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7432 rw_exit(&vhcache->vhcache_lock); 7433 return; 7434 } 7435 rw_exit(&vhcache->vhcache_lock); 7436 7437 mutex_enter(&vhc->vhc_lock); 7438 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7439 mutex_exit(&vhc->vhc_lock); 7440 return; 7441 } 7442 7443 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7444 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7445 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7446 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7447 cv_broadcast(&vhc->vhc_cv); 7448 create_thread = 0; 7449 } else { 7450 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7451 create_thread = 1; 7452 } 7453 mutex_exit(&vhc->vhc_lock); 7454 7455 if (create_thread) 7456 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7457 0, &p0, TS_RUN, minclsyspri); 7458 } 7459 7460 /* 7461 * phci bus config structure - one for for each phci bus config operation that 7462 * we initiate on behalf of a vhci. 
7463 */ 7464 typedef struct mdi_phci_bus_config_s { 7465 char *phbc_phci_path; 7466 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7467 struct mdi_phci_bus_config_s *phbc_next; 7468 } mdi_phci_bus_config_t; 7469 7470 /* vhci bus config structure - one for each vhci bus config operation */ 7471 typedef struct mdi_vhci_bus_config_s { 7472 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7473 major_t vhbc_op_major; /* bus config op major */ 7474 uint_t vhbc_op_flags; /* bus config op flags */ 7475 kmutex_t vhbc_lock; 7476 kcondvar_t vhbc_cv; 7477 int vhbc_thr_count; 7478 } mdi_vhci_bus_config_t; 7479 7480 /* 7481 * bus config the specified phci 7482 */ 7483 static void 7484 bus_config_phci(void *arg) 7485 { 7486 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7487 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7488 dev_info_t *ph_dip; 7489 7490 /* 7491 * first configure all path components upto phci and then configure 7492 * the phci children. 7493 */ 7494 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7495 != NULL) { 7496 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7497 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7498 (void) ndi_devi_config_driver(ph_dip, 7499 vhbc->vhbc_op_flags, 7500 vhbc->vhbc_op_major); 7501 } else 7502 (void) ndi_devi_config(ph_dip, 7503 vhbc->vhbc_op_flags); 7504 7505 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7506 ndi_rele_devi(ph_dip); 7507 } 7508 7509 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7510 kmem_free(phbc, sizeof (*phbc)); 7511 7512 mutex_enter(&vhbc->vhbc_lock); 7513 vhbc->vhbc_thr_count--; 7514 if (vhbc->vhbc_thr_count == 0) 7515 cv_broadcast(&vhbc->vhbc_cv); 7516 mutex_exit(&vhbc->vhbc_lock); 7517 } 7518 7519 /* 7520 * Bus config all phcis associated with the vhci in parallel. 7521 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7522 */ 7523 static void 7524 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7525 ddi_bus_config_op_t op, major_t maj) 7526 { 7527 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7528 mdi_vhci_bus_config_t *vhbc; 7529 mdi_vhcache_phci_t *cphci; 7530 7531 rw_enter(&vhcache->vhcache_lock, RW_READER); 7532 if (vhcache->vhcache_phci_head == NULL) { 7533 rw_exit(&vhcache->vhcache_lock); 7534 return; 7535 } 7536 7537 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7538 7539 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7540 cphci = cphci->cphci_next) { 7541 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7542 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7543 KM_SLEEP); 7544 phbc->phbc_vhbusconfig = vhbc; 7545 phbc->phbc_next = phbc_head; 7546 phbc_head = phbc; 7547 vhbc->vhbc_thr_count++; 7548 } 7549 rw_exit(&vhcache->vhcache_lock); 7550 7551 vhbc->vhbc_op = op; 7552 vhbc->vhbc_op_major = maj; 7553 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7554 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7555 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7556 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7557 7558 /* now create threads to initiate bus config on all phcis in parallel */ 7559 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7560 phbc_next = phbc->phbc_next; 7561 if (mdi_mtc_off) 7562 bus_config_phci((void *)phbc); 7563 else 7564 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7565 0, &p0, TS_RUN, minclsyspri); 7566 } 7567 7568 mutex_enter(&vhbc->vhbc_lock); 7569 /* wait until all threads exit */ 7570 while (vhbc->vhbc_thr_count > 0) 7571 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7572 mutex_exit(&vhbc->vhbc_lock); 7573 7574 mutex_destroy(&vhbc->vhbc_lock); 7575 cv_destroy(&vhbc->vhbc_cv); 7576 kmem_free(vhbc, sizeof (*vhbc)); 7577 } 7578 7579 /* 7580 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7581 * The path includes the child component in addition to the phci path. 
7582 */ 7583 static int 7584 bus_config_one_phci_child(char *path) 7585 { 7586 dev_info_t *ph_dip, *child; 7587 char *devnm; 7588 int rv = MDI_FAILURE; 7589 7590 /* extract the child component of the phci */ 7591 devnm = strrchr(path, '/'); 7592 *devnm++ = '\0'; 7593 7594 /* 7595 * first configure all path components upto phci and then 7596 * configure the phci child. 7597 */ 7598 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7599 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7600 NDI_SUCCESS) { 7601 /* 7602 * release the hold that ndi_devi_config_one() placed 7603 */ 7604 ndi_rele_devi(child); 7605 rv = MDI_SUCCESS; 7606 } 7607 7608 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7609 ndi_rele_devi(ph_dip); 7610 } 7611 7612 devnm--; 7613 *devnm = '/'; 7614 return (rv); 7615 } 7616 7617 /* 7618 * Build a list of phci client paths for the specified vhci client. 7619 * The list includes only those phci client paths which aren't configured yet. 7620 */ 7621 static mdi_phys_path_t * 7622 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7623 { 7624 mdi_vhcache_pathinfo_t *cpi; 7625 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7626 int config_path, len; 7627 7628 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7629 /* 7630 * include only those paths that aren't configured. 
7631 */ 7632 config_path = 0; 7633 if (cpi->cpi_pip == NULL) 7634 config_path = 1; 7635 else { 7636 MDI_PI_LOCK(cpi->cpi_pip); 7637 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7638 config_path = 1; 7639 MDI_PI_UNLOCK(cpi->cpi_pip); 7640 } 7641 7642 if (config_path) { 7643 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7644 len = strlen(cpi->cpi_cphci->cphci_path) + 7645 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7646 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7647 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7648 cpi->cpi_cphci->cphci_path, ct_name, 7649 cpi->cpi_addr); 7650 pp->phys_path_next = NULL; 7651 7652 if (pp_head == NULL) 7653 pp_head = pp; 7654 else 7655 pp_tail->phys_path_next = pp; 7656 pp_tail = pp; 7657 } 7658 } 7659 7660 return (pp_head); 7661 } 7662 7663 /* 7664 * Free the memory allocated for phci client path list. 7665 */ 7666 static void 7667 free_phclient_path_list(mdi_phys_path_t *pp_head) 7668 { 7669 mdi_phys_path_t *pp, *pp_next; 7670 7671 for (pp = pp_head; pp != NULL; pp = pp_next) { 7672 pp_next = pp->phys_path_next; 7673 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7674 kmem_free(pp, sizeof (*pp)); 7675 } 7676 } 7677 7678 /* 7679 * Allocated async client structure and initialize with the specified values. 7680 */ 7681 static mdi_async_client_config_t * 7682 alloc_async_client_config(char *ct_name, char *ct_addr, 7683 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7684 { 7685 mdi_async_client_config_t *acc; 7686 7687 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7688 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7689 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7690 acc->acc_phclient_path_list_head = pp_head; 7691 init_vhcache_lookup_token(&acc->acc_token, tok); 7692 acc->acc_next = NULL; 7693 return (acc); 7694 } 7695 7696 /* 7697 * Free the memory allocated for the async client structure and their members. 
7698 */ 7699 static void 7700 free_async_client_config(mdi_async_client_config_t *acc) 7701 { 7702 if (acc->acc_phclient_path_list_head) 7703 free_phclient_path_list(acc->acc_phclient_path_list_head); 7704 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7705 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7706 kmem_free(acc, sizeof (*acc)); 7707 } 7708 7709 /* 7710 * Sort vhcache pathinfos (cpis) of the specified client. 7711 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7712 * flag set come at the beginning of the list. All cpis which have this 7713 * flag set come at the end of the list. 7714 */ 7715 static void 7716 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7717 { 7718 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7719 7720 cpi_head = cct->cct_cpi_head; 7721 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7722 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7723 cpi_next = cpi->cpi_next; 7724 enqueue_vhcache_pathinfo(cct, cpi); 7725 } 7726 } 7727 7728 /* 7729 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7730 * every vhcache pathinfo of the specified client. If not adjust the flag 7731 * setting appropriately. 7732 * 7733 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7734 * on-disk vhci cache. So every time this flag is updated the cache must be 7735 * flushed. 7736 */ 7737 static void 7738 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7739 mdi_vhcache_lookup_token_t *tok) 7740 { 7741 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7742 mdi_vhcache_client_t *cct; 7743 mdi_vhcache_pathinfo_t *cpi; 7744 7745 rw_enter(&vhcache->vhcache_lock, RW_READER); 7746 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7747 == NULL) { 7748 rw_exit(&vhcache->vhcache_lock); 7749 return; 7750 } 7751 7752 /* 7753 * to avoid unnecessary on-disk cache updates, first check if an 7754 * update is really needed. 
If no update is needed simply return. 7755 */ 7756 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7757 if ((cpi->cpi_pip != NULL && 7758 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7759 (cpi->cpi_pip == NULL && 7760 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7761 break; 7762 } 7763 } 7764 if (cpi == NULL) { 7765 rw_exit(&vhcache->vhcache_lock); 7766 return; 7767 } 7768 7769 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7770 rw_exit(&vhcache->vhcache_lock); 7771 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7772 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7773 tok)) == NULL) { 7774 rw_exit(&vhcache->vhcache_lock); 7775 return; 7776 } 7777 } 7778 7779 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7780 if (cpi->cpi_pip != NULL) 7781 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7782 else 7783 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7784 } 7785 sort_vhcache_paths(cct); 7786 7787 rw_exit(&vhcache->vhcache_lock); 7788 vhcache_dirty(vhc); 7789 } 7790 7791 /* 7792 * Configure all specified paths of the client. 7793 */ 7794 static void 7795 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7796 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7797 { 7798 mdi_phys_path_t *pp; 7799 7800 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 7801 (void) bus_config_one_phci_child(pp->phys_path); 7802 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 7803 } 7804 7805 /* 7806 * Dequeue elements from vhci async client config list and bus configure 7807 * their corresponding phci clients. 
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		/* the idle timeout restarts after every processed element */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work, an exit request, or the idle timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* leave (with vhc_lock still held) on exit or when idle */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* unlink the head element while holding the lock */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* the bus config work itself runs without vhc_lock */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		/* also frees the path list owned by acc */
		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
7861 */ 7862 static void 7863 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7864 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7865 { 7866 mdi_async_client_config_t *acc, *newacc; 7867 int create_thread; 7868 7869 if (pp_head == NULL) 7870 return; 7871 7872 if (mdi_mtc_off) { 7873 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7874 free_phclient_path_list(pp_head); 7875 return; 7876 } 7877 7878 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7879 ASSERT(newacc); 7880 7881 mutex_enter(&vhc->vhc_lock); 7882 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7883 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 7884 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 7885 free_async_client_config(newacc); 7886 mutex_exit(&vhc->vhc_lock); 7887 return; 7888 } 7889 } 7890 7891 if (vhc->vhc_acc_list_head == NULL) 7892 vhc->vhc_acc_list_head = newacc; 7893 else 7894 vhc->vhc_acc_list_tail->acc_next = newacc; 7895 vhc->vhc_acc_list_tail = newacc; 7896 vhc->vhc_acc_count++; 7897 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 7898 cv_broadcast(&vhc->vhc_cv); 7899 create_thread = 0; 7900 } else { 7901 vhc->vhc_acc_thrcount++; 7902 create_thread = 1; 7903 } 7904 mutex_exit(&vhc->vhc_lock); 7905 7906 if (create_thread) 7907 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 7908 0, &p0, TS_RUN, minclsyspri); 7909 } 7910 7911 /* 7912 * Return number of online paths for the specified client. 
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/* cpis without a pathinfo node are not counted */
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Must be called with vhcache_lock held; the lock is released on every
 * return path.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	/* bail out if the client is unknown or has nothing to configure */
	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		/* the whole list is handed over; it is not freed here */
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* the lock was dropped, so look the client up again */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				/*
				 * Hand the remainder of the list over and
				 * cut it off from the part freed below.
				 */
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Wait until no other thread holds the MDI_VHC_SINGLE_THREADED flag of the
 * vhci and then take it. Released by single_threaded_vhconfig_exit().
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Clear the MDI_VHC_SINGLE_THREADED flag and wake up the threads waiting
 * on vhc_cv.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Attach the phci driver instances associated with the vhci:
 *	If root is mounted attach all phci driver instances.
 *	If root is not mounted, attach the instances of only those phci
 *	drivers that have the root support.
 */
static void
attach_phci_drivers(mdi_vhci_config_t *vhc, int root_mounted)
{
	int i;
	major_t m;

	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
		if (root_mounted == 0 &&
		    vhc->vhc_phci_driver_list[i].phdriver_root_support == 0)
			continue;

		m = ddi_name_to_major(
		    vhc->vhc_phci_driver_list[i].phdriver_name);
		if (m != (major_t)-1) {
			/* the driver hold is dropped again right away */
			if (ddi_hold_installed_driver(m) != NULL)
				ddi_rele_driver(m);
		}
	}
}

/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
8046 * 8047 * Cache is built fully if the root is mounted (i.e., root_mounted is nonzero). 8048 * 8049 * If the root is not mounted, phci drivers that do not have root support 8050 * are not attached. As a result the cache is built partially. The entries 8051 * in the cache reflect only those phci drivers that have root support. 8052 */ 8053 static vhcache_build_status_t 8054 build_vhci_cache(mdi_vhci_config_t *vhc, int root_mounted) 8055 { 8056 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8057 8058 rw_enter(&vhcache->vhcache_lock, RW_READER); 8059 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8060 rw_exit(&vhcache->vhcache_lock); 8061 return (VHCACHE_NOT_REBUILT); 8062 } 8063 rw_exit(&vhcache->vhcache_lock); 8064 8065 attach_phci_drivers(vhc, root_mounted); 8066 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8067 BUS_CONFIG_ALL, (major_t)-1); 8068 8069 if (root_mounted) { 8070 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8071 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8072 rw_exit(&vhcache->vhcache_lock); 8073 vhcache_dirty(vhc); 8074 return (VHCACHE_FULLY_BUILT); 8075 } else 8076 return (VHCACHE_PARTIALLY_BUILT); 8077 } 8078 8079 /* 8080 * Wait until the root is mounted and then build the vhci cache. 8081 */ 8082 static void 8083 build_vhci_cache_thread(void *arg) 8084 { 8085 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8086 8087 mutex_enter(&vhc->vhc_lock); 8088 while (!modrootloaded && !(vhc->vhc_flags & MDI_VHC_EXIT)) { 8089 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8090 ddi_get_lbolt() + 10 * TICKS_PER_SECOND); 8091 } 8092 if (vhc->vhc_flags & MDI_VHC_EXIT) 8093 goto out; 8094 8095 mutex_exit(&vhc->vhc_lock); 8096 8097 /* 8098 * Now that the root is mounted. So build_vhci_cache() will build 8099 * the full cache. 
8100 */ 8101 (void) build_vhci_cache(vhc, 1); 8102 8103 mutex_enter(&vhc->vhc_lock); 8104 out: 8105 vhc->vhc_flags &= ~MDI_VHC_BUILD_VHCI_CACHE_THREAD; 8106 mutex_exit(&vhc->vhc_lock); 8107 } 8108 8109 /* 8110 * Build vhci cache - a wrapper for build_vhci_cache(). 8111 * 8112 * In a normal case on-disk vhci cache is read and setup during booting. 8113 * But if the on-disk vhci cache is not there or deleted or corrupted then 8114 * this function sets up the vhci cache. 8115 * 8116 * The cache is built fully if the root is mounted. 8117 * 8118 * If the root is not mounted, initially the cache is built reflecting only 8119 * those driver entries that have the root support. A separate thread is 8120 * created to handle the creation of full cache. This thread will wait 8121 * until the root is mounted and then rebuilds the cache. 8122 */ 8123 static int 8124 e_build_vhci_cache(mdi_vhci_config_t *vhc) 8125 { 8126 vhcache_build_status_t rv; 8127 8128 single_threaded_vhconfig_enter(vhc); 8129 8130 mutex_enter(&vhc->vhc_lock); 8131 if (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) { 8132 if (modrootloaded) { 8133 cv_broadcast(&vhc->vhc_cv); 8134 /* wait until build vhci cache thread exits */ 8135 while (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) 8136 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8137 rv = VHCACHE_FULLY_BUILT; 8138 } else { 8139 /* 8140 * The presense of MDI_VHC_BUILD_VHCI_CACHE_THREAD 8141 * flag indicates that the cache has already been 8142 * partially built. 8143 */ 8144 rv = VHCACHE_PARTIALLY_BUILT; 8145 } 8146 8147 mutex_exit(&vhc->vhc_lock); 8148 single_threaded_vhconfig_exit(vhc); 8149 return (rv); 8150 } 8151 mutex_exit(&vhc->vhc_lock); 8152 8153 rv = build_vhci_cache(vhc, modrootloaded); 8154 8155 if (rv == VHCACHE_PARTIALLY_BUILT) { 8156 /* 8157 * create a thread; this thread will wait until the root is 8158 * mounted and then fully rebuilds the cache. 
8159 */ 8160 mutex_enter(&vhc->vhc_lock); 8161 vhc->vhc_flags |= MDI_VHC_BUILD_VHCI_CACHE_THREAD; 8162 mutex_exit(&vhc->vhc_lock); 8163 (void) thread_create(NULL, 0, build_vhci_cache_thread, 8164 vhc, 0, &p0, TS_RUN, minclsyspri); 8165 } 8166 8167 single_threaded_vhconfig_exit(vhc); 8168 return (rv); 8169 } 8170 8171 /* 8172 * Generic vhci bus config implementation: 8173 * 8174 * Parameters 8175 * vdip vhci dip 8176 * flags bus config flags 8177 * op bus config operation 8178 * The remaining parameters are bus config operation specific 8179 * 8180 * for BUS_CONFIG_ONE 8181 * arg pointer to name@addr 8182 * child upon successful return from this function, *child will be 8183 * set to the configured and held devinfo child node of vdip. 8184 * ct_addr pointer to client address (i.e. GUID) 8185 * 8186 * for BUS_CONFIG_DRIVER 8187 * arg major number of the driver 8188 * child and ct_addr parameters are ignored 8189 * 8190 * for BUS_CONFIG_ALL 8191 * arg, child, and ct_addr parameters are ignored 8192 * 8193 * Note that for the rest of the bus config operations, this function simply 8194 * calls the framework provided default bus config routine. 8195 */ 8196 int 8197 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8198 void *arg, dev_info_t **child, char *ct_addr) 8199 { 8200 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8201 mdi_vhci_config_t *vhc = vh->vh_config; 8202 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8203 vhcache_build_status_t rv = VHCACHE_NOT_REBUILT; 8204 char *cp; 8205 8206 /* 8207 * While bus configuring phcis, the phci driver interactions with MDI 8208 * cause child nodes to be enumerated under the vhci node for which 8209 * they need to ndi_devi_enter the vhci node. 8210 * 8211 * Unfortunately, to avoid the deadlock, we ourself can not wait for 8212 * for the bus config operations on phcis to finish while holding the 8213 * ndi_devi_enter lock. 
To avoid this deadlock, skip bus configs on 8214 * phcis and call the default framework provided bus config function 8215 * if we are called with ndi_devi_enter lock held. 8216 */ 8217 if (DEVI_BUSY_OWNED(vdip)) { 8218 MDI_DEBUG(2, (CE_NOTE, vdip, 8219 "!MDI: vhci bus config: vhci dip is busy owned\n")); 8220 goto default_bus_config; 8221 } 8222 8223 rw_enter(&vhcache->vhcache_lock, RW_READER); 8224 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8225 rw_exit(&vhcache->vhcache_lock); 8226 rv = e_build_vhci_cache(vhc); 8227 rw_enter(&vhcache->vhcache_lock, RW_READER); 8228 } 8229 8230 switch (op) { 8231 case BUS_CONFIG_ONE: 8232 /* extract node name */ 8233 cp = (char *)arg; 8234 while (*cp != '\0' && *cp != '@') 8235 cp++; 8236 if (*cp == '@') { 8237 *cp = '\0'; 8238 config_client_paths(vhc, (char *)arg, ct_addr); 8239 /* config_client_paths() releases the cache_lock */ 8240 *cp = '@'; 8241 } else 8242 rw_exit(&vhcache->vhcache_lock); 8243 break; 8244 8245 case BUS_CONFIG_DRIVER: 8246 rw_exit(&vhcache->vhcache_lock); 8247 if (rv == VHCACHE_NOT_REBUILT) 8248 bus_config_all_phcis(vhcache, flags, op, 8249 (major_t)(uintptr_t)arg); 8250 break; 8251 8252 case BUS_CONFIG_ALL: 8253 rw_exit(&vhcache->vhcache_lock); 8254 if (rv == VHCACHE_NOT_REBUILT) 8255 bus_config_all_phcis(vhcache, flags, op, -1); 8256 break; 8257 8258 default: 8259 rw_exit(&vhcache->vhcache_lock); 8260 break; 8261 } 8262 8263 8264 default_bus_config: 8265 /* 8266 * All requested child nodes are enumerated under the vhci. 8267 * Now configure them. 8268 */ 8269 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8270 NDI_SUCCESS) { 8271 return (MDI_SUCCESS); 8272 } 8273 8274 return (MDI_FAILURE); 8275 } 8276 8277 /* 8278 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 
8279 */ 8280 static nvlist_t * 8281 read_on_disk_vhci_cache(char *vhci_class) 8282 { 8283 nvlist_t *nvl; 8284 int err; 8285 char *filename; 8286 8287 filename = vhclass2vhcache_filename(vhci_class); 8288 8289 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8290 kmem_free(filename, strlen(filename) + 1); 8291 return (nvl); 8292 } else if (err == EIO) 8293 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8294 else if (err == EINVAL) 8295 cmn_err(CE_WARN, 8296 "%s: data file corrupted, will recreate\n", filename); 8297 8298 kmem_free(filename, strlen(filename) + 1); 8299 return (NULL); 8300 } 8301 8302 /* 8303 * Read on-disk vhci cache into nvlists for all vhci classes. 8304 * Called during booting by i_ddi_read_devices_files(). 8305 */ 8306 void 8307 mdi_read_devices_files(void) 8308 { 8309 int i; 8310 8311 for (i = 0; i < N_VHCI_CLASSES; i++) 8312 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8313 } 8314 8315 /* 8316 * Remove all stale entries from vhci cache. 
8317 */ 8318 static void 8319 clean_vhcache(mdi_vhci_config_t *vhc) 8320 { 8321 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8322 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8323 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8324 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8325 8326 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8327 8328 cct_head = vhcache->vhcache_client_head; 8329 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8330 for (cct = cct_head; cct != NULL; cct = cct_next) { 8331 cct_next = cct->cct_next; 8332 8333 cpi_head = cct->cct_cpi_head; 8334 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8335 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8336 cpi_next = cpi->cpi_next; 8337 if (cpi->cpi_pip != NULL) { 8338 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8339 enqueue_tail_vhcache_pathinfo(cct, cpi); 8340 } else 8341 free_vhcache_pathinfo(cpi); 8342 } 8343 8344 if (cct->cct_cpi_head != NULL) 8345 enqueue_vhcache_client(vhcache, cct); 8346 else { 8347 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8348 (mod_hash_key_t)cct->cct_name_addr); 8349 free_vhcache_client(cct); 8350 } 8351 } 8352 8353 cphci_head = vhcache->vhcache_phci_head; 8354 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8355 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8356 cphci_next = cphci->cphci_next; 8357 if (cphci->cphci_phci != NULL) 8358 enqueue_vhcache_phci(vhcache, cphci); 8359 else 8360 free_vhcache_phci(cphci); 8361 } 8362 8363 vhcache->vhcache_clean_time = lbolt64; 8364 rw_exit(&vhcache->vhcache_lock); 8365 vhcache_dirty(vhc); 8366 } 8367 8368 /* 8369 * Remove all stale entries from vhci cache. 
8370 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8371 */ 8372 void 8373 mdi_clean_vhcache(void) 8374 { 8375 mdi_vhci_t *vh; 8376 8377 mutex_enter(&mdi_mutex); 8378 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8379 vh->vh_refcnt++; 8380 mutex_exit(&mdi_mutex); 8381 clean_vhcache(vh->vh_config); 8382 mutex_enter(&mdi_mutex); 8383 vh->vh_refcnt--; 8384 } 8385 mutex_exit(&mdi_mutex); 8386 } 8387 8388 /* 8389 * mdi_vhci_walk_clients(): 8390 * Walker routine to traverse client dev_info nodes 8391 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8392 * below the client, including nexus devices, which we dont want. 8393 * So we just traverse the immediate siblings, starting from 1st client. 8394 */ 8395 void 8396 mdi_vhci_walk_clients(dev_info_t *vdip, 8397 int (*f)(dev_info_t *, void *), void *arg) 8398 { 8399 dev_info_t *cdip; 8400 mdi_client_t *ct; 8401 8402 mutex_enter(&mdi_mutex); 8403 8404 cdip = ddi_get_child(vdip); 8405 8406 while (cdip) { 8407 ct = i_devi_get_client(cdip); 8408 MDI_CLIENT_LOCK(ct); 8409 8410 switch ((*f)(cdip, arg)) { 8411 case DDI_WALK_CONTINUE: 8412 cdip = ddi_get_next_sibling(cdip); 8413 MDI_CLIENT_UNLOCK(ct); 8414 break; 8415 8416 default: 8417 MDI_CLIENT_UNLOCK(ct); 8418 mutex_exit(&mdi_mutex); 8419 return; 8420 } 8421 } 8422 8423 mutex_exit(&mdi_mutex); 8424 } 8425 8426 /* 8427 * mdi_vhci_walk_phcis(): 8428 * Walker routine to traverse phci dev_info nodes 8429 */ 8430 void 8431 mdi_vhci_walk_phcis(dev_info_t *vdip, 8432 int (*f)(dev_info_t *, void *), void *arg) 8433 { 8434 mdi_vhci_t *vh = NULL; 8435 mdi_phci_t *ph = NULL; 8436 8437 mutex_enter(&mdi_mutex); 8438 8439 vh = i_devi_get_vhci(vdip); 8440 ph = vh->vh_phci_head; 8441 8442 while (ph) { 8443 MDI_PHCI_LOCK(ph); 8444 8445 switch ((*f)(ph->ph_dip, arg)) { 8446 case DDI_WALK_CONTINUE: 8447 MDI_PHCI_UNLOCK(ph); 8448 ph = ph->ph_next; 8449 break; 8450 8451 default: 8452 MDI_PHCI_UNLOCK(ph); 8453 mutex_exit(&mdi_mutex); 8454 
return; 8455 } 8456 } 8457 8458 mutex_exit(&mdi_mutex); 8459 } 8460 8461 8462 /* 8463 * mdi_walk_vhcis(): 8464 * Walker routine to traverse vhci dev_info nodes 8465 */ 8466 void 8467 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8468 { 8469 mdi_vhci_t *vh = NULL; 8470 8471 mutex_enter(&mdi_mutex); 8472 /* 8473 * Scan for already registered vhci 8474 */ 8475 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8476 vh->vh_refcnt++; 8477 mutex_exit(&mdi_mutex); 8478 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8479 mutex_enter(&mdi_mutex); 8480 vh->vh_refcnt--; 8481 break; 8482 } else { 8483 mutex_enter(&mdi_mutex); 8484 vh->vh_refcnt--; 8485 } 8486 } 8487 8488 mutex_exit(&mdi_mutex); 8489 } 8490 8491 /* 8492 * i_mdi_log_sysevent(): 8493 * Logs events for pickup by syseventd 8494 */ 8495 static void 8496 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8497 { 8498 char *path_name; 8499 nvlist_t *attr_list; 8500 8501 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8502 KM_SLEEP) != DDI_SUCCESS) { 8503 goto alloc_failed; 8504 } 8505 8506 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8507 (void) ddi_pathname(dip, path_name); 8508 8509 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8510 ddi_driver_name(dip)) != DDI_SUCCESS) { 8511 goto error; 8512 } 8513 8514 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8515 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8516 goto error; 8517 } 8518 8519 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8520 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8521 goto error; 8522 } 8523 8524 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8525 path_name) != DDI_SUCCESS) { 8526 goto error; 8527 } 8528 8529 if (nvlist_add_string(attr_list, DDI_CLASS, 8530 ph_vh_class) != DDI_SUCCESS) { 8531 goto error; 8532 } 8533 8534 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8535 attr_list, NULL, DDI_SLEEP); 8536 8537 error: 8538 kmem_free(path_name, MAXPATHLEN); 8539 
nvlist_free(attr_list); 8540 return; 8541 8542 alloc_failed: 8543 MDI_DEBUG(1, (CE_WARN, dip, 8544 "!i_mdi_log_sysevent: Unable to send sysevent")); 8545 } 8546