1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 30 * detailed discussion of the overall mpxio architecture. 
31 * 32 * Default locking order: 33 * 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) 35 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 69 #ifdef DEBUG 70 #include <sys/debug.h> 71 int mdi_debug = 1; 72 #define MDI_DEBUG(level, stmnt) \ 73 if (mdi_debug >= (level)) i_mdi_log stmnt 74 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 75 #else /* !DEBUG */ 76 #define MDI_DEBUG(level, stmnt) 77 #endif /* DEBUG */ 78 79 extern pri_t minclsyspri; 80 extern int modrootloaded; 81 82 /* 83 * Global mutex: 84 * Protects vHCI list and structure members, pHCI and Client lists. 
85 */ 86 kmutex_t mdi_mutex; 87 88 /* 89 * Registered vHCI class driver lists 90 */ 91 int mdi_vhci_count; 92 mdi_vhci_t *mdi_vhci_head; 93 mdi_vhci_t *mdi_vhci_tail; 94 95 /* 96 * Client Hash Table size 97 */ 98 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 99 100 /* 101 * taskq interface definitions 102 */ 103 #define MDI_TASKQ_N_THREADS 8 104 #define MDI_TASKQ_PRI minclsyspri 105 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 106 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 107 108 taskq_t *mdi_taskq; 109 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 110 111 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 112 113 /* 114 * The data should be "quiet" for this interval (in seconds) before the 115 * vhci cached data is flushed to the disk. 116 */ 117 static int mdi_vhcache_flush_delay = 10; 118 119 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 120 static int mdi_vhcache_flush_daemon_idle_time = 60; 121 122 /* 123 * number of seconds the asynchronous configuration thread will sleep idle 124 * before exiting. 
125 */ 126 static int mdi_async_config_idle_time = 600; 127 128 static int mdi_bus_config_cache_hash_size = 256; 129 130 /* turns off multithreaded configuration for certain operations */ 131 static int mdi_mtc_off = 0; 132 133 /* 134 * MDI component property name/value string definitions 135 */ 136 const char *mdi_component_prop = "mpxio-component"; 137 const char *mdi_component_prop_vhci = "vhci"; 138 const char *mdi_component_prop_phci = "phci"; 139 const char *mdi_component_prop_client = "client"; 140 141 /* 142 * MDI client global unique identifier property name 143 */ 144 const char *mdi_client_guid_prop = "client-guid"; 145 146 /* 147 * MDI client load balancing property name/value string definitions 148 */ 149 const char *mdi_load_balance = "load-balance"; 150 const char *mdi_load_balance_none = "none"; 151 const char *mdi_load_balance_rr = "round-robin"; 152 const char *mdi_load_balance_lba = "logical-block"; 153 154 /* 155 * Obsolete vHCI class definition; to be removed after Leadville update 156 */ 157 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 158 159 static char vhci_greeting[] = 160 "\tThere already exists one vHCI driver for class %s\n" 161 "\tOnly one vHCI driver for each class is allowed\n"; 162 163 /* 164 * Static function prototypes 165 */ 166 static int i_mdi_phci_offline(dev_info_t *, uint_t); 167 static int i_mdi_client_offline(dev_info_t *, uint_t); 168 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 169 static void i_mdi_phci_post_detach(dev_info_t *, 170 ddi_detach_cmd_t, int); 171 static int i_mdi_client_pre_detach(dev_info_t *, 172 ddi_detach_cmd_t); 173 static void i_mdi_client_post_detach(dev_info_t *, 174 ddi_detach_cmd_t, int); 175 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 176 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 177 static int i_mdi_lba_lb(mdi_client_t *ct, 178 mdi_pathinfo_t **ret_pip, struct buf *buf); 179 static void i_mdi_pm_hold_client(mdi_client_t *, int); 180 static void 
i_mdi_pm_rele_client(mdi_client_t *, int); 181 static void i_mdi_pm_reset_client(mdi_client_t *); 182 static void i_mdi_pm_hold_all_phci(mdi_client_t *); 183 static int i_mdi_power_all_phci(mdi_client_t *); 184 185 186 /* 187 * Internal mdi_pathinfo node functions 188 */ 189 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 190 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 191 192 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 193 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 194 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 195 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 196 static void i_mdi_phci_get_client_lock(mdi_phci_t *, 197 mdi_client_t *); 198 static void i_mdi_phci_unlock(mdi_phci_t *); 199 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 200 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 201 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 202 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 203 mdi_client_t *); 204 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 205 static void i_mdi_client_remove_path(mdi_client_t *, 206 mdi_pathinfo_t *); 207 208 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 209 mdi_pathinfo_state_t, int); 210 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 211 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 212 char **, int); 213 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 214 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 215 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 216 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 217 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 218 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 219 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 220 static void 
i_mdi_client_update_state(mdi_client_t *); 221 static int i_mdi_client_compute_state(mdi_client_t *, 222 mdi_phci_t *); 223 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 224 static void i_mdi_client_unlock(mdi_client_t *); 225 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 226 static mdi_client_t *i_devi_get_client(dev_info_t *); 227 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int, 228 int); 229 /* 230 * Failover related function prototypes 231 */ 232 static int i_mdi_failover(void *); 233 234 /* 235 * misc internal functions 236 */ 237 static int i_mdi_get_hash_key(char *); 238 static int i_map_nvlist_error_to_mdi(int); 239 static void i_mdi_report_path_state(mdi_client_t *, 240 mdi_pathinfo_t *); 241 242 static void setup_vhci_cache(mdi_vhci_t *); 243 static int destroy_vhci_cache(mdi_vhci_t *); 244 static void setup_phci_driver_list(mdi_vhci_t *); 245 static void free_phci_driver_list(mdi_vhci_config_t *); 246 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 247 static boolean_t stop_vhcache_flush_thread(void *, int); 248 static void free_string_array(char **, int); 249 static void free_vhcache_phci(mdi_vhcache_phci_t *); 250 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 251 static void free_vhcache_client(mdi_vhcache_client_t *); 252 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 253 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 254 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 255 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 256 static void vhcache_pi_add(mdi_vhci_config_t *, 257 struct mdi_pathinfo *); 258 static void vhcache_pi_remove(mdi_vhci_config_t *, 259 struct mdi_pathinfo *); 260 static void free_phclient_path_list(mdi_phys_path_t *); 261 static void sort_vhcache_paths(mdi_vhcache_client_t *); 262 static int flush_vhcache(mdi_vhci_config_t *, int); 263 static void vhcache_dirty(mdi_vhci_config_t *); 264 
static void free_async_client_config(mdi_async_client_config_t *); 265 static nvlist_t *read_on_disk_vhci_cache(char *); 266 extern int fread_nvlist(char *, nvlist_t **); 267 extern int fwrite_nvlist(char *, nvlist_t *); 268 269 /* called once when first vhci registers with mdi */ 270 static void 271 i_mdi_init() 272 { 273 static int initialized = 0; 274 275 if (initialized) 276 return; 277 initialized = 1; 278 279 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 280 /* 281 * Create our taskq resources 282 */ 283 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 284 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 285 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 286 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 287 } 288 289 /* 290 * mdi_get_component_type(): 291 * Return mpxio component type 292 * Return Values: 293 * MDI_COMPONENT_NONE 294 * MDI_COMPONENT_VHCI 295 * MDI_COMPONENT_PHCI 296 * MDI_COMPONENT_CLIENT 297 * XXX This doesn't work under multi-level MPxIO and should be 298 * removed when clients migrate mdi_is_*() interfaces. 299 */ 300 int 301 mdi_get_component_type(dev_info_t *dip) 302 { 303 return (DEVI(dip)->devi_mdi_component); 304 } 305 306 /* 307 * mdi_vhci_register(): 308 * Register a vHCI module with the mpxio framework 309 * mdi_vhci_register() is called by vHCI drivers to register the 310 * 'class_driver' vHCI driver and its MDI entrypoints with the 311 * mpxio framework. The vHCI driver must call this interface as 312 * part of its attach(9e) handler. 313 * Competing threads may try to attach mdi_vhci_register() as 314 * the vHCI drivers are loaded and attached as a result of pHCI 315 * driver instance registration (mdi_phci_register()) with the 316 * framework. 
317 * Return Values: 318 * MDI_SUCCESS 319 * MDI_FAILURE 320 */ 321 322 /*ARGSUSED*/ 323 int 324 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 325 int flags) 326 { 327 mdi_vhci_t *vh = NULL; 328 329 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 330 331 i_mdi_init(); 332 333 mutex_enter(&mdi_mutex); 334 /* 335 * Scan for already registered vhci 336 */ 337 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 338 if (strcmp(vh->vh_class, class) == 0) { 339 /* 340 * vHCI has already been created. Check for valid 341 * vHCI ops registration. We only support one vHCI 342 * module per class 343 */ 344 if (vh->vh_ops != NULL) { 345 mutex_exit(&mdi_mutex); 346 cmn_err(CE_NOTE, vhci_greeting, class); 347 return (MDI_FAILURE); 348 } 349 break; 350 } 351 } 352 353 /* 354 * if not yet created, create the vHCI component 355 */ 356 if (vh == NULL) { 357 struct client_hash *hash = NULL; 358 char *load_balance; 359 360 /* 361 * Allocate and initialize the mdi extensions 362 */ 363 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 364 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 365 KM_SLEEP); 366 vh->vh_client_table = hash; 367 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 368 (void) strcpy(vh->vh_class, class); 369 vh->vh_lb = LOAD_BALANCE_RR; 370 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 371 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 372 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 373 vh->vh_lb = LOAD_BALANCE_NONE; 374 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 375 == 0) { 376 vh->vh_lb = LOAD_BALANCE_LBA; 377 } 378 ddi_prop_free(load_balance); 379 } 380 381 /* 382 * Store the vHCI ops vectors 383 */ 384 vh->vh_dip = vdip; 385 vh->vh_ops = vops; 386 387 setup_vhci_cache(vh); 388 389 if (mdi_vhci_head == NULL) { 390 mdi_vhci_head = vh; 391 } 392 if (mdi_vhci_tail) { 393 mdi_vhci_tail->vh_next = vh; 394 } 395 mdi_vhci_tail = vh; 396 mdi_vhci_count++; 397 } 398 399 /* 400 * Claim the 
devfs node as a vhci component 401 */ 402 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 403 404 /* 405 * Initialize our back reference from dev_info node 406 */ 407 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 408 mutex_exit(&mdi_mutex); 409 return (MDI_SUCCESS); 410 } 411 412 /* 413 * mdi_vhci_unregister(): 414 * Unregister a vHCI module from mpxio framework 415 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 416 * of a vhci to unregister it from the framework. 417 * Return Values: 418 * MDI_SUCCESS 419 * MDI_FAILURE 420 */ 421 422 /*ARGSUSED*/ 423 int 424 mdi_vhci_unregister(dev_info_t *vdip, int flags) 425 { 426 mdi_vhci_t *found, *vh, *prev = NULL; 427 428 /* 429 * Check for invalid VHCI 430 */ 431 if ((vh = i_devi_get_vhci(vdip)) == NULL) 432 return (MDI_FAILURE); 433 434 mutex_enter(&mdi_mutex); 435 436 /* 437 * Scan the list of registered vHCIs for a match 438 */ 439 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 440 if (found == vh) 441 break; 442 prev = found; 443 } 444 445 if (found == NULL) { 446 mutex_exit(&mdi_mutex); 447 return (MDI_FAILURE); 448 } 449 450 /* 451 * Check the pHCI and client count. All the pHCIs and clients 452 * should have been unregistered, before a vHCI can be 453 * unregistered. 
454 */ 455 if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) { 456 mutex_exit(&mdi_mutex); 457 return (MDI_FAILURE); 458 } 459 460 /* 461 * Remove the vHCI from the global list 462 */ 463 if (vh == mdi_vhci_head) { 464 mdi_vhci_head = vh->vh_next; 465 } else { 466 prev->vh_next = vh->vh_next; 467 } 468 if (vh == mdi_vhci_tail) { 469 mdi_vhci_tail = prev; 470 } 471 472 mdi_vhci_count--; 473 mutex_exit(&mdi_mutex); 474 475 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 476 /* add vhci to the global list */ 477 mutex_enter(&mdi_mutex); 478 if (mdi_vhci_head == NULL) 479 mdi_vhci_head = vh; 480 else 481 mdi_vhci_tail->vh_next = vh; 482 mdi_vhci_tail = vh; 483 mdi_vhci_count++; 484 mutex_exit(&mdi_mutex); 485 return (MDI_FAILURE); 486 } 487 488 vh->vh_ops = NULL; 489 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 490 DEVI(vdip)->devi_mdi_xhci = NULL; 491 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 492 kmem_free(vh->vh_client_table, 493 mdi_client_table_size * sizeof (struct client_hash)); 494 kmem_free(vh, sizeof (mdi_vhci_t)); 495 return (MDI_SUCCESS); 496 } 497 498 /* 499 * i_mdi_vhci_class2vhci(): 500 * Look for a matching vHCI module given a vHCI class name 501 * Return Values: 502 * Handle to a vHCI component 503 * NULL 504 */ 505 static mdi_vhci_t * 506 i_mdi_vhci_class2vhci(char *class) 507 { 508 mdi_vhci_t *vh = NULL; 509 510 ASSERT(!MUTEX_HELD(&mdi_mutex)); 511 512 mutex_enter(&mdi_mutex); 513 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 514 if (strcmp(vh->vh_class, class) == 0) { 515 break; 516 } 517 } 518 mutex_exit(&mdi_mutex); 519 return (vh); 520 } 521 522 /* 523 * i_devi_get_vhci(): 524 * Utility function to get the handle to a vHCI component 525 * Return Values: 526 * Handle to a vHCI component 527 * NULL 528 */ 529 mdi_vhci_t * 530 i_devi_get_vhci(dev_info_t *vdip) 531 { 532 mdi_vhci_t *vh = NULL; 533 if (MDI_VHCI(vdip)) { 534 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 535 } 536 return (vh); 537 } 538 539 /* 540 * 
mdi_phci_register(): 541 * Register a pHCI module with mpxio framework 542 * mdi_phci_register() is called by pHCI drivers to register with 543 * the mpxio framework and a specific 'class_driver' vHCI. The 544 * pHCI driver must call this interface as part of its attach(9e) 545 * handler. 546 * Return Values: 547 * MDI_SUCCESS 548 * MDI_FAILURE 549 */ 550 551 /*ARGSUSED*/ 552 int 553 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 554 { 555 mdi_phci_t *ph; 556 mdi_vhci_t *vh; 557 char *data; 558 char *pathname; 559 560 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 561 (void) ddi_pathname(pdip, pathname); 562 563 /* 564 * Check for mpxio-disable property. Enable mpxio if the property is 565 * missing or not set to "yes". 566 * If the property is set to "yes" then emit a brief message. 567 */ 568 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 569 &data) == DDI_SUCCESS)) { 570 if (strcmp(data, "yes") == 0) { 571 MDI_DEBUG(1, (CE_CONT, pdip, 572 "?%s (%s%d) multipath capabilities " 573 "disabled via %s.conf.\n", pathname, 574 ddi_driver_name(pdip), ddi_get_instance(pdip), 575 ddi_driver_name(pdip))); 576 ddi_prop_free(data); 577 kmem_free(pathname, MAXPATHLEN); 578 return (MDI_FAILURE); 579 } 580 ddi_prop_free(data); 581 } 582 583 kmem_free(pathname, MAXPATHLEN); 584 585 /* 586 * Search for a matching vHCI 587 */ 588 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 589 if (vh == NULL) { 590 return (MDI_FAILURE); 591 } 592 593 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 594 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 595 ph->ph_dip = pdip; 596 ph->ph_vhci = vh; 597 ph->ph_next = NULL; 598 ph->ph_unstable = 0; 599 ph->ph_vprivate = 0; 600 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 601 cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL); 602 603 MDI_PHCI_SET_POWER_UP(ph); 604 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 605 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 606 607 
vhcache_phci_add(vh->vh_config, ph); 608 609 mutex_enter(&mdi_mutex); 610 if (vh->vh_phci_head == NULL) { 611 vh->vh_phci_head = ph; 612 } 613 if (vh->vh_phci_tail) { 614 vh->vh_phci_tail->ph_next = ph; 615 } 616 vh->vh_phci_tail = ph; 617 vh->vh_phci_count++; 618 mutex_exit(&mdi_mutex); 619 return (MDI_SUCCESS); 620 } 621 622 /* 623 * mdi_phci_unregister(): 624 * Unregister a pHCI module from mpxio framework 625 * mdi_phci_unregister() is called by the pHCI drivers from their 626 * detach(9E) handler to unregister their instances from the 627 * framework. 628 * Return Values: 629 * MDI_SUCCESS 630 * MDI_FAILURE 631 */ 632 633 /*ARGSUSED*/ 634 int 635 mdi_phci_unregister(dev_info_t *pdip, int flags) 636 { 637 mdi_vhci_t *vh; 638 mdi_phci_t *ph; 639 mdi_phci_t *tmp; 640 mdi_phci_t *prev = NULL; 641 642 ph = i_devi_get_phci(pdip); 643 if (ph == NULL) { 644 MDI_DEBUG(1, (CE_WARN, pdip, 645 "!pHCI unregister: Not a valid pHCI")); 646 return (MDI_FAILURE); 647 } 648 649 vh = ph->ph_vhci; 650 ASSERT(vh != NULL); 651 if (vh == NULL) { 652 MDI_DEBUG(1, (CE_WARN, pdip, 653 "!pHCI unregister: Not a valid vHCI")); 654 return (MDI_FAILURE); 655 } 656 657 mutex_enter(&mdi_mutex); 658 tmp = vh->vh_phci_head; 659 while (tmp) { 660 if (tmp == ph) { 661 break; 662 } 663 prev = tmp; 664 tmp = tmp->ph_next; 665 } 666 667 if (ph == vh->vh_phci_head) { 668 vh->vh_phci_head = ph->ph_next; 669 } else { 670 prev->ph_next = ph->ph_next; 671 } 672 673 if (ph == vh->vh_phci_tail) { 674 vh->vh_phci_tail = prev; 675 } 676 677 vh->vh_phci_count--; 678 679 mutex_exit(&mdi_mutex); 680 681 vhcache_phci_remove(vh->vh_config, ph); 682 cv_destroy(&ph->ph_unstable_cv); 683 cv_destroy(&ph->ph_powerchange_cv); 684 mutex_destroy(&ph->ph_mutex); 685 kmem_free(ph, sizeof (mdi_phci_t)); 686 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 687 DEVI(pdip)->devi_mdi_xhci = NULL; 688 return (MDI_SUCCESS); 689 } 690 691 /* 692 * i_devi_get_phci(): 693 * Utility function to return the phci extensions. 
694 */ 695 static mdi_phci_t * 696 i_devi_get_phci(dev_info_t *pdip) 697 { 698 mdi_phci_t *ph = NULL; 699 if (MDI_PHCI(pdip)) { 700 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 701 } 702 return (ph); 703 } 704 705 /* 706 * mdi_phci_path2devinfo(): 707 * Utility function to search for a valid phci device given 708 * the devfs pathname. 709 */ 710 711 dev_info_t * 712 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 713 { 714 char *temp_pathname; 715 mdi_vhci_t *vh; 716 mdi_phci_t *ph; 717 dev_info_t *pdip = NULL; 718 719 vh = i_devi_get_vhci(vdip); 720 ASSERT(vh != NULL); 721 722 if (vh == NULL) { 723 /* 724 * Invalid vHCI component, return failure 725 */ 726 return (NULL); 727 } 728 729 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 730 mutex_enter(&mdi_mutex); 731 ph = vh->vh_phci_head; 732 while (ph != NULL) { 733 pdip = ph->ph_dip; 734 ASSERT(pdip != NULL); 735 *temp_pathname = '\0'; 736 (void) ddi_pathname(pdip, temp_pathname); 737 if (strcmp(temp_pathname, pathname) == 0) { 738 break; 739 } 740 ph = ph->ph_next; 741 } 742 if (ph == NULL) { 743 pdip = NULL; 744 } 745 mutex_exit(&mdi_mutex); 746 kmem_free(temp_pathname, MAXPATHLEN); 747 return (pdip); 748 } 749 750 /* 751 * mdi_phci_get_path_count(): 752 * get number of path information nodes associated with a given 753 * pHCI device. 754 */ 755 int 756 mdi_phci_get_path_count(dev_info_t *pdip) 757 { 758 mdi_phci_t *ph; 759 int count = 0; 760 761 ph = i_devi_get_phci(pdip); 762 if (ph != NULL) { 763 count = ph->ph_path_count; 764 } 765 return (count); 766 } 767 768 /* 769 * i_mdi_phci_lock(): 770 * Lock a pHCI device 771 * Return Values: 772 * None 773 * Note: 774 * The default locking order is: 775 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 776 * But there are number of situations where locks need to be 777 * grabbed in reverse order. This routine implements try and lock 778 * mechanism depending on the requested parameter option. 
779 */ 780 static void 781 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 782 { 783 if (pip) { 784 /* Reverse locking is requested. */ 785 while (MDI_PHCI_TRYLOCK(ph) == 0) { 786 /* 787 * tryenter failed. Try to grab again 788 * after a small delay 789 */ 790 MDI_PI_HOLD(pip); 791 MDI_PI_UNLOCK(pip); 792 delay(1); 793 MDI_PI_LOCK(pip); 794 MDI_PI_RELE(pip); 795 } 796 } else { 797 MDI_PHCI_LOCK(ph); 798 } 799 } 800 801 /* 802 * i_mdi_phci_get_client_lock(): 803 * Lock a pHCI device 804 * Return Values: 805 * None 806 * Note: 807 * The default locking order is: 808 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 809 * But there are number of situations where locks need to be 810 * grabbed in reverse order. This routine implements try and lock 811 * mechanism depending on the requested parameter option. 812 */ 813 static void 814 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct) 815 { 816 if (ct) { 817 /* Reverse locking is requested. */ 818 while (MDI_PHCI_TRYLOCK(ph) == 0) { 819 /* 820 * tryenter failed. 
Try to grab again 821 * after a small delay 822 */ 823 MDI_CLIENT_UNLOCK(ct); 824 delay(1); 825 MDI_CLIENT_LOCK(ct); 826 } 827 } else { 828 MDI_PHCI_LOCK(ph); 829 } 830 } 831 832 /* 833 * i_mdi_phci_unlock(): 834 * Unlock the pHCI component 835 */ 836 static void 837 i_mdi_phci_unlock(mdi_phci_t *ph) 838 { 839 MDI_PHCI_UNLOCK(ph); 840 } 841 842 /* 843 * i_mdi_devinfo_create(): 844 * create client device's devinfo node 845 * Return Values: 846 * dev_info 847 * NULL 848 * Notes: 849 */ 850 static dev_info_t * 851 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 852 char **compatible, int ncompatible) 853 { 854 dev_info_t *cdip = NULL; 855 856 ASSERT(MUTEX_HELD(&mdi_mutex)); 857 858 /* Verify for duplicate entry */ 859 cdip = i_mdi_devinfo_find(vh, name, guid); 860 ASSERT(cdip == NULL); 861 if (cdip) { 862 cmn_err(CE_WARN, 863 "i_mdi_devinfo_create: client dip %p already exists", 864 (void *)cdip); 865 } 866 867 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 868 if (cdip == NULL) 869 goto fail; 870 871 /* 872 * Create component type and Global unique identifier 873 * properties 874 */ 875 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 876 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 877 goto fail; 878 } 879 880 /* Decorate the node with compatible property */ 881 if (compatible && 882 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 883 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 884 goto fail; 885 } 886 887 return (cdip); 888 889 fail: 890 if (cdip) { 891 (void) ndi_prop_remove_all(cdip); 892 (void) ndi_devi_free(cdip); 893 } 894 return (NULL); 895 } 896 897 /* 898 * i_mdi_devinfo_find(): 899 * Find a matching devinfo node for given client node name 900 * and its guid. 
901 * Return Values: 902 * Handle to a dev_info node or NULL 903 */ 904 905 static dev_info_t * 906 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 907 { 908 char *data; 909 dev_info_t *cdip = NULL; 910 dev_info_t *ndip = NULL; 911 int circular; 912 913 ndi_devi_enter(vh->vh_dip, &circular); 914 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 915 while ((cdip = ndip) != NULL) { 916 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 917 918 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 919 continue; 920 } 921 922 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 923 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 924 &data) != DDI_PROP_SUCCESS) { 925 continue; 926 } 927 928 if (strcmp(data, guid) != 0) { 929 ddi_prop_free(data); 930 continue; 931 } 932 ddi_prop_free(data); 933 break; 934 } 935 ndi_devi_exit(vh->vh_dip, circular); 936 return (cdip); 937 } 938 939 /* 940 * i_mdi_devinfo_remove(): 941 * Remove a client device node 942 */ 943 static int 944 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 945 { 946 int rv = MDI_SUCCESS; 947 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 948 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 949 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 950 if (rv != NDI_SUCCESS) { 951 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 952 " failed. 
cdip = %p\n", cdip)); 953 } 954 /* 955 * Convert to MDI error code 956 */ 957 switch (rv) { 958 case NDI_SUCCESS: 959 rv = MDI_SUCCESS; 960 break; 961 case NDI_BUSY: 962 rv = MDI_BUSY; 963 break; 964 default: 965 rv = MDI_FAILURE; 966 break; 967 } 968 } 969 return (rv); 970 } 971 972 /* 973 * i_devi_get_client() 974 * Utility function to get mpxio component extensions 975 */ 976 static mdi_client_t * 977 i_devi_get_client(dev_info_t *cdip) 978 { 979 mdi_client_t *ct = NULL; 980 if (MDI_CLIENT(cdip)) { 981 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 982 } 983 return (ct); 984 } 985 986 /* 987 * i_mdi_is_child_present(): 988 * Search for the presence of client device dev_info node 989 */ 990 991 static int 992 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 993 { 994 int rv = MDI_FAILURE; 995 struct dev_info *dip; 996 int circular; 997 998 ndi_devi_enter(vdip, &circular); 999 dip = DEVI(vdip)->devi_child; 1000 while (dip) { 1001 if (dip == DEVI(cdip)) { 1002 rv = MDI_SUCCESS; 1003 break; 1004 } 1005 dip = dip->devi_sibling; 1006 } 1007 ndi_devi_exit(vdip, circular); 1008 return (rv); 1009 } 1010 1011 1012 /* 1013 * i_mdi_client_lock(): 1014 * Grab client component lock 1015 * Return Values: 1016 * None 1017 * Note: 1018 * The default locking order is: 1019 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1020 * But there are number of situations where locks need to be 1021 * grabbed in reverse order. This routine implements try and lock 1022 * mechanism depending on the requested parameter option. 1023 */ 1024 1025 static void 1026 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1027 { 1028 if (pip) { 1029 /* 1030 * Reverse locking is requested. 1031 */ 1032 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1033 /* 1034 * tryenter failed. 
Try to grab again 1035 * after a small delay 1036 */ 1037 MDI_PI_HOLD(pip); 1038 MDI_PI_UNLOCK(pip); 1039 delay(1); 1040 MDI_PI_LOCK(pip); 1041 MDI_PI_RELE(pip); 1042 } 1043 } else { 1044 MDI_CLIENT_LOCK(ct); 1045 } 1046 } 1047 1048 /* 1049 * i_mdi_client_unlock(): 1050 * Unlock a client component 1051 */ 1052 1053 static void 1054 i_mdi_client_unlock(mdi_client_t *ct) 1055 { 1056 MDI_CLIENT_UNLOCK(ct); 1057 } 1058 1059 /* 1060 * i_mdi_client_alloc(): 1061 * Allocate and initialize a client structure. Caller should 1062 * hold the global mdi_mutex. 1063 * Return Values: 1064 * Handle to a client component 1065 */ 1066 /*ARGSUSED*/ 1067 static mdi_client_t * 1068 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1069 { 1070 mdi_client_t *ct; 1071 1072 ASSERT(MUTEX_HELD(&mdi_mutex)); 1073 1074 /* 1075 * Allocate and initialize a component structure. 1076 */ 1077 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1078 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1079 ct->ct_hnext = NULL; 1080 ct->ct_hprev = NULL; 1081 ct->ct_dip = NULL; 1082 ct->ct_vhci = vh; 1083 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1084 (void) strcpy(ct->ct_drvname, name); 1085 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1086 (void) strcpy(ct->ct_guid, lguid); 1087 ct->ct_cprivate = NULL; 1088 ct->ct_vprivate = NULL; 1089 ct->ct_flags = 0; 1090 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1091 MDI_CLIENT_SET_OFFLINE(ct); 1092 MDI_CLIENT_SET_DETACH(ct); 1093 MDI_CLIENT_SET_POWER_UP(ct); 1094 ct->ct_failover_flags = 0; 1095 ct->ct_failover_status = 0; 1096 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1097 ct->ct_unstable = 0; 1098 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1099 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1100 ct->ct_lb = vh->vh_lb; 1101 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1102 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1103 ct->ct_path_count = 0; 1104 ct->ct_path_head 
= NULL; 1105 ct->ct_path_tail = NULL; 1106 ct->ct_path_last = NULL; 1107 1108 /* 1109 * Add this client component to our client hash queue 1110 */ 1111 i_mdi_client_enlist_table(vh, ct); 1112 return (ct); 1113 } 1114 1115 /* 1116 * i_mdi_client_enlist_table(): 1117 * Attach the client device to the client hash table. Caller 1118 * should hold the mdi_mutex 1119 */ 1120 1121 static void 1122 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1123 { 1124 int index; 1125 struct client_hash *head; 1126 1127 ASSERT(MUTEX_HELD(&mdi_mutex)); 1128 index = i_mdi_get_hash_key(ct->ct_guid); 1129 head = &vh->vh_client_table[index]; 1130 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1131 head->ct_hash_head = ct; 1132 head->ct_hash_count++; 1133 vh->vh_client_count++; 1134 } 1135 1136 /* 1137 * i_mdi_client_delist_table(): 1138 * Attach the client device to the client hash table. 1139 * Caller should hold the mdi_mutex 1140 */ 1141 1142 static void 1143 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1144 { 1145 int index; 1146 char *guid; 1147 struct client_hash *head; 1148 mdi_client_t *next; 1149 mdi_client_t *last; 1150 1151 ASSERT(MUTEX_HELD(&mdi_mutex)); 1152 guid = ct->ct_guid; 1153 index = i_mdi_get_hash_key(guid); 1154 head = &vh->vh_client_table[index]; 1155 1156 last = NULL; 1157 next = (mdi_client_t *)head->ct_hash_head; 1158 while (next != NULL) { 1159 if (next == ct) { 1160 break; 1161 } 1162 last = next; 1163 next = next->ct_hnext; 1164 } 1165 1166 if (next) { 1167 head->ct_hash_count--; 1168 if (last == NULL) { 1169 head->ct_hash_head = ct->ct_hnext; 1170 } else { 1171 last->ct_hnext = ct->ct_hnext; 1172 } 1173 ct->ct_hnext = NULL; 1174 vh->vh_client_count--; 1175 } 1176 } 1177 1178 1179 /* 1180 * i_mdi_client_free(): 1181 * Free a client component 1182 */ 1183 static int 1184 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1185 { 1186 int rv = MDI_SUCCESS; 1187 int flags = ct->ct_flags; 1188 dev_info_t *cdip; 1189 dev_info_t *vdip; 
1190 1191 ASSERT(MUTEX_HELD(&mdi_mutex)); 1192 vdip = vh->vh_dip; 1193 cdip = ct->ct_dip; 1194 1195 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1196 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1197 DEVI(cdip)->devi_mdi_client = NULL; 1198 1199 /* 1200 * Clear out back ref. to dev_info_t node 1201 */ 1202 ct->ct_dip = NULL; 1203 1204 /* 1205 * Remove this client from our hash queue 1206 */ 1207 i_mdi_client_delist_table(vh, ct); 1208 1209 /* 1210 * Uninitialize and free the component 1211 */ 1212 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1213 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1214 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1215 cv_destroy(&ct->ct_failover_cv); 1216 cv_destroy(&ct->ct_unstable_cv); 1217 cv_destroy(&ct->ct_powerchange_cv); 1218 mutex_destroy(&ct->ct_mutex); 1219 kmem_free(ct, sizeof (*ct)); 1220 1221 if (cdip != NULL) { 1222 mutex_exit(&mdi_mutex); 1223 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1224 mutex_enter(&mdi_mutex); 1225 } 1226 return (rv); 1227 } 1228 1229 /* 1230 * i_mdi_client_find(): 1231 * Find the client structure corresponding to a given guid 1232 * Caller should hold the mdi_mutex 1233 */ 1234 static mdi_client_t * 1235 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1236 { 1237 int index; 1238 struct client_hash *head; 1239 mdi_client_t *ct; 1240 1241 ASSERT(MUTEX_HELD(&mdi_mutex)); 1242 index = i_mdi_get_hash_key(guid); 1243 head = &vh->vh_client_table[index]; 1244 1245 ct = head->ct_hash_head; 1246 while (ct != NULL) { 1247 if (strcmp(ct->ct_guid, guid) == 0 && 1248 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1249 break; 1250 } 1251 ct = ct->ct_hnext; 1252 } 1253 return (ct); 1254 } 1255 1256 1257 1258 /* 1259 * i_mdi_client_update_state(): 1260 * Compute and update client device state 1261 * Notes: 1262 * A client device can be in any of three possible states: 1263 * 1264 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state 
with more 1265 * one online/standby paths. Can tolerate failures. 1266 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1267 * no alternate paths available as standby. A failure on the online 1268 * would result in loss of access to device data. 1269 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1270 * no paths available to access the device. 1271 */ 1272 static void 1273 i_mdi_client_update_state(mdi_client_t *ct) 1274 { 1275 int state; 1276 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1277 state = i_mdi_client_compute_state(ct, NULL); 1278 MDI_CLIENT_SET_STATE(ct, state); 1279 } 1280 1281 /* 1282 * i_mdi_client_compute_state(): 1283 * Compute client device state 1284 * 1285 * mdi_phci_t * Pointer to pHCI structure which should 1286 * while computing the new value. Used by 1287 * i_mdi_phci_offline() to find the new 1288 * client state after DR of a pHCI. 1289 */ 1290 static int 1291 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1292 { 1293 int state; 1294 int online_count = 0; 1295 int standby_count = 0; 1296 mdi_pathinfo_t *pip, *next; 1297 1298 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1299 pip = ct->ct_path_head; 1300 while (pip != NULL) { 1301 MDI_PI_LOCK(pip); 1302 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1303 if (MDI_PI(pip)->pi_phci == ph) { 1304 MDI_PI_UNLOCK(pip); 1305 pip = next; 1306 continue; 1307 } 1308 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1309 == MDI_PATHINFO_STATE_ONLINE) 1310 online_count++; 1311 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1312 == MDI_PATHINFO_STATE_STANDBY) 1313 standby_count++; 1314 MDI_PI_UNLOCK(pip); 1315 pip = next; 1316 } 1317 1318 if (online_count == 0) { 1319 if (standby_count == 0) { 1320 state = MDI_CLIENT_STATE_FAILED; 1321 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1322 " ct = %p\n", ct)); 1323 } else if (standby_count == 1) { 1324 state = MDI_CLIENT_STATE_DEGRADED; 1325 } else { 1326 state = MDI_CLIENT_STATE_OPTIMAL; 1327 } 1328 } 
else if (online_count == 1) { 1329 if (standby_count == 0) { 1330 state = MDI_CLIENT_STATE_DEGRADED; 1331 } else { 1332 state = MDI_CLIENT_STATE_OPTIMAL; 1333 } 1334 } else { 1335 state = MDI_CLIENT_STATE_OPTIMAL; 1336 } 1337 return (state); 1338 } 1339 1340 /* 1341 * i_mdi_client2devinfo(): 1342 * Utility function 1343 */ 1344 dev_info_t * 1345 i_mdi_client2devinfo(mdi_client_t *ct) 1346 { 1347 return (ct->ct_dip); 1348 } 1349 1350 /* 1351 * mdi_client_path2_devinfo(): 1352 * Given the parent devinfo and child devfs pathname, search for 1353 * a valid devfs node handle. 1354 */ 1355 dev_info_t * 1356 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1357 { 1358 dev_info_t *cdip = NULL; 1359 dev_info_t *ndip = NULL; 1360 char *temp_pathname; 1361 int circular; 1362 1363 /* 1364 * Allocate temp buffer 1365 */ 1366 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1367 1368 /* 1369 * Lock parent against changes 1370 */ 1371 ndi_devi_enter(vdip, &circular); 1372 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1373 while ((cdip = ndip) != NULL) { 1374 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1375 1376 *temp_pathname = '\0'; 1377 (void) ddi_pathname(cdip, temp_pathname); 1378 if (strcmp(temp_pathname, pathname) == 0) { 1379 break; 1380 } 1381 } 1382 /* 1383 * Release devinfo lock 1384 */ 1385 ndi_devi_exit(vdip, circular); 1386 1387 /* 1388 * Free the temp buffer 1389 */ 1390 kmem_free(temp_pathname, MAXPATHLEN); 1391 return (cdip); 1392 } 1393 1394 1395 /* 1396 * mdi_client_get_path_count(): 1397 * Utility function to get number of path information nodes 1398 * associated with a given client device. 
1399 */ 1400 int 1401 mdi_client_get_path_count(dev_info_t *cdip) 1402 { 1403 mdi_client_t *ct; 1404 int count = 0; 1405 1406 ct = i_devi_get_client(cdip); 1407 if (ct != NULL) { 1408 count = ct->ct_path_count; 1409 } 1410 return (count); 1411 } 1412 1413 1414 /* 1415 * i_mdi_get_hash_key(): 1416 * Create a hash using strings as keys 1417 * 1418 */ 1419 static int 1420 i_mdi_get_hash_key(char *str) 1421 { 1422 uint32_t g, hash = 0; 1423 char *p; 1424 1425 for (p = str; *p != '\0'; p++) { 1426 g = *p; 1427 hash += g; 1428 } 1429 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1430 } 1431 1432 /* 1433 * mdi_get_lb_policy(): 1434 * Get current load balancing policy for a given client device 1435 */ 1436 client_lb_t 1437 mdi_get_lb_policy(dev_info_t *cdip) 1438 { 1439 client_lb_t lb = LOAD_BALANCE_NONE; 1440 mdi_client_t *ct; 1441 1442 ct = i_devi_get_client(cdip); 1443 if (ct != NULL) { 1444 lb = ct->ct_lb; 1445 } 1446 return (lb); 1447 } 1448 1449 /* 1450 * mdi_set_lb_region_size(): 1451 * Set current region size for the load-balance 1452 */ 1453 int 1454 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1455 { 1456 mdi_client_t *ct; 1457 int rv = MDI_FAILURE; 1458 1459 ct = i_devi_get_client(cdip); 1460 if (ct != NULL && ct->ct_lb_args != NULL) { 1461 ct->ct_lb_args->region_size = region_size; 1462 rv = MDI_SUCCESS; 1463 } 1464 return (rv); 1465 } 1466 1467 /* 1468 * mdi_Set_lb_policy(): 1469 * Set current load balancing policy for a given client device 1470 */ 1471 int 1472 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1473 { 1474 mdi_client_t *ct; 1475 int rv = MDI_FAILURE; 1476 1477 ct = i_devi_get_client(cdip); 1478 if (ct != NULL) { 1479 ct->ct_lb = lb; 1480 rv = MDI_SUCCESS; 1481 } 1482 return (rv); 1483 } 1484 1485 /* 1486 * mdi_failover(): 1487 * failover function called by the vHCI drivers to initiate 1488 * a failover operation. This is typically due to non-availability 1489 * of online paths to route I/O requests. 
Failover can be 1490 * triggered through user application also. 1491 * 1492 * The vHCI driver calls mdi_failover() to initiate a failover 1493 * operation. mdi_failover() calls back into the vHCI driver's 1494 * vo_failover() entry point to perform the actual failover 1495 * operation. The reason for requiring the vHCI driver to 1496 * initiate failover by calling mdi_failover(), instead of directly 1497 * executing vo_failover() itself, is to ensure that the mdi 1498 * framework can keep track of the client state properly. 1499 * Additionally, mdi_failover() provides as a convenience the 1500 * option of performing the failover operation synchronously or 1501 * asynchronously 1502 * 1503 * Upon successful completion of the failover operation, the 1504 * paths that were previously ONLINE will be in the STANDBY state, 1505 * and the newly activated paths will be in the ONLINE state. 1506 * 1507 * The flags modifier determines whether the activation is done 1508 * synchronously: MDI_FAILOVER_SYNC 1509 * Return Values: 1510 * MDI_SUCCESS 1511 * MDI_FAILURE 1512 * MDI_BUSY 1513 */ 1514 /*ARGSUSED*/ 1515 int 1516 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1517 { 1518 int rv; 1519 mdi_client_t *ct; 1520 1521 ct = i_devi_get_client(cdip); 1522 ASSERT(ct != NULL); 1523 if (ct == NULL) { 1524 /* cdip is not a valid client device. Nothing more to do. */ 1525 return (MDI_FAILURE); 1526 } 1527 1528 MDI_CLIENT_LOCK(ct); 1529 1530 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1531 /* A path to the client is being freed */ 1532 MDI_CLIENT_UNLOCK(ct); 1533 return (MDI_BUSY); 1534 } 1535 1536 1537 if (MDI_CLIENT_IS_FAILED(ct)) { 1538 /* 1539 * Client is in failed state. Nothing more to do. 
1540 */ 1541 MDI_CLIENT_UNLOCK(ct); 1542 return (MDI_FAILURE); 1543 } 1544 1545 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1546 /* 1547 * Failover is already in progress; return BUSY 1548 */ 1549 MDI_CLIENT_UNLOCK(ct); 1550 return (MDI_BUSY); 1551 } 1552 /* 1553 * Make sure that mdi_pathinfo node state changes are processed. 1554 * We do not allow failovers to progress while client path state 1555 * changes are in progress 1556 */ 1557 if (ct->ct_unstable) { 1558 if (flags == MDI_FAILOVER_ASYNC) { 1559 MDI_CLIENT_UNLOCK(ct); 1560 return (MDI_BUSY); 1561 } else { 1562 while (ct->ct_unstable) 1563 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1564 } 1565 } 1566 1567 /* 1568 * Client device is in stable state. Before proceeding, perform sanity 1569 * checks again. 1570 */ 1571 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1572 (i_ddi_node_state(ct->ct_dip) < DS_READY)) { 1573 /* 1574 * Client is in failed state. Nothing more to do. 1575 */ 1576 MDI_CLIENT_UNLOCK(ct); 1577 return (MDI_FAILURE); 1578 } 1579 1580 /* 1581 * Set the client state as failover in progress. 1582 */ 1583 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1584 ct->ct_failover_flags = flags; 1585 MDI_CLIENT_UNLOCK(ct); 1586 1587 if (flags == MDI_FAILOVER_ASYNC) { 1588 /* 1589 * Submit the initiate failover request via CPR safe 1590 * taskq threads. 1591 */ 1592 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1593 ct, KM_SLEEP); 1594 return (MDI_ACCEPT); 1595 } else { 1596 /* 1597 * Synchronous failover mode. Typically invoked from the user 1598 * land. 1599 */ 1600 rv = i_mdi_failover(ct); 1601 } 1602 return (rv); 1603 } 1604 1605 /* 1606 * i_mdi_failover(): 1607 * internal failover function. Invokes vHCI drivers failover 1608 * callback function and process the failover status 1609 * Return Values: 1610 * None 1611 * 1612 * Note: A client device in failover state can not be detached or freed. 
1613 */ 1614 static int 1615 i_mdi_failover(void *arg) 1616 { 1617 int rv = MDI_SUCCESS; 1618 mdi_client_t *ct = (mdi_client_t *)arg; 1619 mdi_vhci_t *vh = ct->ct_vhci; 1620 1621 ASSERT(!MUTEX_HELD(&ct->ct_mutex)); 1622 1623 if (vh->vh_ops->vo_failover != NULL) { 1624 /* 1625 * Call vHCI drivers callback routine 1626 */ 1627 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1628 ct->ct_failover_flags); 1629 } 1630 1631 MDI_CLIENT_LOCK(ct); 1632 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1633 1634 /* 1635 * Save the failover return status 1636 */ 1637 ct->ct_failover_status = rv; 1638 1639 /* 1640 * As a result of failover, client status would have been changed. 1641 * Update the client state and wake up anyone waiting on this client 1642 * device. 1643 */ 1644 i_mdi_client_update_state(ct); 1645 1646 cv_broadcast(&ct->ct_failover_cv); 1647 MDI_CLIENT_UNLOCK(ct); 1648 return (rv); 1649 } 1650 1651 /* 1652 * Load balancing is logical block. 1653 * IOs within the range described by region_size 1654 * would go on the same path. This would improve the 1655 * performance by cache-hit on some of the RAID devices. 1656 * Search only for online paths(At some point we 1657 * may want to balance across target ports). 1658 * If no paths are found then default to round-robin. 
1659 */ 1660 static int 1661 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1662 { 1663 int path_index = -1; 1664 int online_path_count = 0; 1665 int online_nonpref_path_count = 0; 1666 int region_size = ct->ct_lb_args->region_size; 1667 mdi_pathinfo_t *pip; 1668 mdi_pathinfo_t *next; 1669 int preferred, path_cnt; 1670 1671 pip = ct->ct_path_head; 1672 while (pip) { 1673 MDI_PI_LOCK(pip); 1674 if (MDI_PI(pip)->pi_state == 1675 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1676 online_path_count++; 1677 } else if (MDI_PI(pip)->pi_state == 1678 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1679 online_nonpref_path_count++; 1680 } 1681 next = (mdi_pathinfo_t *) 1682 MDI_PI(pip)->pi_client_link; 1683 MDI_PI_UNLOCK(pip); 1684 pip = next; 1685 } 1686 /* if found any online/preferred then use this type */ 1687 if (online_path_count > 0) { 1688 path_cnt = online_path_count; 1689 preferred = 1; 1690 } else if (online_nonpref_path_count > 0) { 1691 path_cnt = online_nonpref_path_count; 1692 preferred = 0; 1693 } else { 1694 path_cnt = 0; 1695 } 1696 if (path_cnt) { 1697 path_index = (bp->b_blkno >> region_size) % path_cnt; 1698 pip = ct->ct_path_head; 1699 while (pip && path_index != -1) { 1700 MDI_PI_LOCK(pip); 1701 if (path_index == 0 && 1702 (MDI_PI(pip)->pi_state == 1703 MDI_PATHINFO_STATE_ONLINE) && 1704 MDI_PI(pip)->pi_preferred == preferred) { 1705 MDI_PI_HOLD(pip); 1706 MDI_PI_UNLOCK(pip); 1707 *ret_pip = pip; 1708 return (MDI_SUCCESS); 1709 } 1710 path_index --; 1711 next = (mdi_pathinfo_t *) 1712 MDI_PI(pip)->pi_client_link; 1713 MDI_PI_UNLOCK(pip); 1714 pip = next; 1715 } 1716 if (pip == NULL) { 1717 MDI_DEBUG(4, (CE_NOTE, NULL, 1718 "!lba %p, no pip !!\n", 1719 bp->b_blkno)); 1720 } else { 1721 MDI_DEBUG(4, (CE_NOTE, NULL, 1722 "!lba %p, no pip for path_index, " 1723 "pip %p\n", pip)); 1724 } 1725 } 1726 return (MDI_FAILURE); 1727 } 1728 1729 /* 1730 * mdi_select_path(): 1731 * select a path to access a client 
device. 1732 * 1733 * mdi_select_path() function is called by the vHCI drivers to 1734 * select a path to route the I/O request to. The caller passes 1735 * the block I/O data transfer structure ("buf") as one of the 1736 * parameters. The mpxio framework uses the buf structure 1737 * contents to maintain per path statistics (total I/O size / 1738 * count pending). If more than one online paths are available to 1739 * select, the framework automatically selects a suitable path 1740 * for routing I/O request. If a failover operation is active for 1741 * this client device the call shall be failed with MDI_BUSY error 1742 * code. 1743 * 1744 * By default this function returns a suitable path in online 1745 * state based on the current load balancing policy. Currently 1746 * we support LOAD_BALANCE_NONE (Previously selected online path 1747 * will continue to be used till the path is usable) and 1748 * LOAD_BALANCE_RR (Online paths will be selected in a round 1749 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 1750 * based on the logical block). The load balancing 1751 * through vHCI drivers configuration file (driver.conf). 1752 * 1753 * vHCI drivers may override this default behavior by specifying 1754 * appropriate flags. If start_pip is specified (non NULL) is 1755 * used as start point to walk and find the next appropriate path. 1756 * The following values are currently defined: 1757 * MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or 1758 * MDI_SELECT_STANDBY_PATH (to select an STANDBY path). 1759 * 1760 * The non-standard behavior is used by the scsi_vhci driver, 1761 * whenever it has to use a STANDBY/FAULTED path. Eg. during 1762 * attach of client devices (to avoid an unnecessary failover 1763 * when the STANDBY path comes up first), during failover 1764 * (to activate a STANDBY path as ONLINE). 1765 * 1766 * The selected path in returned in a held state (ref_cnt). 1767 * Caller should release the hold by calling mdi_rele_path(). 
1768 * 1769 * Return Values: 1770 * MDI_SUCCESS - Completed successfully 1771 * MDI_BUSY - Client device is busy failing over 1772 * MDI_NOPATH - Client device is online, but no valid path are 1773 * available to access this client device 1774 * MDI_FAILURE - Invalid client device or state 1775 * MDI_DEVI_ONLINING 1776 * - Client device (struct dev_info state) is in 1777 * onlining state. 1778 */ 1779 1780 /*ARGSUSED*/ 1781 int 1782 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 1783 mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip) 1784 { 1785 mdi_client_t *ct; 1786 mdi_pathinfo_t *pip; 1787 mdi_pathinfo_t *next; 1788 mdi_pathinfo_t *head; 1789 mdi_pathinfo_t *start; 1790 client_lb_t lbp; /* load balancing policy */ 1791 int sb = 1; /* standard behavior */ 1792 int preferred = 1; /* preferred path */ 1793 int cond, cont = 1; 1794 int retry = 0; 1795 1796 if (flags != 0) { 1797 /* 1798 * disable default behavior 1799 */ 1800 sb = 0; 1801 } 1802 1803 *ret_pip = NULL; 1804 ct = i_devi_get_client(cdip); 1805 if (ct == NULL) { 1806 /* mdi extensions are NULL, Nothing more to do */ 1807 return (MDI_FAILURE); 1808 } 1809 1810 MDI_CLIENT_LOCK(ct); 1811 1812 if (sb) { 1813 if (MDI_CLIENT_IS_FAILED(ct)) { 1814 /* 1815 * Client is not ready to accept any I/O requests. 1816 * Fail this request. 1817 */ 1818 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 1819 "client state offline ct = %p\n", ct)); 1820 MDI_CLIENT_UNLOCK(ct); 1821 return (MDI_FAILURE); 1822 } 1823 1824 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1825 /* 1826 * Check for Failover is in progress. If so tell the 1827 * caller that this device is busy. 1828 */ 1829 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 1830 "client failover in progress ct = %p\n", ct)); 1831 MDI_CLIENT_UNLOCK(ct); 1832 return (MDI_BUSY); 1833 } 1834 1835 /* 1836 * Check to see whether the client device is attached. 
1837 * If not so, let the vHCI driver manually select a path 1838 * (standby) and let the probe/attach process to continue. 1839 */ 1840 if ((MDI_CLIENT_IS_DETACHED(ct)) || 1841 i_ddi_node_state(cdip) < DS_READY) { 1842 MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n")); 1843 MDI_CLIENT_UNLOCK(ct); 1844 return (MDI_DEVI_ONLINING); 1845 } 1846 } 1847 1848 /* 1849 * Cache in the client list head. If head of the list is NULL 1850 * return MDI_NOPATH 1851 */ 1852 head = ct->ct_path_head; 1853 if (head == NULL) { 1854 MDI_CLIENT_UNLOCK(ct); 1855 return (MDI_NOPATH); 1856 } 1857 1858 /* 1859 * for non default behavior, bypass current 1860 * load balancing policy and always use LOAD_BALANCE_RR 1861 * except that the start point will be adjusted based 1862 * on the provided start_pip 1863 */ 1864 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 1865 1866 switch (lbp) { 1867 case LOAD_BALANCE_NONE: 1868 /* 1869 * Load balancing is None or Alternate path mode 1870 * Start looking for a online mdi_pathinfo node starting from 1871 * last known selected path 1872 */ 1873 preferred = 1; 1874 pip = (mdi_pathinfo_t *)ct->ct_path_last; 1875 if (pip == NULL) { 1876 pip = head; 1877 } 1878 start = pip; 1879 do { 1880 MDI_PI_LOCK(pip); 1881 /* 1882 * No need to explicitly check if the path is disabled. 1883 * Since we are checking for state == ONLINE and the 1884 * same veriable is used for DISABLE/ENABLE information. 1885 */ 1886 if (MDI_PI(pip)->pi_state == 1887 MDI_PATHINFO_STATE_ONLINE && 1888 preferred == MDI_PI(pip)->pi_preferred) { 1889 /* 1890 * Return the path in hold state. Caller should 1891 * release the lock by calling mdi_rele_path() 1892 */ 1893 MDI_PI_HOLD(pip); 1894 MDI_PI_UNLOCK(pip); 1895 ct->ct_path_last = pip; 1896 *ret_pip = pip; 1897 MDI_CLIENT_UNLOCK(ct); 1898 return (MDI_SUCCESS); 1899 } 1900 1901 /* 1902 * Path is busy. 
1903 */ 1904 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 1905 MDI_PI_IS_TRANSIENT(pip)) 1906 retry = 1; 1907 /* 1908 * Keep looking for a next available online path 1909 */ 1910 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1911 if (next == NULL) { 1912 next = head; 1913 } 1914 MDI_PI_UNLOCK(pip); 1915 pip = next; 1916 if (start == pip && preferred) { 1917 preferred = 0; 1918 } else if (start == pip && !preferred) { 1919 cont = 0; 1920 } 1921 } while (cont); 1922 break; 1923 1924 case LOAD_BALANCE_LBA: 1925 /* 1926 * Make sure we are looking 1927 * for an online path. Otherwise, if it is for a STANDBY 1928 * path request, it will go through and fetch an ONLINE 1929 * path which is not desirable. 1930 */ 1931 if ((ct->ct_lb_args != NULL) && 1932 (ct->ct_lb_args->region_size) && bp && 1933 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 1934 if (i_mdi_lba_lb(ct, ret_pip, bp) 1935 == MDI_SUCCESS) { 1936 MDI_CLIENT_UNLOCK(ct); 1937 return (MDI_SUCCESS); 1938 } 1939 } 1940 /* FALLTHROUGH */ 1941 case LOAD_BALANCE_RR: 1942 /* 1943 * Load balancing is Round Robin. Start looking for a online 1944 * mdi_pathinfo node starting from last known selected path 1945 * as the start point. If override flags are specified, 1946 * process accordingly. 1947 * If the search is already in effect(start_pip not null), 1948 * then lets just use the same path preference to continue the 1949 * traversal. 1950 */ 1951 1952 if (start_pip != NULL) { 1953 preferred = MDI_PI(start_pip)->pi_preferred; 1954 } else { 1955 preferred = 1; 1956 } 1957 1958 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip; 1959 if (start == NULL) { 1960 pip = head; 1961 } else { 1962 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 1963 if (pip == NULL) { 1964 if (!sb) { 1965 if (preferred == 0) { 1966 /* 1967 * Looks like we have completed 1968 * the traversal as preferred 1969 * value is 0. Time to bail out. 
1970 */ 1971 *ret_pip = NULL; 1972 MDI_CLIENT_UNLOCK(ct); 1973 return (MDI_NOPATH); 1974 } else { 1975 /* 1976 * Looks like we reached the 1977 * end of the list. Lets enable 1978 * traversal of non preferred 1979 * paths. 1980 */ 1981 preferred = 0; 1982 } 1983 } 1984 pip = head; 1985 } 1986 } 1987 start = pip; 1988 do { 1989 MDI_PI_LOCK(pip); 1990 if (sb) { 1991 cond = ((MDI_PI(pip)->pi_state == 1992 MDI_PATHINFO_STATE_ONLINE && 1993 MDI_PI(pip)->pi_preferred == 1994 preferred) ? 1 : 0); 1995 } else { 1996 if (flags == MDI_SELECT_ONLINE_PATH) { 1997 cond = ((MDI_PI(pip)->pi_state == 1998 MDI_PATHINFO_STATE_ONLINE && 1999 MDI_PI(pip)->pi_preferred == 2000 preferred) ? 1 : 0); 2001 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2002 cond = ((MDI_PI(pip)->pi_state == 2003 MDI_PATHINFO_STATE_STANDBY && 2004 MDI_PI(pip)->pi_preferred == 2005 preferred) ? 1 : 0); 2006 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2007 MDI_SELECT_STANDBY_PATH)) { 2008 cond = (((MDI_PI(pip)->pi_state == 2009 MDI_PATHINFO_STATE_ONLINE || 2010 (MDI_PI(pip)->pi_state == 2011 MDI_PATHINFO_STATE_STANDBY)) && 2012 MDI_PI(pip)->pi_preferred == 2013 preferred) ? 1 : 0); 2014 } else { 2015 cond = 0; 2016 } 2017 } 2018 /* 2019 * No need to explicitly check if the path is disabled. 2020 * Since we are checking for state == ONLINE and the 2021 * same veriable is used for DISABLE/ENABLE information. 2022 */ 2023 if (cond) { 2024 /* 2025 * Return the path in hold state. Caller should 2026 * release the lock by calling mdi_rele_path() 2027 */ 2028 MDI_PI_HOLD(pip); 2029 MDI_PI_UNLOCK(pip); 2030 if (sb) 2031 ct->ct_path_last = pip; 2032 *ret_pip = pip; 2033 MDI_CLIENT_UNLOCK(ct); 2034 return (MDI_SUCCESS); 2035 } 2036 /* 2037 * Path is busy. 
2038 */ 2039 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2040 MDI_PI_IS_TRANSIENT(pip)) 2041 retry = 1; 2042 2043 /* 2044 * Keep looking for a next available online path 2045 */ 2046 do_again: 2047 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2048 if (next == NULL) { 2049 if (!sb) { 2050 if (preferred == 1) { 2051 /* 2052 * Looks like we reached the 2053 * end of the list. Lets enable 2054 * traversal of non preferred 2055 * paths. 2056 */ 2057 preferred = 0; 2058 next = head; 2059 } else { 2060 /* 2061 * We have done both the passes 2062 * Preferred as well as for 2063 * Non-preferred. Bail out now. 2064 */ 2065 cont = 0; 2066 } 2067 } else { 2068 /* 2069 * Standard behavior case. 2070 */ 2071 next = head; 2072 } 2073 } 2074 MDI_PI_UNLOCK(pip); 2075 if (cont == 0) { 2076 break; 2077 } 2078 pip = next; 2079 2080 if (!sb) { 2081 /* 2082 * We need to handle the selection of 2083 * non-preferred path in the following 2084 * case: 2085 * 2086 * +------+ +------+ +------+ +-----+ 2087 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2088 * +------+ +------+ +------+ +-----+ 2089 * 2090 * If we start the search with B, we need to 2091 * skip beyond B to pick C which is non - 2092 * preferred in the second pass. The following 2093 * test, if true, will allow us to skip over 2094 * the 'start'(B in the example) to select 2095 * other non preferred elements. 
2096 */ 2097 if ((start_pip != NULL) && (start_pip == pip) && 2098 (MDI_PI(start_pip)->pi_preferred 2099 != preferred)) { 2100 /* 2101 * try again after going past the start 2102 * pip 2103 */ 2104 MDI_PI_LOCK(pip); 2105 goto do_again; 2106 } 2107 } else { 2108 /* 2109 * Standard behavior case 2110 */ 2111 if (start == pip && preferred) { 2112 /* look for nonpreferred paths */ 2113 preferred = 0; 2114 } else if (start == pip && !preferred) { 2115 /* 2116 * Exit condition 2117 */ 2118 cont = 0; 2119 } 2120 } 2121 } while (cont); 2122 break; 2123 } 2124 2125 MDI_CLIENT_UNLOCK(ct); 2126 if (retry == 1) { 2127 return (MDI_BUSY); 2128 } else { 2129 return (MDI_NOPATH); 2130 } 2131 } 2132 2133 /* 2134 * For a client, return the next available path to any phci 2135 * 2136 * Note: 2137 * Caller should hold the branch's devinfo node to get a consistent 2138 * snap shot of the mdi_pathinfo nodes. 2139 * 2140 * Please note that even the list is stable the mdi_pathinfo 2141 * node state and properties are volatile. The caller should lock 2142 * and unlock the nodes by calling mdi_pi_lock() and 2143 * mdi_pi_unlock() functions to get a stable properties. 2144 * 2145 * If there is a need to use the nodes beyond the hold of the 2146 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2147 * need to be held against unexpected removal by calling 2148 * mdi_hold_path() and should be released by calling 2149 * mdi_rele_path() on completion. 
2150 */ 2151 mdi_pathinfo_t * 2152 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2153 { 2154 mdi_client_t *ct; 2155 2156 if (!MDI_CLIENT(ct_dip)) 2157 return (NULL); 2158 2159 /* 2160 * Walk through client link 2161 */ 2162 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2163 ASSERT(ct != NULL); 2164 2165 if (pip == NULL) 2166 return ((mdi_pathinfo_t *)ct->ct_path_head); 2167 2168 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2169 } 2170 2171 /* 2172 * For a phci, return the next available path to any client 2173 * Note: ditto mdi_get_next_phci_path() 2174 */ 2175 mdi_pathinfo_t * 2176 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2177 { 2178 mdi_phci_t *ph; 2179 2180 if (!MDI_PHCI(ph_dip)) 2181 return (NULL); 2182 2183 /* 2184 * Walk through pHCI link 2185 */ 2186 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2187 ASSERT(ph != NULL); 2188 2189 if (pip == NULL) 2190 return ((mdi_pathinfo_t *)ph->ph_path_head); 2191 2192 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2193 } 2194 2195 /* 2196 * mdi_get_nextpath(): 2197 * mdi_pathinfo node walker function. Get the next node from the 2198 * client or pHCI device list. 2199 * 2200 * XXX This is wrapper function for compatibility purposes only. 2201 * 2202 * It doesn't work under Multi-level MPxIO, where a dip 2203 * is both client and phci (which link should next_path follow?). 2204 * Once Leadville is modified to call mdi_get_next_phci/client_path, 2205 * this interface should be removed. 2206 */ 2207 void 2208 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip, 2209 mdi_pathinfo_t **ret_pip) 2210 { 2211 if (MDI_CLIENT(dip)) { 2212 *ret_pip = mdi_get_next_phci_path(dip, pip); 2213 } else if (MDI_PHCI(dip)) { 2214 *ret_pip = mdi_get_next_client_path(dip, pip); 2215 } else { 2216 *ret_pip = NULL; 2217 } 2218 } 2219 2220 /* 2221 * mdi_hold_path(): 2222 * Hold the mdi_pathinfo node against unwanted unexpected free. 
2223 * Return Values: 2224 * None 2225 */ 2226 void 2227 mdi_hold_path(mdi_pathinfo_t *pip) 2228 { 2229 if (pip) { 2230 MDI_PI_LOCK(pip); 2231 MDI_PI_HOLD(pip); 2232 MDI_PI_UNLOCK(pip); 2233 } 2234 } 2235 2236 2237 /* 2238 * mdi_rele_path(): 2239 * Release the mdi_pathinfo node which was selected 2240 * through mdi_select_path() mechanism or manually held by 2241 * calling mdi_hold_path(). 2242 * Return Values: 2243 * None 2244 */ 2245 void 2246 mdi_rele_path(mdi_pathinfo_t *pip) 2247 { 2248 if (pip) { 2249 MDI_PI_LOCK(pip); 2250 MDI_PI_RELE(pip); 2251 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2252 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2253 } 2254 MDI_PI_UNLOCK(pip); 2255 } 2256 } 2257 2258 2259 /* 2260 * mdi_pi_lock(): 2261 * Lock the mdi_pathinfo node. 2262 * Note: 2263 * The caller should release the lock by calling mdi_pi_unlock() 2264 */ 2265 void 2266 mdi_pi_lock(mdi_pathinfo_t *pip) 2267 { 2268 ASSERT(pip != NULL); 2269 if (pip) { 2270 MDI_PI_LOCK(pip); 2271 } 2272 } 2273 2274 2275 /* 2276 * mdi_pi_unlock(): 2277 * Unlock the mdi_pathinfo node. 2278 * Note: 2279 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2280 */ 2281 void 2282 mdi_pi_unlock(mdi_pathinfo_t *pip) 2283 { 2284 ASSERT(pip != NULL); 2285 if (pip) { 2286 MDI_PI_UNLOCK(pip); 2287 } 2288 } 2289 2290 /* 2291 * mdi_pi_find(): 2292 * Search the list of mdi_pathinfo nodes attached to the 2293 * pHCI/Client device node whose path address matches "paddr". 2294 * Returns a pointer to the mdi_pathinfo node if a matching node is 2295 * found. 2296 * Return Values: 2297 * mdi_pathinfo node handle 2298 * NULL 2299 * Notes: 2300 * Caller need not hold any locks to call this function. 
2301 */ 2302 mdi_pathinfo_t * 2303 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2304 { 2305 mdi_phci_t *ph; 2306 mdi_vhci_t *vh; 2307 mdi_client_t *ct; 2308 mdi_pathinfo_t *pip = NULL; 2309 2310 if ((pdip == NULL) || (paddr == NULL)) { 2311 return (NULL); 2312 } 2313 ph = i_devi_get_phci(pdip); 2314 if (ph == NULL) { 2315 /* 2316 * Invalid pHCI device, Nothing more to do. 2317 */ 2318 MDI_DEBUG(2, (CE_WARN, NULL, 2319 "!mdi_pi_find: invalid phci")); 2320 return (NULL); 2321 } 2322 2323 vh = ph->ph_vhci; 2324 if (vh == NULL) { 2325 /* 2326 * Invalid vHCI device, Nothing more to do. 2327 */ 2328 MDI_DEBUG(2, (CE_WARN, NULL, 2329 "!mdi_pi_find: invalid phci")); 2330 return (NULL); 2331 } 2332 2333 /* 2334 * Look for client device identified by caddr (guid) 2335 */ 2336 if (caddr == NULL) { 2337 /* 2338 * Find a mdi_pathinfo node under pHCI list for a matching 2339 * unit address. 2340 */ 2341 mutex_enter(&ph->ph_mutex); 2342 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2343 2344 while (pip != NULL) { 2345 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2346 break; 2347 } 2348 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2349 } 2350 mutex_exit(&ph->ph_mutex); 2351 return (pip); 2352 } 2353 2354 /* 2355 * XXX - Is the rest of the code in this function really necessary? 2356 * The consumers of mdi_pi_find() can search for the desired pathinfo 2357 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2358 * whether the search is based on the pathinfo nodes attached to 2359 * the pHCI or the client node, the result will be the same. 2360 */ 2361 2362 /* 2363 * Find the client device corresponding to 'caddr' 2364 */ 2365 mutex_enter(&mdi_mutex); 2366 2367 /* 2368 * XXX - Passing NULL to the following function works as long as the 2369 * the client addresses (caddr) are unique per vhci basis. 
2370 */ 2371 ct = i_mdi_client_find(vh, NULL, caddr); 2372 if (ct == NULL) { 2373 /* 2374 * Client not found, Obviously mdi_pathinfo node has not been 2375 * created yet. 2376 */ 2377 mutex_exit(&mdi_mutex); 2378 return (pip); 2379 } 2380 2381 /* 2382 * Hold the client lock and look for a mdi_pathinfo node with matching 2383 * pHCI and paddr 2384 */ 2385 MDI_CLIENT_LOCK(ct); 2386 2387 /* 2388 * Release the global mutex as it is no more needed. Note: We always 2389 * respect the locking order while acquiring. 2390 */ 2391 mutex_exit(&mdi_mutex); 2392 2393 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2394 while (pip != NULL) { 2395 /* 2396 * Compare the unit address 2397 */ 2398 if ((MDI_PI(pip)->pi_phci == ph) && 2399 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2400 break; 2401 } 2402 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2403 } 2404 MDI_CLIENT_UNLOCK(ct); 2405 return (pip); 2406 } 2407 2408 /* 2409 * mdi_pi_alloc(): 2410 * Allocate and initialize a new instance of a mdi_pathinfo node. 2411 * The mdi_pathinfo node returned by this function identifies a 2412 * unique device path is capable of having properties attached 2413 * and passed to mdi_pi_online() to fully attach and online the 2414 * path and client device node. 2415 * The mdi_pathinfo node returned by this function must be 2416 * destroyed using mdi_pi_free() if the path is no longer 2417 * operational or if the caller fails to attach a client device 2418 * node when calling mdi_pi_online(). The framework will not free 2419 * the resources allocated. 2420 * This function can be called from both interrupt and kernel 2421 * contexts. DDI_NOSLEEP flag should be used while calling 2422 * from interrupt contexts. 
2423 * Return Values: 2424 * MDI_SUCCESS 2425 * MDI_FAILURE 2426 * MDI_NOMEM 2427 */ 2428 /*ARGSUSED*/ 2429 int 2430 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2431 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2432 { 2433 mdi_vhci_t *vh; 2434 mdi_phci_t *ph; 2435 mdi_client_t *ct; 2436 mdi_pathinfo_t *pip = NULL; 2437 dev_info_t *cdip; 2438 int rv = MDI_NOMEM; 2439 int path_allocated = 0; 2440 2441 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2442 ret_pip == NULL) { 2443 /* Nothing more to do */ 2444 return (MDI_FAILURE); 2445 } 2446 2447 *ret_pip = NULL; 2448 ph = i_devi_get_phci(pdip); 2449 ASSERT(ph != NULL); 2450 if (ph == NULL) { 2451 /* Invalid pHCI device, return failure */ 2452 MDI_DEBUG(1, (CE_WARN, NULL, 2453 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2454 return (MDI_FAILURE); 2455 } 2456 2457 MDI_PHCI_LOCK(ph); 2458 vh = ph->ph_vhci; 2459 if (vh == NULL) { 2460 /* Invalid vHCI device, return failure */ 2461 MDI_DEBUG(1, (CE_WARN, NULL, 2462 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2463 MDI_PHCI_UNLOCK(ph); 2464 return (MDI_FAILURE); 2465 } 2466 2467 if (MDI_PHCI_IS_READY(ph) == 0) { 2468 /* 2469 * Do not allow new node creation when pHCI is in 2470 * offline/suspended states 2471 */ 2472 MDI_DEBUG(1, (CE_WARN, NULL, 2473 "mdi_pi_alloc: pHCI=%p is not ready", ph)); 2474 MDI_PHCI_UNLOCK(ph); 2475 return (MDI_BUSY); 2476 } 2477 MDI_PHCI_UNSTABLE(ph); 2478 MDI_PHCI_UNLOCK(ph); 2479 2480 /* look for a matching client, create one if not found */ 2481 mutex_enter(&mdi_mutex); 2482 ct = i_mdi_client_find(vh, cname, caddr); 2483 if (ct == NULL) { 2484 ct = i_mdi_client_alloc(vh, cname, caddr); 2485 ASSERT(ct != NULL); 2486 } 2487 2488 if (ct->ct_dip == NULL) { 2489 /* 2490 * Allocate a devinfo node 2491 */ 2492 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2493 compatible, ncompatible); 2494 if (ct->ct_dip == NULL) { 2495 (void) i_mdi_client_free(vh, ct); 2496 goto 
fail; 2497 } 2498 } 2499 cdip = ct->ct_dip; 2500 2501 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2502 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2503 2504 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2505 while (pip != NULL) { 2506 /* 2507 * Compare the unit address 2508 */ 2509 if ((MDI_PI(pip)->pi_phci == ph) && 2510 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2511 break; 2512 } 2513 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2514 } 2515 2516 if (pip == NULL) { 2517 /* 2518 * This is a new path for this client device. Allocate and 2519 * initialize a new pathinfo node 2520 */ 2521 pip = i_mdi_pi_alloc(ph, paddr, ct); 2522 ASSERT(pip != NULL); 2523 path_allocated = 1; 2524 } 2525 rv = MDI_SUCCESS; 2526 2527 fail: 2528 /* 2529 * Release the global mutex. 2530 */ 2531 mutex_exit(&mdi_mutex); 2532 2533 /* 2534 * Mark the pHCI as stable 2535 */ 2536 MDI_PHCI_LOCK(ph); 2537 MDI_PHCI_STABLE(ph); 2538 MDI_PHCI_UNLOCK(ph); 2539 *ret_pip = pip; 2540 2541 if (path_allocated) 2542 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2543 2544 return (rv); 2545 } 2546 2547 /*ARGSUSED*/ 2548 int 2549 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2550 int flags, mdi_pathinfo_t **ret_pip) 2551 { 2552 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2553 flags, ret_pip)); 2554 } 2555 2556 /* 2557 * i_mdi_pi_alloc(): 2558 * Allocate a mdi_pathinfo node and add to the pHCI path list 2559 * Return Values: 2560 * mdi_pathinfo 2561 */ 2562 2563 /*ARGSUSED*/ 2564 static mdi_pathinfo_t * 2565 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2566 { 2567 mdi_pathinfo_t *pip; 2568 int ct_circular; 2569 int ph_circular; 2570 2571 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2572 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2573 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2574 MDI_PATHINFO_STATE_TRANSIENT; 2575 2576 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2577 MDI_PI_SET_USER_DISABLE(pip); 
2578 2579 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2580 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2581 2582 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2583 MDI_PI_SET_DRV_DISABLE(pip); 2584 2585 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2586 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2587 MDI_PI(pip)->pi_client = ct; 2588 MDI_PI(pip)->pi_phci = ph; 2589 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2590 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2591 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2592 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2593 MDI_PI(pip)->pi_pprivate = NULL; 2594 MDI_PI(pip)->pi_cprivate = NULL; 2595 MDI_PI(pip)->pi_vprivate = NULL; 2596 MDI_PI(pip)->pi_client_link = NULL; 2597 MDI_PI(pip)->pi_phci_link = NULL; 2598 MDI_PI(pip)->pi_ref_cnt = 0; 2599 MDI_PI(pip)->pi_kstats = NULL; 2600 MDI_PI(pip)->pi_preferred = 1; 2601 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2602 2603 /* 2604 * Lock both dev_info nodes against changes in parallel. 2605 */ 2606 ndi_devi_enter(ct->ct_dip, &ct_circular); 2607 ndi_devi_enter(ph->ph_dip, &ph_circular); 2608 2609 i_mdi_phci_add_path(ph, pip); 2610 i_mdi_client_add_path(ct, pip); 2611 2612 ndi_devi_exit(ph->ph_dip, ph_circular); 2613 ndi_devi_exit(ct->ct_dip, ct_circular); 2614 2615 return (pip); 2616 } 2617 2618 /* 2619 * i_mdi_phci_add_path(): 2620 * Add a mdi_pathinfo node to pHCI list. 
 * Notes:
 *		Caller must own the pHCI devinfo node busy; this is asserted
 *		below with DEVI_BUSY_OWNED().  The node is appended at the
 *		tail of the list and ph_path_count is incremented.
 */

static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	if (ph->ph_path_head == NULL) {
		/* First path on this pHCI. */
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 * Notes:
 *		Caller must own the client devinfo node busy; this is
 *		asserted below with DEVI_BUSY_OWNED().  The node is appended
 *		at the tail of the list and ct_path_count is incremented.
 */

static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	if (ct->ct_path_head == NULL) {
		/* First path of this client. */
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 *		The node must be in the offline, init or initing state.  The
 *		function waits for outstanding references (pi_ref_cnt) to
 *		drain, giving up with MDI_BUSY when a 60 second wait times
 *		out.  "flags" is not used.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		(a non-success value returned by the vHCI vo_pi_uninit()
 *		callback is passed back unchanged)
 */

/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 * pi_ref_cv is broadcast by mdi_rele_path() when the count
		 * drops to zero.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!i_mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!i_mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!i_mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	/* Remember whether a PM hold must be released once the path is gone. */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	/* Dropped from the vhci cache up front; re-added on failure below. */
	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till mdi_mutex is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * The client lock is dropped and re-taken so that mdi_mutex is
	 * acquired first, as the default locking order requires.
	 */
	MDI_CLIENT_UNLOCK(ct);
	mutex_enter(&mdi_mutex);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * NOTE(review): the path state is examined here without pi_mutex
	 * held (it was released above) -- confirm this is intentional.
	 */
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			mutex_exit(&mdi_mutex);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	mutex_exit(&mdi_mutex);

	/*
	 * NOTE(review): only MDI_FAILURE restores the vhci cache entry; any
	 * other non-success value from vo_pi_uninit() leaves the path
	 * removed from the cache although the node still exists -- confirm.
	 */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node
 *		Destroys the per-path kstats, unlinks the node from the
 *		client and pHCI lists, then releases its mutex, condition
 *		variables, unit address, property list and the node itself.
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* Same enter order (client, then pHCI) as i_mdi_pi_alloc(). */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		/* Size matches the strlen() + 1 allocation in i_mdi_pi_alloc() */
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 * 		Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *		Caller must own the pHCI devinfo node busy; this is asserted
 *		below with DEVI_BUSY_OWNED().  The node's pHCI link and pHCI
 *		back pointer are cleared even if it was not found on the list.
 */

static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Walk the singly linked list, remembering the predecessor. */
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
}

/*
 * i_mdi_client_remove_path():
 * 		Remove a mdi_pathinfo node from client path list.
 * Notes:
 *		Caller must own the client devinfo node busy; this is
 *		asserted below with DEVI_BUSY_OWNED().  The node's client link
 *		and client back pointer are cleared even if it was not found
 *		on the list.
 */

static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Walk the singly linked list, remembering the predecessor. */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* ct_path_last must not keep pointing at the removed node. */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Move a mdi_pathinfo node to the requested state (online,
 *		standby, fault or offline).  The node is first put into the
 *		matching transient state, the vHCI vo_pi_state_change()
 *		callback (if any) is invoked without pathinfo/client locks
 *		held, and the client device node is then onlined or offlined
 *		as the resulting client state requires.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		(a non-success value returned by vo_pi_state_change() is
 *		passed back unchanged)
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* The callback is made without pi_mutex held. */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Marked stable again at state_change_exit. */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the pathinfo nor the client lock is
				 * held at this point.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* Notify the vHCI driver; no pathinfo or client lock is held here. */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* Roll back to the state saved before the transition */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 * From here until the conversion switch below, rv
			 * carries an NDI_* code.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip &&
				    (i_ddi_node_state(cdip) < DS_READY) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3266 * Return Values: 3267 * MDI_SUCCESS 3268 * MDI_FAILURE 3269 */ 3270 int 3271 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3272 { 3273 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3274 dev_info_t *cdip; 3275 int client_held = 0; 3276 int rv; 3277 3278 ASSERT(ct != NULL); 3279 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3280 if (rv != MDI_SUCCESS) 3281 return (rv); 3282 3283 MDI_PI_LOCK(pip); 3284 if (MDI_PI(pip)->pi_pm_held == 0) { 3285 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3286 "i_mdi_pm_hold_pip\n")); 3287 i_mdi_pm_hold_pip(pip); 3288 client_held = 1; 3289 } 3290 MDI_PI_UNLOCK(pip); 3291 3292 if (client_held) { 3293 MDI_CLIENT_LOCK(ct); 3294 if (ct->ct_power_cnt == 0) { 3295 rv = i_mdi_power_all_phci(ct); 3296 } 3297 3298 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3299 "i_mdi_pm_hold_client\n")); 3300 i_mdi_pm_hold_client(ct, 1); 3301 MDI_CLIENT_UNLOCK(ct); 3302 } 3303 3304 /* 3305 * Create the per-path (pathinfo) IO and error kstats which 3306 * are reported via iostat(1m). 3307 * 3308 * Defer creating the per-path kstats if device is not yet 3309 * attached; the names of the kstats are constructed in part 3310 * using the devices instance number which is assigned during 3311 * process of attaching the client device. 3312 * 3313 * The framework post_attach handler, mdi_post_attach(), is 3314 * is responsible for initializing the client's pathinfo list 3315 * once successfully attached. 
3316 */ 3317 cdip = ct->ct_dip; 3318 ASSERT(cdip); 3319 if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED)) 3320 return (rv); 3321 3322 MDI_CLIENT_LOCK(ct); 3323 rv = i_mdi_pi_kstat_create(pip); 3324 MDI_CLIENT_UNLOCK(ct); 3325 return (rv); 3326 } 3327 3328 /* 3329 * mdi_pi_standby(): 3330 * Place the mdi_pathinfo node in standby state 3331 * 3332 * Return Values: 3333 * MDI_SUCCESS 3334 * MDI_FAILURE 3335 */ 3336 int 3337 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3338 { 3339 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3340 } 3341 3342 /* 3343 * mdi_pi_fault(): 3344 * Place the mdi_pathinfo node in fault'ed state 3345 * Return Values: 3346 * MDI_SUCCESS 3347 * MDI_FAILURE 3348 */ 3349 int 3350 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3351 { 3352 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3353 } 3354 3355 /* 3356 * mdi_pi_offline(): 3357 * Offline a mdi_pathinfo node. 3358 * Return Values: 3359 * MDI_SUCCESS 3360 * MDI_FAILURE 3361 */ 3362 int 3363 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3364 { 3365 int ret, client_held = 0; 3366 mdi_client_t *ct; 3367 3368 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3369 3370 if (ret == MDI_SUCCESS) { 3371 MDI_PI_LOCK(pip); 3372 if (MDI_PI(pip)->pi_pm_held) { 3373 client_held = 1; 3374 } 3375 MDI_PI_UNLOCK(pip); 3376 3377 if (client_held) { 3378 ct = MDI_PI(pip)->pi_client; 3379 MDI_CLIENT_LOCK(ct); 3380 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3381 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3382 i_mdi_pm_rele_client(ct, 1); 3383 MDI_CLIENT_UNLOCK(ct); 3384 } 3385 } 3386 3387 return (ret); 3388 } 3389 3390 /* 3391 * i_mdi_pi_offline(): 3392 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3393 */ 3394 static int 3395 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3396 { 3397 dev_info_t *vdip = NULL; 3398 mdi_vhci_t *vh = NULL; 3399 mdi_client_t *ct = NULL; 3400 int (*f)(); 3401 int rv; 3402 3403 MDI_PI_LOCK(pip); 
3404 ct = MDI_PI(pip)->pi_client; 3405 ASSERT(ct != NULL); 3406 3407 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3408 /* 3409 * Give a chance for pending I/Os to complete. 3410 */ 3411 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3412 "%d cmds still pending on path: %p\n", 3413 MDI_PI(pip)->pi_ref_cnt, pip)); 3414 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3415 &MDI_PI(pip)->pi_mutex, 3416 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3417 /* 3418 * The timeout time reached without ref_cnt being zero 3419 * being signaled. 3420 */ 3421 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3422 "Timeout reached on path %p without the cond\n", 3423 pip)); 3424 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3425 "%d cmds still pending on path: %p\n", 3426 MDI_PI(pip)->pi_ref_cnt, pip)); 3427 } 3428 } 3429 vh = ct->ct_vhci; 3430 vdip = vh->vh_dip; 3431 3432 /* 3433 * Notify vHCI that has registered this event 3434 */ 3435 ASSERT(vh->vh_ops); 3436 f = vh->vh_ops->vo_pi_state_change; 3437 3438 if (f != NULL) { 3439 MDI_PI_UNLOCK(pip); 3440 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3441 flags)) != MDI_SUCCESS) { 3442 MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed " 3443 "vdip 0x%x, pip 0x%x", vdip, pip)); 3444 } 3445 MDI_PI_LOCK(pip); 3446 } 3447 3448 /* 3449 * Set the mdi_pathinfo node state and clear the transient condition 3450 */ 3451 MDI_PI_SET_OFFLINE(pip); 3452 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3453 MDI_PI_UNLOCK(pip); 3454 3455 MDI_CLIENT_LOCK(ct); 3456 if (rv == MDI_SUCCESS) { 3457 if (ct->ct_unstable == 0) { 3458 dev_info_t *cdip = ct->ct_dip; 3459 3460 /* 3461 * Onlining the mdi_pathinfo node will impact the 3462 * client state Update the client and dev_info node 3463 * state accordingly 3464 */ 3465 i_mdi_client_update_state(ct); 3466 rv = NDI_SUCCESS; 3467 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3468 if (cdip && 3469 (i_ddi_node_state(cdip) >= 3470 DS_INITIALIZED)) { 3471 MDI_CLIENT_UNLOCK(ct); 3472 rv = 
ndi_devi_offline(cdip, 0); 3473 MDI_CLIENT_LOCK(ct); 3474 if (rv != NDI_SUCCESS) { 3475 /* 3476 * ndi_devi_offline failed. 3477 * Reset client flags to 3478 * online. 3479 */ 3480 MDI_DEBUG(4, (CE_WARN, cdip, 3481 "!ndi_devi_offline: failed " 3482 " Error: %x", rv)); 3483 MDI_CLIENT_SET_ONLINE(ct); 3484 } 3485 } 3486 } 3487 /* 3488 * Convert to MDI error code 3489 */ 3490 switch (rv) { 3491 case NDI_SUCCESS: 3492 rv = MDI_SUCCESS; 3493 break; 3494 case NDI_BUSY: 3495 rv = MDI_BUSY; 3496 break; 3497 default: 3498 rv = MDI_FAILURE; 3499 break; 3500 } 3501 } 3502 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3503 i_mdi_report_path_state(ct, pip); 3504 } 3505 3506 MDI_CLIENT_UNLOCK(ct); 3507 3508 /* 3509 * Change in the mdi_pathinfo node state will impact the client state 3510 */ 3511 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3512 ct, pip)); 3513 return (rv); 3514 } 3515 3516 3517 /* 3518 * mdi_pi_get_addr(): 3519 * Get the unit address associated with a mdi_pathinfo node 3520 * 3521 * Return Values: 3522 * char * 3523 */ 3524 char * 3525 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3526 { 3527 if (pip == NULL) 3528 return (NULL); 3529 3530 return (MDI_PI(pip)->pi_addr); 3531 } 3532 3533 /* 3534 * mdi_pi_get_client(): 3535 * Get the client devinfo associated with a mdi_pathinfo node 3536 * 3537 * Return Values: 3538 * Handle to client device dev_info node 3539 */ 3540 dev_info_t * 3541 mdi_pi_get_client(mdi_pathinfo_t *pip) 3542 { 3543 dev_info_t *dip = NULL; 3544 if (pip) { 3545 dip = MDI_PI(pip)->pi_client->ct_dip; 3546 } 3547 return (dip); 3548 } 3549 3550 /* 3551 * mdi_pi_get_phci(): 3552 * Get the pHCI devinfo associated with the mdi_pathinfo node 3553 * Return Values: 3554 * Handle to dev_info node 3555 */ 3556 dev_info_t * 3557 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3558 { 3559 dev_info_t *dip = NULL; 3560 if (pip) { 3561 dip = MDI_PI(pip)->pi_phci->ph_dip; 3562 } 3563 return (dip); 3564 } 3565 3566 /* 3567 * mdi_pi_get_client_private(): 3568 * Get the 
client private information associated with the 3569 * mdi_pathinfo node 3570 */ 3571 void * 3572 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3573 { 3574 void *cprivate = NULL; 3575 if (pip) { 3576 cprivate = MDI_PI(pip)->pi_cprivate; 3577 } 3578 return (cprivate); 3579 } 3580 3581 /* 3582 * mdi_pi_set_client_private(): 3583 * Set the client private information in the mdi_pathinfo node 3584 */ 3585 void 3586 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3587 { 3588 if (pip) { 3589 MDI_PI(pip)->pi_cprivate = priv; 3590 } 3591 } 3592 3593 /* 3594 * mdi_pi_get_phci_private(): 3595 * Get the pHCI private information associated with the 3596 * mdi_pathinfo node 3597 */ 3598 caddr_t 3599 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3600 { 3601 caddr_t pprivate = NULL; 3602 if (pip) { 3603 pprivate = MDI_PI(pip)->pi_pprivate; 3604 } 3605 return (pprivate); 3606 } 3607 3608 /* 3609 * mdi_pi_set_phci_private(): 3610 * Set the pHCI private information in the mdi_pathinfo node 3611 */ 3612 void 3613 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3614 { 3615 if (pip) { 3616 MDI_PI(pip)->pi_pprivate = priv; 3617 } 3618 } 3619 3620 /* 3621 * mdi_pi_get_state(): 3622 * Get the mdi_pathinfo node state. Transient states are internal 3623 * and not provided to the users 3624 */ 3625 mdi_pathinfo_state_t 3626 mdi_pi_get_state(mdi_pathinfo_t *pip) 3627 { 3628 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3629 3630 if (pip) { 3631 if (MDI_PI_IS_TRANSIENT(pip)) { 3632 /* 3633 * mdi_pathinfo is in state transition. Return the 3634 * last good state. 3635 */ 3636 state = MDI_PI_OLD_STATE(pip); 3637 } else { 3638 state = MDI_PI_STATE(pip); 3639 } 3640 } 3641 return (state); 3642 } 3643 3644 /* 3645 * Note that the following function needs to be the new interface for 3646 * mdi_pi_get_state when mpxio gets integrated to ON. 
3647 */ 3648 int 3649 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3650 uint32_t *ext_state) 3651 { 3652 *state = MDI_PATHINFO_STATE_INIT; 3653 3654 if (pip) { 3655 if (MDI_PI_IS_TRANSIENT(pip)) { 3656 /* 3657 * mdi_pathinfo is in state transition. Return the 3658 * last good state. 3659 */ 3660 *state = MDI_PI_OLD_STATE(pip); 3661 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3662 } else { 3663 *state = MDI_PI_STATE(pip); 3664 *ext_state = MDI_PI_EXT_STATE(pip); 3665 } 3666 } 3667 return (MDI_SUCCESS); 3668 } 3669 3670 /* 3671 * mdi_pi_get_preferred: 3672 * Get the preferred path flag 3673 */ 3674 int 3675 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3676 { 3677 if (pip) { 3678 return (MDI_PI(pip)->pi_preferred); 3679 } 3680 return (0); 3681 } 3682 3683 /* 3684 * mdi_pi_set_preferred: 3685 * Set the preferred path flag 3686 */ 3687 void 3688 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3689 { 3690 if (pip) { 3691 MDI_PI(pip)->pi_preferred = preferred; 3692 } 3693 } 3694 3695 3696 /* 3697 * mdi_pi_set_state(): 3698 * Set the mdi_pathinfo node state 3699 */ 3700 void 3701 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3702 { 3703 uint32_t ext_state; 3704 3705 if (pip) { 3706 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3707 MDI_PI(pip)->pi_state = state; 3708 MDI_PI(pip)->pi_state |= ext_state; 3709 } 3710 } 3711 3712 /* 3713 * Property functions: 3714 */ 3715 3716 int 3717 i_map_nvlist_error_to_mdi(int val) 3718 { 3719 int rv; 3720 3721 switch (val) { 3722 case 0: 3723 rv = DDI_PROP_SUCCESS; 3724 break; 3725 case EINVAL: 3726 case ENOTSUP: 3727 rv = DDI_PROP_INVAL_ARG; 3728 break; 3729 case ENOMEM: 3730 rv = DDI_PROP_NO_MEMORY; 3731 break; 3732 default: 3733 rv = DDI_PROP_NOT_FOUND; 3734 break; 3735 } 3736 return (rv); 3737 } 3738 3739 /* 3740 * mdi_pi_get_next_prop(): 3741 * Property walk function. 
The caller should hold mdi_pi_lock() 3742 * and release by calling mdi_pi_unlock() at the end of walk to 3743 * get a consistent value. 3744 */ 3745 3746 nvpair_t * 3747 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3748 { 3749 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3750 return (NULL); 3751 } 3752 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3753 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3754 } 3755 3756 /* 3757 * mdi_prop_remove(): 3758 * Remove the named property from the named list. 3759 */ 3760 3761 int 3762 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3763 { 3764 if (pip == NULL) { 3765 return (DDI_PROP_NOT_FOUND); 3766 } 3767 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3768 MDI_PI_LOCK(pip); 3769 if (MDI_PI(pip)->pi_prop == NULL) { 3770 MDI_PI_UNLOCK(pip); 3771 return (DDI_PROP_NOT_FOUND); 3772 } 3773 if (name) { 3774 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3775 } else { 3776 char nvp_name[MAXNAMELEN]; 3777 nvpair_t *nvp; 3778 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3779 while (nvp) { 3780 nvpair_t *next; 3781 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3782 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3783 nvpair_name(nvp)); 3784 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3785 nvp_name); 3786 nvp = next; 3787 } 3788 } 3789 MDI_PI_UNLOCK(pip); 3790 return (DDI_PROP_SUCCESS); 3791 } 3792 3793 /* 3794 * mdi_prop_size(): 3795 * Get buffer size needed to pack the property data. 3796 * Caller should hold the mdi_pathinfo_t lock to get a consistent 3797 * buffer size. 
3798 */ 3799 3800 int 3801 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3802 { 3803 int rv; 3804 size_t bufsize; 3805 3806 *buflenp = 0; 3807 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3808 return (DDI_PROP_NOT_FOUND); 3809 } 3810 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3811 rv = nvlist_size(MDI_PI(pip)->pi_prop, 3812 &bufsize, NV_ENCODE_NATIVE); 3813 *buflenp = bufsize; 3814 return (i_map_nvlist_error_to_mdi(rv)); 3815 } 3816 3817 /* 3818 * mdi_prop_pack(): 3819 * pack the property list. The caller should hold the 3820 * mdi_pathinfo_t node to get a consistent data 3821 */ 3822 3823 int 3824 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 3825 { 3826 int rv; 3827 size_t bufsize; 3828 3829 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 3830 return (DDI_PROP_NOT_FOUND); 3831 } 3832 3833 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3834 3835 bufsize = buflen; 3836 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 3837 NV_ENCODE_NATIVE, KM_SLEEP); 3838 3839 return (i_map_nvlist_error_to_mdi(rv)); 3840 } 3841 3842 /* 3843 * mdi_prop_update_byte(): 3844 * Create/Update a byte property 3845 */ 3846 int 3847 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 3848 { 3849 int rv; 3850 3851 if (pip == NULL) { 3852 return (DDI_PROP_INVAL_ARG); 3853 } 3854 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3855 MDI_PI_LOCK(pip); 3856 if (MDI_PI(pip)->pi_prop == NULL) { 3857 MDI_PI_UNLOCK(pip); 3858 return (DDI_PROP_NOT_FOUND); 3859 } 3860 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 3861 MDI_PI_UNLOCK(pip); 3862 return (i_map_nvlist_error_to_mdi(rv)); 3863 } 3864 3865 /* 3866 * mdi_prop_update_byte_array(): 3867 * Create/Update a byte array property 3868 */ 3869 int 3870 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 3871 uint_t nelements) 3872 { 3873 int rv; 3874 3875 if (pip == NULL) { 3876 return (DDI_PROP_INVAL_ARG); 3877 } 3878 
ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3879 MDI_PI_LOCK(pip); 3880 if (MDI_PI(pip)->pi_prop == NULL) { 3881 MDI_PI_UNLOCK(pip); 3882 return (DDI_PROP_NOT_FOUND); 3883 } 3884 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 3885 MDI_PI_UNLOCK(pip); 3886 return (i_map_nvlist_error_to_mdi(rv)); 3887 } 3888 3889 /* 3890 * mdi_prop_update_int(): 3891 * Create/Update a 32 bit integer property 3892 */ 3893 int 3894 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 3895 { 3896 int rv; 3897 3898 if (pip == NULL) { 3899 return (DDI_PROP_INVAL_ARG); 3900 } 3901 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3902 MDI_PI_LOCK(pip); 3903 if (MDI_PI(pip)->pi_prop == NULL) { 3904 MDI_PI_UNLOCK(pip); 3905 return (DDI_PROP_NOT_FOUND); 3906 } 3907 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 3908 MDI_PI_UNLOCK(pip); 3909 return (i_map_nvlist_error_to_mdi(rv)); 3910 } 3911 3912 /* 3913 * mdi_prop_update_int64(): 3914 * Create/Update a 64 bit integer property 3915 */ 3916 int 3917 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 3918 { 3919 int rv; 3920 3921 if (pip == NULL) { 3922 return (DDI_PROP_INVAL_ARG); 3923 } 3924 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3925 MDI_PI_LOCK(pip); 3926 if (MDI_PI(pip)->pi_prop == NULL) { 3927 MDI_PI_UNLOCK(pip); 3928 return (DDI_PROP_NOT_FOUND); 3929 } 3930 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 3931 MDI_PI_UNLOCK(pip); 3932 return (i_map_nvlist_error_to_mdi(rv)); 3933 } 3934 3935 /* 3936 * mdi_prop_update_int_array(): 3937 * Create/Update a int array property 3938 */ 3939 int 3940 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 3941 uint_t nelements) 3942 { 3943 int rv; 3944 3945 if (pip == NULL) { 3946 return (DDI_PROP_INVAL_ARG); 3947 } 3948 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3949 MDI_PI_LOCK(pip); 3950 if (MDI_PI(pip)->pi_prop == NULL) { 3951 MDI_PI_UNLOCK(pip); 3952 return (DDI_PROP_NOT_FOUND); 3953 } 3954 rv 
= nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 3955 nelements); 3956 MDI_PI_UNLOCK(pip); 3957 return (i_map_nvlist_error_to_mdi(rv)); 3958 } 3959 3960 /* 3961 * mdi_prop_update_string(): 3962 * Create/Update a string property 3963 */ 3964 int 3965 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 3966 { 3967 int rv; 3968 3969 if (pip == NULL) { 3970 return (DDI_PROP_INVAL_ARG); 3971 } 3972 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3973 MDI_PI_LOCK(pip); 3974 if (MDI_PI(pip)->pi_prop == NULL) { 3975 MDI_PI_UNLOCK(pip); 3976 return (DDI_PROP_NOT_FOUND); 3977 } 3978 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 3979 MDI_PI_UNLOCK(pip); 3980 return (i_map_nvlist_error_to_mdi(rv)); 3981 } 3982 3983 /* 3984 * mdi_prop_update_string_array(): 3985 * Create/Update a string array property 3986 */ 3987 int 3988 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 3989 uint_t nelements) 3990 { 3991 int rv; 3992 3993 if (pip == NULL) { 3994 return (DDI_PROP_INVAL_ARG); 3995 } 3996 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3997 MDI_PI_LOCK(pip); 3998 if (MDI_PI(pip)->pi_prop == NULL) { 3999 MDI_PI_UNLOCK(pip); 4000 return (DDI_PROP_NOT_FOUND); 4001 } 4002 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4003 nelements); 4004 MDI_PI_UNLOCK(pip); 4005 return (i_map_nvlist_error_to_mdi(rv)); 4006 } 4007 4008 /* 4009 * mdi_prop_lookup_byte(): 4010 * Look for byte property identified by name. The data returned 4011 * is the actual property and valid as long as mdi_pathinfo_t node 4012 * is alive. 
4013 */ 4014 int 4015 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4016 { 4017 int rv; 4018 4019 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4020 return (DDI_PROP_NOT_FOUND); 4021 } 4022 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4023 return (i_map_nvlist_error_to_mdi(rv)); 4024 } 4025 4026 4027 /* 4028 * mdi_prop_lookup_byte_array(): 4029 * Look for byte array property identified by name. The data 4030 * returned is the actual property and valid as long as 4031 * mdi_pathinfo_t node is alive. 4032 */ 4033 int 4034 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4035 uint_t *nelements) 4036 { 4037 int rv; 4038 4039 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4040 return (DDI_PROP_NOT_FOUND); 4041 } 4042 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4043 nelements); 4044 return (i_map_nvlist_error_to_mdi(rv)); 4045 } 4046 4047 /* 4048 * mdi_prop_lookup_int(): 4049 * Look for int property identified by name. The data returned 4050 * is the actual property and valid as long as mdi_pathinfo_t 4051 * node is alive. 4052 */ 4053 int 4054 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4055 { 4056 int rv; 4057 4058 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4059 return (DDI_PROP_NOT_FOUND); 4060 } 4061 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4062 return (i_map_nvlist_error_to_mdi(rv)); 4063 } 4064 4065 /* 4066 * mdi_prop_lookup_int64(): 4067 * Look for int64 property identified by name. The data returned 4068 * is the actual property and valid as long as mdi_pathinfo_t node 4069 * is alive. 
4070 */ 4071 int 4072 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4073 { 4074 int rv; 4075 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4076 return (DDI_PROP_NOT_FOUND); 4077 } 4078 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4079 return (i_map_nvlist_error_to_mdi(rv)); 4080 } 4081 4082 /* 4083 * mdi_prop_lookup_int_array(): 4084 * Look for int array property identified by name. The data 4085 * returned is the actual property and valid as long as 4086 * mdi_pathinfo_t node is alive. 4087 */ 4088 int 4089 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4090 uint_t *nelements) 4091 { 4092 int rv; 4093 4094 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4095 return (DDI_PROP_NOT_FOUND); 4096 } 4097 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4098 (int32_t **)data, nelements); 4099 return (i_map_nvlist_error_to_mdi(rv)); 4100 } 4101 4102 /* 4103 * mdi_prop_lookup_string(): 4104 * Look for string property identified by name. The data 4105 * returned is the actual property and valid as long as 4106 * mdi_pathinfo_t node is alive. 4107 */ 4108 int 4109 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4110 { 4111 int rv; 4112 4113 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4114 return (DDI_PROP_NOT_FOUND); 4115 } 4116 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4117 return (i_map_nvlist_error_to_mdi(rv)); 4118 } 4119 4120 /* 4121 * mdi_prop_lookup_string_array(): 4122 * Look for string array property identified by name. The data 4123 * returned is the actual property and valid as long as 4124 * mdi_pathinfo_t node is alive. 
4125 */ 4126 4127 int 4128 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4129 uint_t *nelements) 4130 { 4131 int rv; 4132 4133 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4134 return (DDI_PROP_NOT_FOUND); 4135 } 4136 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4137 nelements); 4138 return (i_map_nvlist_error_to_mdi(rv)); 4139 } 4140 4141 /* 4142 * mdi_prop_free(): 4143 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4144 * functions return the pointer to actual property data and not a 4145 * copy of it. So the data returned is valid as long as 4146 * mdi_pathinfo_t node is valid. 4147 */ 4148 4149 /*ARGSUSED*/ 4150 int 4151 mdi_prop_free(void *data) 4152 { 4153 return (DDI_PROP_SUCCESS); 4154 } 4155 4156 /*ARGSUSED*/ 4157 static void 4158 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4159 { 4160 char *phci_path, *ct_path; 4161 char *ct_status; 4162 char *status; 4163 dev_info_t *dip = ct->ct_dip; 4164 char lb_buf[64]; 4165 4166 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 4167 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4168 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4169 return; 4170 } 4171 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4172 ct_status = "optimal"; 4173 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4174 ct_status = "degraded"; 4175 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4176 ct_status = "failed"; 4177 } else { 4178 ct_status = "unknown"; 4179 } 4180 4181 if (MDI_PI_IS_OFFLINE(pip)) { 4182 status = "offline"; 4183 } else if (MDI_PI_IS_ONLINE(pip)) { 4184 status = "online"; 4185 } else if (MDI_PI_IS_STANDBY(pip)) { 4186 status = "standby"; 4187 } else if (MDI_PI_IS_FAULT(pip)) { 4188 status = "faulted"; 4189 } else { 4190 status = "unknown"; 4191 } 4192 4193 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4194 (void) snprintf(lb_buf, sizeof (lb_buf), 4195 "%s, region-size: %d", mdi_load_balance_lba, 4196 
ct->ct_lb_args->region_size); 4197 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4198 (void) snprintf(lb_buf, sizeof (lb_buf), 4199 "%s", mdi_load_balance_none); 4200 } else { 4201 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4202 mdi_load_balance_rr); 4203 } 4204 4205 if (dip) { 4206 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4207 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4208 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4209 "path %s (%s%d) to target address: %s is %s" 4210 " Load balancing: %s\n", 4211 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4212 ddi_get_instance(dip), ct_status, 4213 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4214 ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4215 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4216 MDI_PI(pip)->pi_addr, status, lb_buf); 4217 kmem_free(phci_path, MAXPATHLEN); 4218 kmem_free(ct_path, MAXPATHLEN); 4219 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4220 } 4221 } 4222 4223 #ifdef DEBUG 4224 /* 4225 * i_mdi_log(): 4226 * Utility function for error message management 4227 * 4228 */ 4229 4230 /*VARARGS3*/ 4231 static void 4232 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 
4233 { 4234 char buf[MAXNAMELEN]; 4235 char name[MAXNAMELEN]; 4236 va_list ap; 4237 int log_only = 0; 4238 int boot_only = 0; 4239 int console_only = 0; 4240 4241 if (dip) { 4242 if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) { 4243 (void) snprintf(name, MAXNAMELEN, "%s%d:\n", 4244 ddi_node_name(dip), ddi_get_instance(dip)); 4245 } else { 4246 (void) snprintf(name, MAXNAMELEN, "%s%d:", 4247 ddi_node_name(dip), ddi_get_instance(dip)); 4248 } 4249 } else { 4250 name[0] = '\0'; 4251 } 4252 4253 va_start(ap, fmt); 4254 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4255 va_end(ap); 4256 4257 switch (buf[0]) { 4258 case '!': 4259 log_only = 1; 4260 break; 4261 case '?': 4262 boot_only = 1; 4263 break; 4264 case '^': 4265 console_only = 1; 4266 break; 4267 } 4268 4269 switch (level) { 4270 case CE_NOTE: 4271 level = CE_CONT; 4272 /* FALLTHROUGH */ 4273 case CE_CONT: 4274 case CE_WARN: 4275 case CE_PANIC: 4276 if (boot_only) { 4277 cmn_err(level, "?%s\t%s", name, &buf[1]); 4278 } else if (console_only) { 4279 cmn_err(level, "^%s\t%s", name, &buf[1]); 4280 } else if (log_only) { 4281 cmn_err(level, "!%s\t%s", name, &buf[1]); 4282 } else { 4283 cmn_err(level, "%s\t%s", name, buf); 4284 } 4285 break; 4286 default: 4287 cmn_err(level, "%s\t%s", name, buf); 4288 break; 4289 } 4290 } 4291 #endif /* DEBUG */ 4292 4293 void 4294 i_mdi_client_online(dev_info_t *ct_dip) 4295 { 4296 mdi_client_t *ct; 4297 4298 /* 4299 * Client online notification. 
Mark client state as online 4300 * restore our binding with dev_info node 4301 */ 4302 ct = i_devi_get_client(ct_dip); 4303 ASSERT(ct != NULL); 4304 MDI_CLIENT_LOCK(ct); 4305 MDI_CLIENT_SET_ONLINE(ct); 4306 /* catch for any memory leaks */ 4307 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4308 ct->ct_dip = ct_dip; 4309 4310 if (ct->ct_power_cnt == 0) 4311 (void) i_mdi_power_all_phci(ct); 4312 4313 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4314 "i_mdi_pm_hold_client\n")); 4315 i_mdi_pm_hold_client(ct, 1); 4316 4317 MDI_CLIENT_UNLOCK(ct); 4318 } 4319 4320 void 4321 i_mdi_phci_online(dev_info_t *ph_dip) 4322 { 4323 mdi_phci_t *ph; 4324 4325 /* pHCI online notification. Mark state accordingly */ 4326 ph = i_devi_get_phci(ph_dip); 4327 ASSERT(ph != NULL); 4328 MDI_PHCI_LOCK(ph); 4329 MDI_PHCI_SET_ONLINE(ph); 4330 MDI_PHCI_UNLOCK(ph); 4331 } 4332 4333 /* 4334 * mdi_devi_online(): 4335 * Online notification from NDI framework on pHCI/client 4336 * device online. 4337 * Return Values: 4338 * NDI_SUCCESS 4339 * MDI_FAILURE 4340 */ 4341 4342 /*ARGSUSED*/ 4343 int 4344 mdi_devi_online(dev_info_t *dip, uint_t flags) 4345 { 4346 if (MDI_PHCI(dip)) { 4347 i_mdi_phci_online(dip); 4348 } 4349 4350 if (MDI_CLIENT(dip)) { 4351 i_mdi_client_online(dip); 4352 } 4353 return (NDI_SUCCESS); 4354 } 4355 4356 /* 4357 * mdi_devi_offline(): 4358 * Offline notification from NDI framework on pHCI/Client device 4359 * offline. 
4360 * 4361 * Return Values: 4362 * NDI_SUCCESS 4363 * NDI_FAILURE 4364 */ 4365 4366 /*ARGSUSED*/ 4367 int 4368 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4369 { 4370 int rv = NDI_SUCCESS; 4371 4372 if (MDI_CLIENT(dip)) { 4373 rv = i_mdi_client_offline(dip, flags); 4374 if (rv != NDI_SUCCESS) 4375 return (rv); 4376 } 4377 4378 if (MDI_PHCI(dip)) { 4379 rv = i_mdi_phci_offline(dip, flags); 4380 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4381 /* set client back online */ 4382 i_mdi_client_online(dip); 4383 } 4384 } 4385 4386 return (rv); 4387 } 4388 4389 /*ARGSUSED*/ 4390 static int 4391 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4392 { 4393 int rv = NDI_SUCCESS; 4394 mdi_phci_t *ph; 4395 mdi_client_t *ct; 4396 mdi_pathinfo_t *pip; 4397 mdi_pathinfo_t *next; 4398 mdi_pathinfo_t *failed_pip = NULL; 4399 dev_info_t *cdip; 4400 4401 /* 4402 * pHCI component offline notification 4403 * Make sure that this pHCI instance is free to be offlined. 4404 * If it is OK to proceed, Offline and remove all the child 4405 * mdi_pathinfo nodes. This process automatically offlines 4406 * corresponding client devices, for which this pHCI provides 4407 * critical services. 4408 */ 4409 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n", 4410 dip)); 4411 4412 ph = i_devi_get_phci(dip); 4413 if (ph == NULL) { 4414 return (rv); 4415 } 4416 4417 MDI_PHCI_LOCK(ph); 4418 4419 if (MDI_PHCI_IS_OFFLINE(ph)) { 4420 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph)); 4421 MDI_PHCI_UNLOCK(ph); 4422 return (NDI_SUCCESS); 4423 } 4424 4425 /* 4426 * Check to see if the pHCI can be offlined 4427 */ 4428 if (ph->ph_unstable) { 4429 MDI_DEBUG(1, (CE_WARN, dip, 4430 "!One or more target devices are in transient " 4431 "state. This device can not be removed at " 4432 "this moment. 
Please try again later.")); 4433 MDI_PHCI_UNLOCK(ph); 4434 return (NDI_BUSY); 4435 } 4436 4437 pip = ph->ph_path_head; 4438 while (pip != NULL) { 4439 MDI_PI_LOCK(pip); 4440 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4441 /* 4442 * The mdi_pathinfo state is OK. Check the client state. 4443 * If failover in progress fail the pHCI from offlining 4444 */ 4445 ct = MDI_PI(pip)->pi_client; 4446 i_mdi_client_lock(ct, pip); 4447 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4448 (ct->ct_unstable)) { 4449 /* 4450 * Failover is in progress, Fail the DR 4451 */ 4452 MDI_DEBUG(1, (CE_WARN, dip, 4453 "!pHCI device (%s%d) is Busy. %s", 4454 ddi_driver_name(dip), ddi_get_instance(dip), 4455 "This device can not be removed at " 4456 "this moment. Please try again later.")); 4457 MDI_PI_UNLOCK(pip); 4458 MDI_CLIENT_UNLOCK(ct); 4459 MDI_PHCI_UNLOCK(ph); 4460 return (NDI_BUSY); 4461 } 4462 MDI_PI_UNLOCK(pip); 4463 4464 /* 4465 * Check to see of we are removing the last path of this 4466 * client device... 4467 */ 4468 cdip = ct->ct_dip; 4469 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4470 (i_mdi_client_compute_state(ct, ph) == 4471 MDI_CLIENT_STATE_FAILED)) { 4472 i_mdi_client_unlock(ct); 4473 MDI_PHCI_UNLOCK(ph); 4474 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4475 /* 4476 * ndi_devi_offline() failed. 4477 * This pHCI provides the critical path 4478 * to one or more client devices. 4479 * Return busy. 4480 */ 4481 MDI_PHCI_LOCK(ph); 4482 MDI_DEBUG(1, (CE_WARN, dip, 4483 "!pHCI device (%s%d) is Busy. %s", 4484 ddi_driver_name(dip), ddi_get_instance(dip), 4485 "This device can not be removed at " 4486 "this moment. 
Please try again later.")); 4487 failed_pip = pip; 4488 break; 4489 } else { 4490 MDI_PHCI_LOCK(ph); 4491 pip = next; 4492 } 4493 } else { 4494 i_mdi_client_unlock(ct); 4495 pip = next; 4496 } 4497 } 4498 4499 if (failed_pip) { 4500 pip = ph->ph_path_head; 4501 while (pip != failed_pip) { 4502 MDI_PI_LOCK(pip); 4503 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4504 ct = MDI_PI(pip)->pi_client; 4505 i_mdi_client_lock(ct, pip); 4506 cdip = ct->ct_dip; 4507 switch (MDI_CLIENT_STATE(ct)) { 4508 case MDI_CLIENT_STATE_OPTIMAL: 4509 case MDI_CLIENT_STATE_DEGRADED: 4510 if (cdip) { 4511 MDI_PI_UNLOCK(pip); 4512 i_mdi_client_unlock(ct); 4513 MDI_PHCI_UNLOCK(ph); 4514 (void) ndi_devi_online(cdip, 0); 4515 MDI_PHCI_LOCK(ph); 4516 pip = next; 4517 continue; 4518 } 4519 break; 4520 4521 case MDI_CLIENT_STATE_FAILED: 4522 if (cdip) { 4523 MDI_PI_UNLOCK(pip); 4524 i_mdi_client_unlock(ct); 4525 MDI_PHCI_UNLOCK(ph); 4526 (void) ndi_devi_offline(cdip, 0); 4527 MDI_PHCI_LOCK(ph); 4528 pip = next; 4529 continue; 4530 } 4531 break; 4532 } 4533 MDI_PI_UNLOCK(pip); 4534 i_mdi_client_unlock(ct); 4535 pip = next; 4536 } 4537 MDI_PHCI_UNLOCK(ph); 4538 return (NDI_BUSY); 4539 } 4540 4541 /* 4542 * Mark the pHCI as offline 4543 */ 4544 MDI_PHCI_SET_OFFLINE(ph); 4545 4546 /* 4547 * Mark the child mdi_pathinfo nodes as transient 4548 */ 4549 pip = ph->ph_path_head; 4550 while (pip != NULL) { 4551 MDI_PI_LOCK(pip); 4552 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4553 MDI_PI_SET_OFFLINING(pip); 4554 MDI_PI_UNLOCK(pip); 4555 pip = next; 4556 } 4557 MDI_PHCI_UNLOCK(ph); 4558 /* 4559 * Give a chance for any pending commands to execute 4560 */ 4561 delay(1); 4562 MDI_PHCI_LOCK(ph); 4563 pip = ph->ph_path_head; 4564 while (pip != NULL) { 4565 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4566 (void) i_mdi_pi_offline(pip, flags); 4567 MDI_PI_LOCK(pip); 4568 ct = MDI_PI(pip)->pi_client; 4569 if (!MDI_PI_IS_OFFLINE(pip)) { 4570 MDI_DEBUG(1, (CE_WARN, dip, 4571 "!pHCI device (%s%d) 
is Busy. %s", 4572 ddi_driver_name(dip), ddi_get_instance(dip), 4573 "This device can not be removed at " 4574 "this moment. Please try again later.")); 4575 MDI_PI_UNLOCK(pip); 4576 MDI_PHCI_SET_ONLINE(ph); 4577 MDI_PHCI_UNLOCK(ph); 4578 return (NDI_BUSY); 4579 } 4580 MDI_PI_UNLOCK(pip); 4581 pip = next; 4582 } 4583 MDI_PHCI_UNLOCK(ph); 4584 4585 return (rv); 4586 } 4587 4588 /*ARGSUSED*/ 4589 static int 4590 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4591 { 4592 int rv = NDI_SUCCESS; 4593 mdi_client_t *ct; 4594 4595 /* 4596 * Client component to go offline. Make sure that we are 4597 * not in failing over state and update client state 4598 * accordingly 4599 */ 4600 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n", 4601 dip)); 4602 ct = i_devi_get_client(dip); 4603 if (ct != NULL) { 4604 MDI_CLIENT_LOCK(ct); 4605 if (ct->ct_unstable) { 4606 /* 4607 * One or more paths are in transient state, 4608 * Dont allow offline of a client device 4609 */ 4610 MDI_DEBUG(1, (CE_WARN, dip, 4611 "!One or more paths to this device is " 4612 "in transient state. This device can not " 4613 "be removed at this moment. " 4614 "Please try again later.")); 4615 MDI_CLIENT_UNLOCK(ct); 4616 return (NDI_BUSY); 4617 } 4618 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4619 /* 4620 * Failover is in progress, Dont allow DR of 4621 * a client device 4622 */ 4623 MDI_DEBUG(1, (CE_WARN, dip, 4624 "!Client device (%s%d) is Busy. %s", 4625 ddi_driver_name(dip), ddi_get_instance(dip), 4626 "This device can not be removed at " 4627 "this moment. 
Please try again later.")); 4628 MDI_CLIENT_UNLOCK(ct); 4629 return (NDI_BUSY); 4630 } 4631 MDI_CLIENT_SET_OFFLINE(ct); 4632 4633 /* 4634 * Unbind our relationship with the dev_info node 4635 */ 4636 if (flags & NDI_DEVI_REMOVE) { 4637 ct->ct_dip = NULL; 4638 } 4639 MDI_CLIENT_UNLOCK(ct); 4640 } 4641 return (rv); 4642 } 4643 4644 /* 4645 * mdi_pre_attach(): 4646 * Pre attach() notification handler 4647 */ 4648 4649 /*ARGSUSED*/ 4650 int 4651 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4652 { 4653 /* don't support old DDI_PM_RESUME */ 4654 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4655 (cmd == DDI_PM_RESUME)) 4656 return (DDI_FAILURE); 4657 4658 return (DDI_SUCCESS); 4659 } 4660 4661 /* 4662 * mdi_post_attach(): 4663 * Post attach() notification handler 4664 */ 4665 4666 /*ARGSUSED*/ 4667 void 4668 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4669 { 4670 mdi_phci_t *ph; 4671 mdi_client_t *ct; 4672 mdi_pathinfo_t *pip; 4673 4674 if (MDI_PHCI(dip)) { 4675 ph = i_devi_get_phci(dip); 4676 ASSERT(ph != NULL); 4677 4678 MDI_PHCI_LOCK(ph); 4679 switch (cmd) { 4680 case DDI_ATTACH: 4681 MDI_DEBUG(2, (CE_NOTE, dip, 4682 "!pHCI post_attach: called %p\n", ph)); 4683 if (error == DDI_SUCCESS) { 4684 MDI_PHCI_SET_ATTACH(ph); 4685 } else { 4686 MDI_DEBUG(1, (CE_NOTE, dip, 4687 "!pHCI post_attach: failed error=%d\n", 4688 error)); 4689 MDI_PHCI_SET_DETACH(ph); 4690 } 4691 break; 4692 4693 case DDI_RESUME: 4694 MDI_DEBUG(2, (CE_NOTE, dip, 4695 "!pHCI post_resume: called %p\n", ph)); 4696 if (error == DDI_SUCCESS) { 4697 MDI_PHCI_SET_RESUME(ph); 4698 } else { 4699 MDI_DEBUG(1, (CE_NOTE, dip, 4700 "!pHCI post_resume: failed error=%d\n", 4701 error)); 4702 MDI_PHCI_SET_SUSPEND(ph); 4703 } 4704 break; 4705 } 4706 MDI_PHCI_UNLOCK(ph); 4707 } 4708 4709 if (MDI_CLIENT(dip)) { 4710 ct = i_devi_get_client(dip); 4711 ASSERT(ct != NULL); 4712 4713 MDI_CLIENT_LOCK(ct); 4714 switch (cmd) { 4715 case DDI_ATTACH: 4716 MDI_DEBUG(2, (CE_NOTE, dip, 
4717 "!Client post_attach: called %p\n", ct)); 4718 if (error != DDI_SUCCESS) { 4719 MDI_DEBUG(1, (CE_NOTE, dip, 4720 "!Client post_attach: failed error=%d\n", 4721 error)); 4722 MDI_CLIENT_SET_DETACH(ct); 4723 MDI_DEBUG(4, (CE_WARN, dip, 4724 "mdi_post_attach i_mdi_pm_reset_client\n")); 4725 i_mdi_pm_reset_client(ct); 4726 break; 4727 } 4728 4729 /* 4730 * Client device has successfully attached. 4731 * Create kstats for any pathinfo structures 4732 * initially associated with this client. 4733 */ 4734 for (pip = ct->ct_path_head; pip != NULL; 4735 pip = (mdi_pathinfo_t *) 4736 MDI_PI(pip)->pi_client_link) { 4737 (void) i_mdi_pi_kstat_create(pip); 4738 i_mdi_report_path_state(ct, pip); 4739 } 4740 MDI_CLIENT_SET_ATTACH(ct); 4741 break; 4742 4743 case DDI_RESUME: 4744 MDI_DEBUG(2, (CE_NOTE, dip, 4745 "!Client post_attach: called %p\n", ct)); 4746 if (error == DDI_SUCCESS) { 4747 MDI_CLIENT_SET_RESUME(ct); 4748 } else { 4749 MDI_DEBUG(1, (CE_NOTE, dip, 4750 "!Client post_resume: failed error=%d\n", 4751 error)); 4752 MDI_CLIENT_SET_SUSPEND(ct); 4753 } 4754 break; 4755 } 4756 MDI_CLIENT_UNLOCK(ct); 4757 } 4758 } 4759 4760 /* 4761 * mdi_pre_detach(): 4762 * Pre detach notification handler 4763 */ 4764 4765 /*ARGSUSED*/ 4766 int 4767 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4768 { 4769 int rv = DDI_SUCCESS; 4770 4771 if (MDI_CLIENT(dip)) { 4772 (void) i_mdi_client_pre_detach(dip, cmd); 4773 } 4774 4775 if (MDI_PHCI(dip)) { 4776 rv = i_mdi_phci_pre_detach(dip, cmd); 4777 } 4778 4779 return (rv); 4780 } 4781 4782 /*ARGSUSED*/ 4783 static int 4784 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4785 { 4786 int rv = DDI_SUCCESS; 4787 mdi_phci_t *ph; 4788 mdi_client_t *ct; 4789 mdi_pathinfo_t *pip; 4790 mdi_pathinfo_t *failed_pip = NULL; 4791 mdi_pathinfo_t *next; 4792 4793 ph = i_devi_get_phci(dip); 4794 if (ph == NULL) { 4795 return (rv); 4796 } 4797 4798 MDI_PHCI_LOCK(ph); 4799 switch (cmd) { 4800 case DDI_DETACH: 4801 MDI_DEBUG(2, (CE_NOTE, 
dip, 4802 "!pHCI pre_detach: called %p\n", ph)); 4803 if (!MDI_PHCI_IS_OFFLINE(ph)) { 4804 /* 4805 * mdi_pathinfo nodes are still attached to 4806 * this pHCI. Fail the detach for this pHCI. 4807 */ 4808 MDI_DEBUG(2, (CE_WARN, dip, 4809 "!pHCI pre_detach: " 4810 "mdi_pathinfo nodes are still attached " 4811 "%p\n", ph)); 4812 rv = DDI_FAILURE; 4813 break; 4814 } 4815 MDI_PHCI_SET_DETACH(ph); 4816 break; 4817 4818 case DDI_SUSPEND: 4819 /* 4820 * pHCI is getting suspended. Since mpxio client 4821 * devices may not be suspended at this point, to avoid 4822 * a potential stack overflow, it is important to suspend 4823 * client devices before pHCI can be suspended. 4824 */ 4825 4826 MDI_DEBUG(2, (CE_NOTE, dip, 4827 "!pHCI pre_suspend: called %p\n", ph)); 4828 /* 4829 * Suspend all the client devices accessible through this pHCI 4830 */ 4831 pip = ph->ph_path_head; 4832 while (pip != NULL && rv == DDI_SUCCESS) { 4833 dev_info_t *cdip; 4834 MDI_PI_LOCK(pip); 4835 next = 4836 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4837 ct = MDI_PI(pip)->pi_client; 4838 i_mdi_client_lock(ct, pip); 4839 cdip = ct->ct_dip; 4840 MDI_PI_UNLOCK(pip); 4841 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 4842 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 4843 i_mdi_client_unlock(ct); 4844 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 4845 DDI_SUCCESS) { 4846 /* 4847 * Suspend of one of the client 4848 * device has failed. 4849 */ 4850 MDI_DEBUG(1, (CE_WARN, dip, 4851 "!Suspend of device (%s%d) failed.", 4852 ddi_driver_name(cdip), 4853 ddi_get_instance(cdip))); 4854 failed_pip = pip; 4855 break; 4856 } 4857 } else { 4858 i_mdi_client_unlock(ct); 4859 } 4860 pip = next; 4861 } 4862 4863 if (rv == DDI_SUCCESS) { 4864 /* 4865 * Suspend of client devices is complete. Proceed 4866 * with pHCI suspend. 4867 */ 4868 MDI_PHCI_SET_SUSPEND(ph); 4869 } else { 4870 /* 4871 * Revert back all the suspended client device states 4872 * to converse. 
4873 */ 4874 pip = ph->ph_path_head; 4875 while (pip != failed_pip) { 4876 dev_info_t *cdip; 4877 MDI_PI_LOCK(pip); 4878 next = 4879 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4880 ct = MDI_PI(pip)->pi_client; 4881 i_mdi_client_lock(ct, pip); 4882 cdip = ct->ct_dip; 4883 MDI_PI_UNLOCK(pip); 4884 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 4885 i_mdi_client_unlock(ct); 4886 (void) devi_attach(cdip, DDI_RESUME); 4887 } else { 4888 i_mdi_client_unlock(ct); 4889 } 4890 pip = next; 4891 } 4892 } 4893 break; 4894 4895 default: 4896 rv = DDI_FAILURE; 4897 break; 4898 } 4899 MDI_PHCI_UNLOCK(ph); 4900 return (rv); 4901 } 4902 4903 /*ARGSUSED*/ 4904 static int 4905 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4906 { 4907 int rv = DDI_SUCCESS; 4908 mdi_client_t *ct; 4909 4910 ct = i_devi_get_client(dip); 4911 if (ct == NULL) { 4912 return (rv); 4913 } 4914 4915 MDI_CLIENT_LOCK(ct); 4916 switch (cmd) { 4917 case DDI_DETACH: 4918 MDI_DEBUG(2, (CE_NOTE, dip, 4919 "!Client pre_detach: called %p\n", ct)); 4920 MDI_CLIENT_SET_DETACH(ct); 4921 break; 4922 4923 case DDI_SUSPEND: 4924 MDI_DEBUG(2, (CE_NOTE, dip, 4925 "!Client pre_suspend: called %p\n", ct)); 4926 MDI_CLIENT_SET_SUSPEND(ct); 4927 break; 4928 4929 default: 4930 rv = DDI_FAILURE; 4931 break; 4932 } 4933 MDI_CLIENT_UNLOCK(ct); 4934 return (rv); 4935 } 4936 4937 /* 4938 * mdi_post_detach(): 4939 * Post detach notification handler 4940 */ 4941 4942 /*ARGSUSED*/ 4943 void 4944 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 4945 { 4946 /* 4947 * Detach/Suspend of mpxio component failed. Update our state 4948 * too 4949 */ 4950 if (MDI_PHCI(dip)) 4951 i_mdi_phci_post_detach(dip, cmd, error); 4952 4953 if (MDI_CLIENT(dip)) 4954 i_mdi_client_post_detach(dip, cmd, error); 4955 } 4956 4957 /*ARGSUSED*/ 4958 static void 4959 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 4960 { 4961 mdi_phci_t *ph; 4962 4963 /* 4964 * Detach/Suspend of phci component failed. 
Update our state 4965 * too 4966 */ 4967 ph = i_devi_get_phci(dip); 4968 if (ph == NULL) { 4969 return; 4970 } 4971 4972 MDI_PHCI_LOCK(ph); 4973 /* 4974 * Detach of pHCI failed. Restore back converse 4975 * state 4976 */ 4977 switch (cmd) { 4978 case DDI_DETACH: 4979 MDI_DEBUG(2, (CE_NOTE, dip, 4980 "!pHCI post_detach: called %p\n", ph)); 4981 if (error != DDI_SUCCESS) 4982 MDI_PHCI_SET_ATTACH(ph); 4983 break; 4984 4985 case DDI_SUSPEND: 4986 MDI_DEBUG(2, (CE_NOTE, dip, 4987 "!pHCI post_suspend: called %p\n", ph)); 4988 if (error != DDI_SUCCESS) 4989 MDI_PHCI_SET_RESUME(ph); 4990 break; 4991 } 4992 MDI_PHCI_UNLOCK(ph); 4993 } 4994 4995 /*ARGSUSED*/ 4996 static void 4997 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 4998 { 4999 mdi_client_t *ct; 5000 5001 ct = i_devi_get_client(dip); 5002 if (ct == NULL) { 5003 return; 5004 } 5005 MDI_CLIENT_LOCK(ct); 5006 /* 5007 * Detach of Client failed. Restore back converse 5008 * state 5009 */ 5010 switch (cmd) { 5011 case DDI_DETACH: 5012 MDI_DEBUG(2, (CE_NOTE, dip, 5013 "!Client post_detach: called %p\n", ct)); 5014 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5015 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5016 "i_mdi_pm_rele_client\n")); 5017 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5018 } else { 5019 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5020 "i_mdi_pm_reset_client\n")); 5021 i_mdi_pm_reset_client(ct); 5022 } 5023 if (error != DDI_SUCCESS) 5024 MDI_CLIENT_SET_ATTACH(ct); 5025 break; 5026 5027 case DDI_SUSPEND: 5028 MDI_DEBUG(2, (CE_NOTE, dip, 5029 "!Client post_suspend: called %p\n", ct)); 5030 if (error != DDI_SUCCESS) 5031 MDI_CLIENT_SET_RESUME(ct); 5032 break; 5033 } 5034 MDI_CLIENT_UNLOCK(ct); 5035 } 5036 5037 /* 5038 * create and install per-path (client - pHCI) statistics 5039 * I/O stats supported: nread, nwritten, reads, and writes 5040 * Error stats - hard errors, soft errors, & transport errors 5041 */ 5042 static int 5043 
i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
{
	/*
	 * Attach I/O and error kstats to the given path.
	 *
	 * kstats are shared by every path between the same client and
	 * pHCI: if a sibling path already owns a set, this path takes an
	 * additional reference on it instead of creating new kstats.
	 *
	 * The caller must hold the client's ct_mutex (asserted below).
	 *
	 * Returns MDI_SUCCESS when the path ends up with kstats attached,
	 * MDI_FAILURE when the kstat name does not fit in KSTAT_STRLEN or
	 * kstat_create() fails.
	 */
	dev_info_t	*client = MDI_PI(pip)->pi_client->ct_dip;
	dev_info_t	*ppath = MDI_PI(pip)->pi_phci->ph_dip;
	char		ksname[KSTAT_STRLEN];
	mdi_pathinfo_t	*cpip;
	const char	*err_postfix = ",err";
	kstat_t		*kiosp, *kerrsp;
	struct pi_errs	*nsp;
	struct mdi_pi_kstats	*mdi_statp;

	ASSERT(client != NULL && ppath != NULL);

	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));

	/* kstats already attached to this path: nothing to do */
	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
		if (cpip == pip)
			continue;
		/*
		 * We have found a different path with same parent
		 * kstats for a given client-pHCI are common
		 */
		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
		    (MDI_PI(cpip)->pi_kstats != NULL)) {
			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
			return (MDI_SUCCESS);
		}
	}

	/*
	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
	 * clamp length of name against max length of error kstat name
	 *
	 * snprintf() returns the formatted length without the terminating
	 * NUL, and err_postfix is appended to ksname below.  The base name
	 * must therefore be strictly shorter than
	 * KSTAT_STRLEN - strlen(err_postfix), otherwise the strcat() would
	 * write its NUL one byte past the end of ksname[].
	 */
	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
	    ddi_driver_name(client), ddi_get_instance(client),
	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >=
	    (KSTAT_STRLEN - strlen(err_postfix))) {
		return (MDI_FAILURE);
	}
	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
		return (MDI_FAILURE);
	}

	(void) strcat(ksname, err_postfix);
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);

	if (kerrsp == NULL) {
		/* do not leave a half-created pair behind */
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	/* this path holds the first reference on the new kstat pair */
	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}

/*
 * destroy per-path
properties 5134 */ 5135 static void 5136 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5137 { 5138 5139 struct mdi_pi_kstats *mdi_statp; 5140 5141 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5142 return; 5143 5144 MDI_PI(pip)->pi_kstats = NULL; 5145 5146 /* 5147 * the kstat may be shared between multiple pathinfo nodes 5148 * decrement this pathinfo's usage, removing the kstats 5149 * themselves when the last pathinfo reference is removed. 5150 */ 5151 ASSERT(mdi_statp->pi_kstat_ref > 0); 5152 if (--mdi_statp->pi_kstat_ref != 0) 5153 return; 5154 5155 kstat_delete(mdi_statp->pi_kstat_iostats); 5156 kstat_delete(mdi_statp->pi_kstat_errstats); 5157 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5158 } 5159 5160 /* 5161 * update I/O paths KSTATS 5162 */ 5163 void 5164 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5165 { 5166 kstat_t *iostatp; 5167 size_t xfer_cnt; 5168 5169 ASSERT(pip != NULL); 5170 5171 /* 5172 * I/O can be driven across a path prior to having path 5173 * statistics available, i.e. probe(9e). 5174 */ 5175 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5176 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5177 xfer_cnt = bp->b_bcount - bp->b_resid; 5178 if (bp->b_flags & B_READ) { 5179 KSTAT_IO_PTR(iostatp)->reads++; 5180 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5181 } else { 5182 KSTAT_IO_PTR(iostatp)->writes++; 5183 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5184 } 5185 } 5186 } 5187 5188 /* 5189 * disable the path to a particular pHCI (pHCI specified in the phci_path 5190 * argument) for a particular client (specified in the client_path argument). 5191 * Disabling a path means that MPxIO will not select the disabled path for 5192 * routing any new I/O requests. 
5193 */ 5194 int 5195 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5196 { 5197 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5198 } 5199 5200 /* 5201 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5202 * argument) for a particular client (specified in the client_path argument). 5203 * Enabling a path means that MPxIO may select the enabled path for routing 5204 * future I/O requests, subject to other path state constraints. 5205 */ 5206 5207 int 5208 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5209 { 5210 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5211 } 5212 5213 5214 /* 5215 * Common routine for doing enable/disable. 5216 */ 5217 int 5218 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5219 { 5220 5221 mdi_phci_t *ph; 5222 mdi_vhci_t *vh = NULL; 5223 mdi_client_t *ct; 5224 mdi_pathinfo_t *next, *pip; 5225 int found_it; 5226 int (*f)() = NULL; 5227 int rv; 5228 int sync_flag = 0; 5229 5230 ph = i_devi_get_phci(pdip); 5231 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5232 " Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip)); 5233 if (ph == NULL) { 5234 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5235 " failed. ph = NULL operation = %d\n", op)); 5236 return (MDI_FAILURE); 5237 } 5238 5239 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5240 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5241 " Invalid operation = %d\n", op)); 5242 return (MDI_FAILURE); 5243 } 5244 5245 sync_flag = (flags << 8) & 0xf00; 5246 5247 vh = ph->ph_vhci; 5248 f = vh->vh_ops->vo_pi_state_change; 5249 5250 if (cdip == NULL) { 5251 /* 5252 * Need to mark the Phci as enabled/disabled. 
5253 */ 5254 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5255 "Operation %d for the phci\n", op)); 5256 MDI_PHCI_LOCK(ph); 5257 switch (flags) { 5258 case USER_DISABLE: 5259 if (op == MDI_DISABLE_OP) 5260 MDI_PHCI_SET_USER_DISABLE(ph); 5261 else 5262 MDI_PHCI_SET_USER_ENABLE(ph); 5263 break; 5264 case DRIVER_DISABLE: 5265 if (op == MDI_DISABLE_OP) 5266 MDI_PHCI_SET_DRV_DISABLE(ph); 5267 else 5268 MDI_PHCI_SET_DRV_ENABLE(ph); 5269 break; 5270 case DRIVER_DISABLE_TRANSIENT: 5271 if (op == MDI_DISABLE_OP) 5272 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5273 else 5274 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5275 break; 5276 default: 5277 MDI_PHCI_UNLOCK(ph); 5278 MDI_DEBUG(1, (CE_NOTE, NULL, 5279 "!i_mdi_pi_enable_disable:" 5280 " Invalid flag argument= %d\n", flags)); 5281 } 5282 5283 /* 5284 * Phci has been disabled. Now try to enable/disable 5285 * path info's to each client. 5286 */ 5287 pip = ph->ph_path_head; 5288 while (pip != NULL) { 5289 /* 5290 * Do a callback into the mdi consumer to let it 5291 * know that path is about to be enabled/disabled. 
5292 */ 5293 if (f != NULL) { 5294 rv = (*f)(vh->vh_dip, pip, 0, 5295 MDI_PI_EXT_STATE(pip), 5296 MDI_EXT_STATE_CHANGE | sync_flag | 5297 op | MDI_BEFORE_STATE_CHANGE); 5298 if (rv != MDI_SUCCESS) { 5299 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5300 "!vo_pi_state_change: failed rv = %x", rv)); 5301 } 5302 } 5303 5304 MDI_PI_LOCK(pip); 5305 next = 5306 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5307 switch (flags) { 5308 case USER_DISABLE: 5309 if (op == MDI_DISABLE_OP) 5310 MDI_PI_SET_USER_DISABLE(pip); 5311 else 5312 MDI_PI_SET_USER_ENABLE(pip); 5313 break; 5314 case DRIVER_DISABLE: 5315 if (op == MDI_DISABLE_OP) 5316 MDI_PI_SET_DRV_DISABLE(pip); 5317 else 5318 MDI_PI_SET_DRV_ENABLE(pip); 5319 break; 5320 case DRIVER_DISABLE_TRANSIENT: 5321 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5322 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5323 else 5324 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5325 break; 5326 } 5327 MDI_PI_UNLOCK(pip); 5328 /* 5329 * Do a callback into the mdi consumer to let it 5330 * know that path is now enabled/disabled. 5331 */ 5332 if (f != NULL) { 5333 rv = (*f)(vh->vh_dip, pip, 0, 5334 MDI_PI_EXT_STATE(pip), 5335 MDI_EXT_STATE_CHANGE | sync_flag | 5336 op | MDI_AFTER_STATE_CHANGE); 5337 if (rv != MDI_SUCCESS) { 5338 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5339 "!vo_pi_state_change: failed rv = %x", rv)); 5340 } 5341 } 5342 pip = next; 5343 } 5344 MDI_PHCI_UNLOCK(ph); 5345 } else { 5346 5347 /* 5348 * Disable a specific client. 5349 */ 5350 ct = i_devi_get_client(cdip); 5351 if (ct == NULL) { 5352 MDI_DEBUG(1, (CE_NOTE, NULL, 5353 "!i_mdi_pi_enable_disable:" 5354 " failed. 
ct = NULL operation = %d\n", op)); 5355 return (MDI_FAILURE); 5356 } 5357 5358 MDI_CLIENT_LOCK(ct); 5359 pip = ct->ct_path_head; 5360 found_it = 0; 5361 while (pip != NULL) { 5362 MDI_PI_LOCK(pip); 5363 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5364 if (MDI_PI(pip)->pi_phci == ph) { 5365 MDI_PI_UNLOCK(pip); 5366 found_it = 1; 5367 break; 5368 } 5369 MDI_PI_UNLOCK(pip); 5370 pip = next; 5371 } 5372 5373 MDI_CLIENT_UNLOCK(ct); 5374 if (found_it == 0) { 5375 MDI_DEBUG(1, (CE_NOTE, NULL, 5376 "!i_mdi_pi_enable_disable:" 5377 " failed. Could not find corresponding pip\n")); 5378 return (MDI_FAILURE); 5379 } 5380 /* 5381 * Do a callback into the mdi consumer to let it 5382 * know that path is about to get enabled/disabled. 5383 */ 5384 if (f != NULL) { 5385 rv = (*f)(vh->vh_dip, pip, 0, 5386 MDI_PI_EXT_STATE(pip), 5387 MDI_EXT_STATE_CHANGE | sync_flag | 5388 op | MDI_BEFORE_STATE_CHANGE); 5389 if (rv != MDI_SUCCESS) { 5390 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5391 "!vo_pi_state_change: failed rv = %x", rv)); 5392 } 5393 } 5394 MDI_PI_LOCK(pip); 5395 switch (flags) { 5396 case USER_DISABLE: 5397 if (op == MDI_DISABLE_OP) 5398 MDI_PI_SET_USER_DISABLE(pip); 5399 else 5400 MDI_PI_SET_USER_ENABLE(pip); 5401 break; 5402 case DRIVER_DISABLE: 5403 if (op == MDI_DISABLE_OP) 5404 MDI_PI_SET_DRV_DISABLE(pip); 5405 else 5406 MDI_PI_SET_DRV_ENABLE(pip); 5407 break; 5408 case DRIVER_DISABLE_TRANSIENT: 5409 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5410 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5411 else 5412 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5413 break; 5414 } 5415 MDI_PI_UNLOCK(pip); 5416 /* 5417 * Do a callback into the mdi consumer to let it 5418 * know that path is now enabled/disabled. 
5419 */ 5420 if (f != NULL) { 5421 rv = (*f)(vh->vh_dip, pip, 0, 5422 MDI_PI_EXT_STATE(pip), 5423 MDI_EXT_STATE_CHANGE | sync_flag | 5424 op | MDI_AFTER_STATE_CHANGE); 5425 if (rv != MDI_SUCCESS) { 5426 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5427 "!vo_pi_state_change: failed rv = %x", rv)); 5428 } 5429 } 5430 } 5431 5432 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5433 " Returning success pdip = %p cdip = %p\n", op, pdip, cdip)); 5434 return (MDI_SUCCESS); 5435 } 5436 5437 /*ARGSUSED3*/ 5438 int 5439 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp, 5440 int flags, clock_t timeout) 5441 { 5442 mdi_pathinfo_t *pip; 5443 dev_info_t *dip; 5444 clock_t interval = drv_usectohz(100000); /* 0.1 sec */ 5445 char *paddr; 5446 5447 MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm)); 5448 5449 if (!MDI_PHCI(pdip)) 5450 return (MDI_FAILURE); 5451 5452 paddr = strchr(devnm, '@'); 5453 if (paddr == NULL) 5454 return (MDI_FAILURE); 5455 5456 paddr++; /* skip '@' */ 5457 pip = mdi_pi_find(pdip, NULL, paddr); 5458 while (pip == NULL && timeout > 0) { 5459 if (interval > timeout) 5460 interval = timeout; 5461 if (flags & NDI_DEVI_DEBUG) { 5462 cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n", 5463 ddi_driver_name(pdip), ddi_get_instance(pdip), 5464 paddr, interval, timeout); 5465 } 5466 delay(interval); 5467 timeout -= interval; 5468 interval += interval; 5469 pip = mdi_pi_find(pdip, NULL, paddr); 5470 } 5471 5472 if (pip == NULL) 5473 return (MDI_FAILURE); 5474 dip = mdi_pi_get_client(pip); 5475 if (ndi_devi_online(dip, flags) != NDI_SUCCESS) 5476 return (MDI_FAILURE); 5477 *cdipp = dip; 5478 5479 /* TODO: holding should happen inside search functions */ 5480 ndi_hold_devi(dip); 5481 return (MDI_SUCCESS); 5482 } 5483 5484 /* 5485 * Ensure phci powered up 5486 */ 5487 static void 5488 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5489 { 5490 dev_info_t *ph_dip; 5491 5492 ASSERT(pip != NULL); 5493 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5494 5495 if 
(MDI_PI(pip)->pi_pm_held) { 5496 return; 5497 } 5498 5499 ph_dip = mdi_pi_get_phci(pip); 5500 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n", 5501 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5502 if (ph_dip == NULL) { 5503 return; 5504 } 5505 5506 MDI_PI_UNLOCK(pip); 5507 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5508 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5509 pm_hold_power(ph_dip); 5510 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5511 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5512 MDI_PI_LOCK(pip); 5513 5514 MDI_PI(pip)->pi_pm_held = 1; 5515 } 5516 5517 /* 5518 * Allow phci powered down 5519 */ 5520 static void 5521 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5522 { 5523 dev_info_t *ph_dip = NULL; 5524 5525 ASSERT(pip != NULL); 5526 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5527 5528 if (MDI_PI(pip)->pi_pm_held == 0) { 5529 return; 5530 } 5531 5532 ph_dip = mdi_pi_get_phci(pip); 5533 ASSERT(ph_dip != NULL); 5534 5535 MDI_PI_UNLOCK(pip); 5536 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n", 5537 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5538 5539 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5540 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5541 pm_rele_power(ph_dip); 5542 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5543 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5544 5545 MDI_PI_LOCK(pip); 5546 MDI_PI(pip)->pi_pm_held = 0; 5547 } 5548 5549 static void 5550 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5551 { 5552 ASSERT(ct); 5553 5554 ct->ct_power_cnt += incr; 5555 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client " 5556 "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr)); 5557 ASSERT(ct->ct_power_cnt >= 0); 5558 } 5559 5560 static void 5561 i_mdi_rele_all_phci(mdi_client_t *ct) 5562 { 5563 mdi_pathinfo_t *pip; 5564 5565 ASSERT(mutex_owned(&ct->ct_mutex)); 5566 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5567 while (pip != NULL) { 5568 mdi_hold_path(pip); 5569 MDI_PI_LOCK(pip); 5570 
i_mdi_pm_rele_pip(pip); 5571 MDI_PI_UNLOCK(pip); 5572 mdi_rele_path(pip); 5573 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5574 } 5575 } 5576 5577 static void 5578 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5579 { 5580 ASSERT(ct); 5581 5582 if (i_ddi_node_state(ct->ct_dip) >= DS_READY) { 5583 ct->ct_power_cnt -= decr; 5584 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client " 5585 "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr)); 5586 } 5587 5588 ASSERT(ct->ct_power_cnt >= 0); 5589 if (ct->ct_power_cnt == 0) { 5590 i_mdi_rele_all_phci(ct); 5591 return; 5592 } 5593 } 5594 5595 static void 5596 i_mdi_pm_reset_client(mdi_client_t *ct) 5597 { 5598 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client " 5599 "ct_power_cnt = %d\n", ct->ct_power_cnt)); 5600 ct->ct_power_cnt = 0; 5601 i_mdi_rele_all_phci(ct); 5602 ct->ct_powercnt_reset = 1; 5603 ct->ct_powercnt_held = 0; 5604 } 5605 5606 static void 5607 i_mdi_pm_hold_all_phci(mdi_client_t *ct) 5608 { 5609 mdi_pathinfo_t *pip; 5610 ASSERT(mutex_owned(&ct->ct_mutex)); 5611 5612 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5613 while (pip != NULL) { 5614 mdi_hold_path(pip); 5615 MDI_PI_LOCK(pip); 5616 i_mdi_pm_hold_pip(pip); 5617 MDI_PI_UNLOCK(pip); 5618 mdi_rele_path(pip); 5619 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5620 } 5621 } 5622 5623 static int 5624 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5625 { 5626 int ret; 5627 dev_info_t *ph_dip; 5628 5629 MDI_PI_LOCK(pip); 5630 i_mdi_pm_hold_pip(pip); 5631 5632 ph_dip = mdi_pi_get_phci(pip); 5633 MDI_PI_UNLOCK(pip); 5634 5635 /* bring all components of phci to full power */ 5636 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5637 "pm_powerup for %s%d\n", ddi_get_name(ph_dip), 5638 ddi_get_instance(ph_dip))); 5639 5640 ret = pm_powerup(ph_dip); 5641 5642 if (ret == DDI_FAILURE) { 5643 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5644 "pm_powerup FAILED for %s%d\n", 5645 ddi_get_name(ph_dip), 
ddi_get_instance(ph_dip))); 5646 5647 MDI_PI_LOCK(pip); 5648 i_mdi_pm_rele_pip(pip); 5649 MDI_PI_UNLOCK(pip); 5650 return (MDI_FAILURE); 5651 } 5652 5653 return (MDI_SUCCESS); 5654 } 5655 5656 static int 5657 i_mdi_power_all_phci(mdi_client_t *ct) 5658 { 5659 mdi_pathinfo_t *pip; 5660 int succeeded = 0; 5661 5662 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5663 while (pip != NULL) { 5664 mdi_hold_path(pip); 5665 MDI_CLIENT_UNLOCK(ct); 5666 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5667 succeeded = 1; 5668 5669 ASSERT(ct == MDI_PI(pip)->pi_client); 5670 MDI_CLIENT_LOCK(ct); 5671 mdi_rele_path(pip); 5672 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5673 } 5674 5675 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5676 } 5677 5678 /* 5679 * mdi_bus_power(): 5680 * 1. Place the phci(s) into powered up state so that 5681 * client can do power management 5682 * 2. Ensure phci powered up as client power managing 5683 * Return Values: 5684 * MDI_SUCCESS 5685 * MDI_FAILURE 5686 */ 5687 int 5688 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5689 void *arg, void *result) 5690 { 5691 int ret = MDI_SUCCESS; 5692 pm_bp_child_pwrchg_t *bpc; 5693 mdi_client_t *ct; 5694 dev_info_t *cdip; 5695 pm_bp_has_changed_t *bphc; 5696 5697 /* 5698 * BUS_POWER_NOINVOL not supported 5699 */ 5700 if (op == BUS_POWER_NOINVOL) 5701 return (MDI_FAILURE); 5702 5703 /* 5704 * ignore other OPs. 
5705 * return quickly to save cou cycles on the ct processing 5706 */ 5707 switch (op) { 5708 case BUS_POWER_PRE_NOTIFICATION: 5709 case BUS_POWER_POST_NOTIFICATION: 5710 bpc = (pm_bp_child_pwrchg_t *)arg; 5711 cdip = bpc->bpc_dip; 5712 break; 5713 case BUS_POWER_HAS_CHANGED: 5714 bphc = (pm_bp_has_changed_t *)arg; 5715 cdip = bphc->bphc_dip; 5716 break; 5717 default: 5718 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5719 } 5720 5721 ASSERT(MDI_CLIENT(cdip)); 5722 5723 ct = i_devi_get_client(cdip); 5724 if (ct == NULL) 5725 return (MDI_FAILURE); 5726 5727 /* 5728 * wait till the mdi_pathinfo node state change are processed 5729 */ 5730 MDI_CLIENT_LOCK(ct); 5731 switch (op) { 5732 case BUS_POWER_PRE_NOTIFICATION: 5733 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5734 "BUS_POWER_PRE_NOTIFICATION:" 5735 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5736 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5737 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5738 5739 /* serialize power level change per client */ 5740 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5741 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5742 5743 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5744 5745 if (ct->ct_power_cnt == 0) { 5746 ret = i_mdi_power_all_phci(ct); 5747 } 5748 5749 /* 5750 * if new_level > 0: 5751 * - hold phci(s) 5752 * - power up phci(s) if not already 5753 * ignore power down 5754 */ 5755 if (bpc->bpc_nlevel > 0) { 5756 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5757 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5758 "mdi_bus_power i_mdi_pm_hold_client\n")); 5759 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5760 } 5761 } 5762 break; 5763 case BUS_POWER_POST_NOTIFICATION: 5764 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5765 "BUS_POWER_POST_NOTIFICATION:" 5766 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5767 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5768 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5769 *(int *)result)); 5770 5771 if (*(int *)result 
== DDI_SUCCESS) { 5772 if (bpc->bpc_nlevel > 0) { 5773 MDI_CLIENT_SET_POWER_UP(ct); 5774 } else { 5775 MDI_CLIENT_SET_POWER_DOWN(ct); 5776 } 5777 } 5778 5779 /* release the hold we did in pre-notification */ 5780 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5781 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5782 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5783 "mdi_bus_power i_mdi_pm_rele_client\n")); 5784 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5785 } 5786 5787 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5788 /* another thread might started attaching */ 5789 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5790 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5791 "mdi_bus_power i_mdi_pm_rele_client\n")); 5792 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5793 /* detaching has been taken care in pm_post_unconfig */ 5794 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5795 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5796 "mdi_bus_power i_mdi_pm_reset_client\n")); 5797 i_mdi_pm_reset_client(ct); 5798 } 5799 } 5800 5801 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5802 cv_broadcast(&ct->ct_powerchange_cv); 5803 5804 break; 5805 5806 /* need to do more */ 5807 case BUS_POWER_HAS_CHANGED: 5808 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 5809 "BUS_POWER_HAS_CHANGED:" 5810 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5811 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 5812 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 5813 5814 if (bphc->bphc_nlevel > 0 && 5815 bphc->bphc_nlevel > bphc->bphc_olevel) { 5816 if (ct->ct_power_cnt == 0) { 5817 ret = i_mdi_power_all_phci(ct); 5818 } 5819 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5820 "mdi_bus_power i_mdi_pm_hold_client\n")); 5821 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5822 } 5823 5824 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 5825 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5826 "mdi_bus_power i_mdi_pm_rele_client\n")); 5827 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5828 } 5829 break; 5830 } 5831 5832 
MDI_CLIENT_UNLOCK(ct); 5833 return (ret); 5834 } 5835 5836 static int 5837 i_mdi_pm_pre_config_one(dev_info_t *child) 5838 { 5839 int ret = MDI_SUCCESS; 5840 mdi_client_t *ct; 5841 5842 ct = i_devi_get_client(child); 5843 if (ct == NULL) 5844 return (MDI_FAILURE); 5845 5846 MDI_CLIENT_LOCK(ct); 5847 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5848 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5849 5850 if (!MDI_CLIENT_IS_FAILED(ct)) { 5851 MDI_CLIENT_UNLOCK(ct); 5852 MDI_DEBUG(4, (CE_NOTE, child, 5853 "i_mdi_pm_pre_config_one already configured\n")); 5854 return (MDI_SUCCESS); 5855 } 5856 5857 if (ct->ct_powercnt_held) { 5858 MDI_CLIENT_UNLOCK(ct); 5859 MDI_DEBUG(4, (CE_NOTE, child, 5860 "i_mdi_pm_pre_config_one ALREADY held\n")); 5861 return (MDI_SUCCESS); 5862 } 5863 5864 if (ct->ct_power_cnt == 0) { 5865 ret = i_mdi_power_all_phci(ct); 5866 } 5867 MDI_DEBUG(4, (CE_NOTE, child, 5868 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 5869 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5870 ct->ct_powercnt_held = 1; 5871 ct->ct_powercnt_reset = 0; 5872 MDI_CLIENT_UNLOCK(ct); 5873 return (ret); 5874 } 5875 5876 static int 5877 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child) 5878 { 5879 int ret = MDI_SUCCESS; 5880 dev_info_t *cdip; 5881 int circ; 5882 5883 ASSERT(MDI_VHCI(parent)); 5884 5885 /* ndi_devi_config_one */ 5886 if (child) { 5887 return (i_mdi_pm_pre_config_one(child)); 5888 } 5889 5890 /* devi_config_common */ 5891 ndi_devi_enter(parent, &circ); 5892 cdip = ddi_get_child(parent); 5893 while (cdip) { 5894 dev_info_t *next = ddi_get_next_sibling(cdip); 5895 5896 ret = i_mdi_pm_pre_config_one(cdip); 5897 if (ret != MDI_SUCCESS) 5898 break; 5899 cdip = next; 5900 } 5901 ndi_devi_exit(parent, circ); 5902 return (ret); 5903 } 5904 5905 static int 5906 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 5907 { 5908 int ret = MDI_SUCCESS; 5909 mdi_client_t *ct; 5910 5911 ct = i_devi_get_client(child); 5912 if (ct == NULL) 5913 return 
(MDI_FAILURE); 5914 5915 MDI_CLIENT_LOCK(ct); 5916 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5917 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5918 5919 if (i_ddi_node_state(ct->ct_dip) < DS_READY) { 5920 MDI_DEBUG(4, (CE_NOTE, child, 5921 "i_mdi_pm_pre_unconfig node detached already\n")); 5922 MDI_CLIENT_UNLOCK(ct); 5923 return (MDI_SUCCESS); 5924 } 5925 5926 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 5927 (flags & NDI_AUTODETACH)) { 5928 MDI_DEBUG(4, (CE_NOTE, child, 5929 "i_mdi_pm_pre_unconfig auto-modunload\n")); 5930 MDI_CLIENT_UNLOCK(ct); 5931 return (MDI_FAILURE); 5932 } 5933 5934 if (ct->ct_powercnt_held) { 5935 MDI_DEBUG(4, (CE_NOTE, child, 5936 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 5937 MDI_CLIENT_UNLOCK(ct); 5938 *held = 1; 5939 return (MDI_SUCCESS); 5940 } 5941 5942 if (ct->ct_power_cnt == 0) { 5943 ret = i_mdi_power_all_phci(ct); 5944 } 5945 MDI_DEBUG(4, (CE_NOTE, child, 5946 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 5947 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5948 ct->ct_powercnt_held = 1; 5949 ct->ct_powercnt_reset = 0; 5950 MDI_CLIENT_UNLOCK(ct); 5951 if (ret == MDI_SUCCESS) 5952 *held = 1; 5953 return (ret); 5954 } 5955 5956 static int 5957 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held, 5958 int flags) 5959 { 5960 int ret = MDI_SUCCESS; 5961 dev_info_t *cdip; 5962 int circ; 5963 5964 ASSERT(MDI_VHCI(parent)); 5965 *held = 0; 5966 5967 /* ndi_devi_unconfig_one */ 5968 if (child) { 5969 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 5970 } 5971 5972 /* devi_unconfig_common */ 5973 ndi_devi_enter(parent, &circ); 5974 cdip = ddi_get_child(parent); 5975 while (cdip) { 5976 dev_info_t *next = ddi_get_next_sibling(cdip); 5977 5978 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 5979 cdip = next; 5980 } 5981 ndi_devi_exit(parent, circ); 5982 5983 if (*held) 5984 ret = MDI_SUCCESS; 5985 5986 return (ret); 5987 } 5988 5989 static void 5990 i_mdi_pm_post_config_one(dev_info_t *child) 5991 { 5992 
mdi_client_t *ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_held) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (i_ddi_node_state(ct->ct_dip) < DS_READY &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t *pip, *next;
		int valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
				== MDI_PATHINFO_STATE_ONLINE ||
			    (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
				== MDI_PATHINFO_STATE_STANDBY)
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_held = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_pm_post_config():
 *		Post-config power management step for the vHCI "parent".
 *		When "child" is non-NULL only that child is processed;
 *		otherwise every child of "parent" is processed while the
 *		parent is held with ndi_devi_enter()/ndi_devi_exit().
 */
static void
i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
{
	int circ;
	dev_info_t *cdip;
	ASSERT(MDI_VHCI(parent));

	/* ndi_devi_config_one */
	if (child) {
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		/* fetch the sibling before processing the current child */
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
}

/*
 * i_mdi_pm_post_unconfig_one():
 *		Post-unconfig power management step for a single child.
 *		Does nothing if the child has no MDI client or if the client's
 *		ct_powercnt_held flag is clear.  Otherwise, after waiting
 *		(under the client lock) for any power transition to finish,
 *		the client's power state is reset when either
 *		  - the client is powered down while its node is DS_READY, or
 *		  - the node is not DS_READY and is not attaching.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t *ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* cv_wait() drops and reacquires ct_mutex while waiting */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_held) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_node_state(ct->ct_dip) == DS_READY) ||
	    (i_ddi_node_state(ct->ct_dip) != DS_READY &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	}

	/*
	 * NOTE(review): this message is logged on every path that reaches
	 * here, including right after the reset above -- confirm whether
	 * that is intended.
	 */
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_post_unconfig not changed\n"));
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_pm_post_unconfig():
 *		Post-unconfig power management step for the vHCI "parent".
 *		Nothing is done when "held" is zero.  Otherwise a non-NULL
 *		"child" is processed alone, and a NULL "child" means every
 *		child of "parent" is processed while the parent is held with
 *		ndi_devi_enter()/ndi_devi_exit().
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
{
	int circ;
	dev_info_t *cdip;

	ASSERT(MDI_VHCI(parent));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, parent,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		/* fetch the sibling before processing the current child */
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
}

/*
 * mdi_power():
 *		Dispatch a power management operation "op" for the vHCI
 *		"vdip".
 *
 *		When "devnm" is non-NULL it is looked up among the children of
 *		vdip and the result (possibly NULL) is handed to the
 *		pre/post config/unconfig handlers as the target client.
 *
 *		Interpretation of "args" depends on op:
 *		  MDI_PM_PRE_UNCONFIG	passed on as an (int *)
 *		  MDI_PM_POST_UNCONFIG	dereferenced as an (int *) to obtain
 *					the "held" value
 *		  MDI_PM_HOLD_POWER,
 *		  MDI_PM_RELE_POWER	the client dev_info_t itself
 *
 * Return Values:
 *		The value returned by i_mdi_pm_pre_config() or
 *		i_mdi_pm_pre_unconfig() for those two ops; MDI_SUCCESS for
 *		every other op (including unrecognized ones).
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int circ, ret = MDI_SUCCESS;
	dev_info_t *client_dip = NULL;
	mdi_client_t *ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
		ndi_devi_exit(vdip, circ);
	}

	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);

		break;
	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);

		break;
	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);

		break;
	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);

		break;
	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		ASSERT(args);

		/* for hold/rele the caller passes the client dip in args */
		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			/* only the first hold powers up the pHCIs */
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			/*
			 * Release the per-path holds while the client is
			 * still attaching; otherwise reset the client's
			 * power state.
			 */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;
	default:
		break;
	}

	return (ret);
}

int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6219 { 6220 mdi_vhci_t *vhci; 6221 6222 if (!MDI_VHCI(dip)) 6223 return (MDI_FAILURE); 6224 6225 if (mdi_class) { 6226 vhci = DEVI(dip)->devi_mdi_xhci; 6227 ASSERT(vhci); 6228 *mdi_class = vhci->vh_class; 6229 } 6230 6231 return (MDI_SUCCESS); 6232 } 6233 6234 int 6235 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6236 { 6237 mdi_phci_t *phci; 6238 6239 if (!MDI_PHCI(dip)) 6240 return (MDI_FAILURE); 6241 6242 if (mdi_class) { 6243 phci = DEVI(dip)->devi_mdi_xhci; 6244 ASSERT(phci); 6245 *mdi_class = phci->ph_vhci->vh_class; 6246 } 6247 6248 return (MDI_SUCCESS); 6249 } 6250 6251 int 6252 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6253 { 6254 mdi_client_t *client; 6255 6256 if (!MDI_CLIENT(dip)) 6257 return (MDI_FAILURE); 6258 6259 if (mdi_class) { 6260 client = DEVI(dip)->devi_mdi_client; 6261 ASSERT(client); 6262 *mdi_class = client->ct_vhci->vh_class; 6263 } 6264 6265 return (MDI_SUCCESS); 6266 } 6267 6268 void * 6269 mdi_client_get_vhci_private(dev_info_t *dip) 6270 { 6271 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6272 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6273 mdi_client_t *ct; 6274 ct = i_devi_get_client(dip); 6275 return (ct->ct_vprivate); 6276 } 6277 return (NULL); 6278 } 6279 6280 void 6281 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6282 { 6283 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6284 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6285 mdi_client_t *ct; 6286 ct = i_devi_get_client(dip); 6287 ct->ct_vprivate = data; 6288 } 6289 } 6290 /* 6291 * mdi_pi_get_vhci_private(): 6292 * Get the vhci private information associated with the 6293 * mdi_pathinfo node 6294 */ 6295 void * 6296 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6297 { 6298 caddr_t vprivate = NULL; 6299 if (pip) { 6300 vprivate = MDI_PI(pip)->pi_vprivate; 6301 } 6302 return (vprivate); 6303 } 6304 6305 /* 6306 * 
mdi_pi_set_vhci_private(): 6307 * Set the vhci private information in the mdi_pathinfo node 6308 */ 6309 void 6310 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6311 { 6312 if (pip) { 6313 MDI_PI(pip)->pi_vprivate = priv; 6314 } 6315 } 6316 6317 /* 6318 * mdi_phci_get_vhci_private(): 6319 * Get the vhci private information associated with the 6320 * mdi_phci node 6321 */ 6322 void * 6323 mdi_phci_get_vhci_private(dev_info_t *dip) 6324 { 6325 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6326 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6327 mdi_phci_t *ph; 6328 ph = i_devi_get_phci(dip); 6329 return (ph->ph_vprivate); 6330 } 6331 return (NULL); 6332 } 6333 6334 /* 6335 * mdi_phci_set_vhci_private(): 6336 * Set the vhci private information in the mdi_phci node 6337 */ 6338 void 6339 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6340 { 6341 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6342 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6343 mdi_phci_t *ph; 6344 ph = i_devi_get_phci(dip); 6345 ph->ph_vprivate = priv; 6346 } 6347 } 6348 6349 /* 6350 * List of vhci class names: 6351 * A vhci class name must be in this list only if the corresponding vhci 6352 * driver intends to use the mdi provided bus config implementation 6353 * (i.e., mdi_vhci_bus_config()). 6354 */ 6355 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6356 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6357 6358 /* 6359 * Built-in list of phci drivers for every vhci class. 6360 * All phci drivers expect iscsi have root device support. 6361 */ 6362 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 6363 { "fp", 1 }, 6364 { "iscsi", 0 }, 6365 { "ibsrp", 1 } 6366 }; 6367 6368 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 6369 6370 /* 6371 * During boot time, the on-disk vhci cache for every vhci class is read 6372 * in the form of an nvlist and stored here. 
6373 */ 6374 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6375 6376 /* nvpair names in vhci cache nvlist */ 6377 #define MDI_VHCI_CACHE_VERSION 1 6378 #define MDI_NVPNAME_VERSION "version" 6379 #define MDI_NVPNAME_PHCIS "phcis" 6380 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6381 6382 typedef enum { 6383 VHCACHE_NOT_REBUILT, 6384 VHCACHE_PARTIALLY_BUILT, 6385 VHCACHE_FULLY_BUILT 6386 } vhcache_build_status_t; 6387 6388 /* 6389 * Given vhci class name, return its on-disk vhci cache filename. 6390 * Memory for the returned filename which includes the full path is allocated 6391 * by this function. 6392 */ 6393 static char * 6394 vhclass2vhcache_filename(char *vhclass) 6395 { 6396 char *filename; 6397 int len; 6398 static char *fmt = "/etc/devices/mdi_%s_cache"; 6399 6400 /* 6401 * fmt contains the on-disk vhci cache file name format; 6402 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 6403 */ 6404 6405 /* the -1 below is to account for "%s" in the format string */ 6406 len = strlen(fmt) + strlen(vhclass) - 1; 6407 filename = kmem_alloc(len, KM_SLEEP); 6408 (void) snprintf(filename, len, fmt, vhclass); 6409 ASSERT(len == (strlen(filename) + 1)); 6410 return (filename); 6411 } 6412 6413 /* 6414 * initialize the vhci cache related data structures and read the on-disk 6415 * vhci cached data into memory. 
6416 */ 6417 static void 6418 setup_vhci_cache(mdi_vhci_t *vh) 6419 { 6420 mdi_vhci_config_t *vhc; 6421 mdi_vhci_cache_t *vhcache; 6422 int i; 6423 nvlist_t *nvl = NULL; 6424 6425 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6426 vh->vh_config = vhc; 6427 vhcache = &vhc->vhc_vhcache; 6428 6429 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6430 6431 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6432 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6433 6434 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6435 6436 /* 6437 * Create string hash; same as mod_hash_create_strhash() except that 6438 * we use NULL key destructor. 6439 */ 6440 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6441 mdi_bus_config_cache_hash_size, 6442 mod_hash_null_keydtor, mod_hash_null_valdtor, 6443 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6444 6445 setup_phci_driver_list(vh); 6446 6447 /* 6448 * The on-disk vhci cache is read during booting prior to the 6449 * lights-out period by mdi_read_devices_files(). 6450 */ 6451 for (i = 0; i < N_VHCI_CLASSES; i++) { 6452 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6453 nvl = vhcache_nvl[i]; 6454 vhcache_nvl[i] = NULL; 6455 break; 6456 } 6457 } 6458 6459 /* 6460 * this is to cover the case of some one manually causing unloading 6461 * (or detaching) and reloading (or attaching) of a vhci driver. 
6462 */ 6463 if (nvl == NULL && modrootloaded) 6464 nvl = read_on_disk_vhci_cache(vh->vh_class); 6465 6466 if (nvl != NULL) { 6467 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6468 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6469 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6470 else { 6471 cmn_err(CE_WARN, 6472 "%s: data file corrupted, will recreate\n", 6473 vhc->vhc_vhcache_filename); 6474 } 6475 rw_exit(&vhcache->vhcache_lock); 6476 nvlist_free(nvl); 6477 } 6478 6479 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6480 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6481 } 6482 6483 /* 6484 * free all vhci cache related resources 6485 */ 6486 static int 6487 destroy_vhci_cache(mdi_vhci_t *vh) 6488 { 6489 mdi_vhci_config_t *vhc = vh->vh_config; 6490 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6491 mdi_vhcache_phci_t *cphci, *cphci_next; 6492 mdi_vhcache_client_t *cct, *cct_next; 6493 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6494 6495 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6496 return (MDI_FAILURE); 6497 6498 kmem_free(vhc->vhc_vhcache_filename, 6499 strlen(vhc->vhc_vhcache_filename) + 1); 6500 6501 if (vhc->vhc_phci_driver_list) 6502 free_phci_driver_list(vhc); 6503 6504 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6505 6506 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6507 cphci = cphci_next) { 6508 cphci_next = cphci->cphci_next; 6509 free_vhcache_phci(cphci); 6510 } 6511 6512 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 6513 cct_next = cct->cct_next; 6514 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6515 cpi_next = cpi->cpi_next; 6516 free_vhcache_pathinfo(cpi); 6517 } 6518 free_vhcache_client(cct); 6519 } 6520 6521 rw_destroy(&vhcache->vhcache_lock); 6522 6523 mutex_destroy(&vhc->vhc_lock); 6524 cv_destroy(&vhc->vhc_cv); 6525 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6526 return (MDI_SUCCESS); 6527 } 6528 6529 /* 6530 * Setup the list of phci drivers 
associated with the specified vhci class. 6531 * MDI uses this information to rebuild bus config cache if in case the 6532 * cache is not available or corrupted. 6533 */ 6534 static void 6535 setup_phci_driver_list(mdi_vhci_t *vh) 6536 { 6537 mdi_vhci_config_t *vhc = vh->vh_config; 6538 mdi_phci_driver_info_t *driver_list; 6539 char **driver_list1; 6540 uint_t ndrivers, ndrivers1; 6541 int i, j; 6542 6543 if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) { 6544 driver_list = scsi_phci_driver_list; 6545 ndrivers = sizeof (scsi_phci_driver_list) / 6546 sizeof (mdi_phci_driver_info_t); 6547 } else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) { 6548 driver_list = ib_phci_driver_list; 6549 ndrivers = sizeof (ib_phci_driver_list) / 6550 sizeof (mdi_phci_driver_info_t); 6551 } else { 6552 driver_list = NULL; 6553 ndrivers = 0; 6554 } 6555 6556 /* 6557 * The driver.conf file of a vhci driver can specify additional 6558 * phci drivers using a project private "phci-drivers" property. 6559 */ 6560 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip, 6561 DDI_PROP_DONTPASS, "phci-drivers", &driver_list1, 6562 &ndrivers1) != DDI_PROP_SUCCESS) 6563 ndrivers1 = 0; 6564 6565 vhc->vhc_nphci_drivers = ndrivers + ndrivers1; 6566 if (vhc->vhc_nphci_drivers == 0) 6567 return; 6568 6569 vhc->vhc_phci_driver_list = kmem_alloc( 6570 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP); 6571 6572 for (i = 0; i < ndrivers; i++) { 6573 vhc->vhc_phci_driver_list[i].phdriver_name = 6574 i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP); 6575 vhc->vhc_phci_driver_list[i].phdriver_root_support = 6576 driver_list[i].phdriver_root_support; 6577 } 6578 6579 for (j = 0; j < ndrivers1; j++, i++) { 6580 vhc->vhc_phci_driver_list[i].phdriver_name = 6581 i_ddi_strdup(driver_list1[j], KM_SLEEP); 6582 vhc->vhc_phci_driver_list[i].phdriver_root_support = 1; 6583 } 6584 6585 if (ndrivers1) 6586 ddi_prop_free(driver_list1); 6587 } 6588 6589 /* 6590 * Free the memory allocated 
for the phci driver list 6591 */ 6592 static void 6593 free_phci_driver_list(mdi_vhci_config_t *vhc) 6594 { 6595 int i; 6596 6597 if (vhc->vhc_phci_driver_list == NULL) 6598 return; 6599 6600 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 6601 kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name, 6602 strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1); 6603 } 6604 6605 kmem_free(vhc->vhc_phci_driver_list, 6606 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers); 6607 } 6608 6609 /* 6610 * Stop all vhci cache related async threads and free their resources. 6611 */ 6612 static int 6613 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6614 { 6615 mdi_async_client_config_t *acc, *acc_next; 6616 6617 mutex_enter(&vhc->vhc_lock); 6618 vhc->vhc_flags |= MDI_VHC_EXIT; 6619 ASSERT(vhc->vhc_acc_thrcount >= 0); 6620 cv_broadcast(&vhc->vhc_cv); 6621 6622 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6623 (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) || 6624 vhc->vhc_acc_thrcount != 0) { 6625 mutex_exit(&vhc->vhc_lock); 6626 delay(1); 6627 mutex_enter(&vhc->vhc_lock); 6628 } 6629 6630 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6631 6632 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6633 acc_next = acc->acc_next; 6634 free_async_client_config(acc); 6635 } 6636 vhc->vhc_acc_list_head = NULL; 6637 vhc->vhc_acc_list_tail = NULL; 6638 vhc->vhc_acc_count = 0; 6639 6640 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6641 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6642 mutex_exit(&vhc->vhc_lock); 6643 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6644 vhcache_dirty(vhc); 6645 return (MDI_FAILURE); 6646 } 6647 } else 6648 mutex_exit(&vhc->vhc_lock); 6649 6650 if (callb_delete(vhc->vhc_cbid) != 0) 6651 return (MDI_FAILURE); 6652 6653 return (MDI_SUCCESS); 6654 } 6655 6656 /* 6657 * Stop vhci cache flush thread 6658 */ 6659 /* ARGSUSED */ 6660 static boolean_t 6661 stop_vhcache_flush_thread(void *arg, int code) 6662 { 6663 mdi_vhci_config_t *vhc = 
(mdi_vhci_config_t *)arg; 6664 6665 mutex_enter(&vhc->vhc_lock); 6666 vhc->vhc_flags |= MDI_VHC_EXIT; 6667 cv_broadcast(&vhc->vhc_cv); 6668 6669 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6670 mutex_exit(&vhc->vhc_lock); 6671 delay(1); 6672 mutex_enter(&vhc->vhc_lock); 6673 } 6674 6675 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6676 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6677 mutex_exit(&vhc->vhc_lock); 6678 (void) flush_vhcache(vhc, 1); 6679 } else 6680 mutex_exit(&vhc->vhc_lock); 6681 6682 return (B_TRUE); 6683 } 6684 6685 /* 6686 * Enqueue the vhcache phci (cphci) at the tail of the list 6687 */ 6688 static void 6689 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6690 { 6691 cphci->cphci_next = NULL; 6692 if (vhcache->vhcache_phci_head == NULL) 6693 vhcache->vhcache_phci_head = cphci; 6694 else 6695 vhcache->vhcache_phci_tail->cphci_next = cphci; 6696 vhcache->vhcache_phci_tail = cphci; 6697 } 6698 6699 /* 6700 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6701 */ 6702 static void 6703 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6704 mdi_vhcache_pathinfo_t *cpi) 6705 { 6706 cpi->cpi_next = NULL; 6707 if (cct->cct_cpi_head == NULL) 6708 cct->cct_cpi_head = cpi; 6709 else 6710 cct->cct_cpi_tail->cpi_next = cpi; 6711 cct->cct_cpi_tail = cpi; 6712 } 6713 6714 /* 6715 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6716 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6717 * flag set come at the beginning of the list. All cpis which have this 6718 * flag set come at the end of the list. 
6719 */ 6720 static void 6721 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6722 mdi_vhcache_pathinfo_t *newcpi) 6723 { 6724 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6725 6726 if (cct->cct_cpi_head == NULL || 6727 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6728 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6729 else { 6730 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6731 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6732 prev_cpi = cpi, cpi = cpi->cpi_next) 6733 ; 6734 6735 if (prev_cpi == NULL) 6736 cct->cct_cpi_head = newcpi; 6737 else 6738 prev_cpi->cpi_next = newcpi; 6739 6740 newcpi->cpi_next = cpi; 6741 6742 if (cpi == NULL) 6743 cct->cct_cpi_tail = newcpi; 6744 } 6745 } 6746 6747 /* 6748 * Enqueue the vhcache client (cct) at the tail of the list 6749 */ 6750 static void 6751 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6752 mdi_vhcache_client_t *cct) 6753 { 6754 cct->cct_next = NULL; 6755 if (vhcache->vhcache_client_head == NULL) 6756 vhcache->vhcache_client_head = cct; 6757 else 6758 vhcache->vhcache_client_tail->cct_next = cct; 6759 vhcache->vhcache_client_tail = cct; 6760 } 6761 6762 static void 6763 free_string_array(char **str, int nelem) 6764 { 6765 int i; 6766 6767 if (str) { 6768 for (i = 0; i < nelem; i++) { 6769 if (str[i]) 6770 kmem_free(str[i], strlen(str[i]) + 1); 6771 } 6772 kmem_free(str, sizeof (char *) * nelem); 6773 } 6774 } 6775 6776 static void 6777 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6778 { 6779 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6780 kmem_free(cphci, sizeof (*cphci)); 6781 } 6782 6783 static void 6784 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6785 { 6786 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6787 kmem_free(cpi, sizeof (*cpi)); 6788 } 6789 6790 static void 6791 free_vhcache_client(mdi_vhcache_client_t *cct) 6792 { 6793 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6794 kmem_free(cct, sizeof (*cct)); 6795 } 6796 6797 
static char * 6798 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6799 { 6800 char *name_addr; 6801 int len; 6802 6803 len = strlen(ct_name) + strlen(ct_addr) + 2; 6804 name_addr = kmem_alloc(len, KM_SLEEP); 6805 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6806 6807 if (ret_len) 6808 *ret_len = len; 6809 return (name_addr); 6810 } 6811 6812 /* 6813 * Copy the contents of paddrnvl to vhci cache. 6814 * paddrnvl nvlist contains path information for a vhci client. 6815 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 6816 */ 6817 static void 6818 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6819 mdi_vhcache_client_t *cct) 6820 { 6821 nvpair_t *nvp = NULL; 6822 mdi_vhcache_pathinfo_t *cpi; 6823 uint_t nelem; 6824 uint32_t *val; 6825 6826 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6827 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6828 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6829 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6830 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6831 ASSERT(nelem == 2); 6832 cpi->cpi_cphci = cphci_list[val[0]]; 6833 cpi->cpi_flags = val[1]; 6834 enqueue_tail_vhcache_pathinfo(cct, cpi); 6835 } 6836 } 6837 6838 /* 6839 * Copy the contents of caddrmapnvl to vhci cache. 6840 * caddrmapnvl nvlist contains vhci client address to phci client address 6841 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6842 * this nvlist. 
6843 */ 6844 static void 6845 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6846 mdi_vhcache_phci_t *cphci_list[]) 6847 { 6848 nvpair_t *nvp = NULL; 6849 nvlist_t *paddrnvl; 6850 mdi_vhcache_client_t *cct; 6851 6852 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6853 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6854 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6855 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6856 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6857 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6858 /* the client must contain at least one path */ 6859 ASSERT(cct->cct_cpi_head != NULL); 6860 6861 enqueue_vhcache_client(vhcache, cct); 6862 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6863 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6864 } 6865 } 6866 6867 /* 6868 * Copy the contents of the main nvlist to vhci cache. 6869 * 6870 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 6871 * The nvlist contains the mappings between the vhci client addresses and 6872 * their corresponding phci client addresses. 6873 * 6874 * The structure of the nvlist is as follows: 6875 * 6876 * Main nvlist: 6877 * NAME TYPE DATA 6878 * version int32 version number 6879 * phcis string array array of phci paths 6880 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 6881 * 6882 * structure of c2paddrs_nvl: 6883 * NAME TYPE DATA 6884 * caddr1 nvlist_t paddrs_nvl1 6885 * caddr2 nvlist_t paddrs_nvl2 6886 * ... 6887 * where caddr1, caddr2, ... are vhci client name and addresses in the 6888 * form of "<clientname>@<clientaddress>". 6889 * (for example: "ssd@2000002037cd9f72"); 6890 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 6891 * 6892 * structure of paddrs_nvl: 6893 * NAME TYPE DATA 6894 * pi_addr1 uint32_array (phci-id, cpi_flags) 6895 * pi_addr2 uint32_array (phci-id, cpi_flags) 6896 * ... 6897 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 6898 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 6899 * phci-ids are integers that identify PHCIs to which the 6900 * the bus specific address belongs to. These integers are used as an index 6901 * into to the phcis string array in the main nvlist to get the PHCI path. 6902 */ 6903 static int 6904 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 6905 { 6906 char **phcis, **phci_namep; 6907 uint_t nphcis; 6908 mdi_vhcache_phci_t *cphci, **cphci_list; 6909 nvlist_t *caddrmapnvl; 6910 int32_t ver; 6911 int i; 6912 size_t cphci_list_size; 6913 6914 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 6915 6916 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 6917 ver != MDI_VHCI_CACHE_VERSION) 6918 return (MDI_FAILURE); 6919 6920 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 6921 &nphcis) != 0) 6922 return (MDI_SUCCESS); 6923 6924 ASSERT(nphcis > 0); 6925 6926 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 6927 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 6928 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 6929 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 6930 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 6931 enqueue_vhcache_phci(vhcache, cphci); 6932 cphci_list[i] = cphci; 6933 } 6934 6935 ASSERT(vhcache->vhcache_phci_head != NULL); 6936 6937 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 6938 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 6939 6940 kmem_free(cphci_list, cphci_list_size); 6941 return (MDI_SUCCESS); 6942 } 6943 6944 /* 6945 * Build paddrnvl for the specified client using the information in the 6946 * vhci cache and add it to the caddrmapnnvl. 6947 * Returns 0 on success, errno on failure. 
6948 */ 6949 static int 6950 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 6951 nvlist_t *caddrmapnvl) 6952 { 6953 mdi_vhcache_pathinfo_t *cpi; 6954 nvlist_t *nvl; 6955 int err; 6956 uint32_t val[2]; 6957 6958 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 6959 6960 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 6961 return (err); 6962 6963 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 6964 val[0] = cpi->cpi_cphci->cphci_id; 6965 val[1] = cpi->cpi_flags; 6966 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 6967 != 0) 6968 goto out; 6969 } 6970 6971 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 6972 out: 6973 nvlist_free(nvl); 6974 return (err); 6975 } 6976 6977 /* 6978 * Build caddrmapnvl using the information in the vhci cache 6979 * and add it to the mainnvl. 6980 * Returns 0 on success, errno on failure. 6981 */ 6982 static int 6983 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 6984 { 6985 mdi_vhcache_client_t *cct; 6986 nvlist_t *nvl; 6987 int err; 6988 6989 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 6990 6991 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 6992 return (err); 6993 6994 for (cct = vhcache->vhcache_client_head; cct != NULL; 6995 cct = cct->cct_next) { 6996 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 6997 goto out; 6998 } 6999 7000 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7001 out: 7002 nvlist_free(nvl); 7003 return (err); 7004 } 7005 7006 /* 7007 * Build nvlist using the information in the vhci cache. 7008 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7009 * Returns nvl on success, NULL on failure. 
7010 */ 7011 static nvlist_t * 7012 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7013 { 7014 mdi_vhcache_phci_t *cphci; 7015 uint_t phci_count; 7016 char **phcis; 7017 nvlist_t *nvl; 7018 int err, i; 7019 7020 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7021 nvl = NULL; 7022 goto out; 7023 } 7024 7025 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7026 MDI_VHCI_CACHE_VERSION)) != 0) 7027 goto out; 7028 7029 rw_enter(&vhcache->vhcache_lock, RW_READER); 7030 if (vhcache->vhcache_phci_head == NULL) { 7031 rw_exit(&vhcache->vhcache_lock); 7032 return (nvl); 7033 } 7034 7035 phci_count = 0; 7036 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7037 cphci = cphci->cphci_next) 7038 cphci->cphci_id = phci_count++; 7039 7040 /* build phci pathname list */ 7041 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7042 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7043 cphci = cphci->cphci_next, i++) 7044 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7045 7046 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7047 phci_count); 7048 free_string_array(phcis, phci_count); 7049 7050 if (err == 0 && 7051 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7052 rw_exit(&vhcache->vhcache_lock); 7053 return (nvl); 7054 } 7055 7056 rw_exit(&vhcache->vhcache_lock); 7057 out: 7058 if (nvl) 7059 nvlist_free(nvl); 7060 return (NULL); 7061 } 7062 7063 /* 7064 * Lookup vhcache phci structure for the specified phci path. 7065 */ 7066 static mdi_vhcache_phci_t * 7067 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7068 { 7069 mdi_vhcache_phci_t *cphci; 7070 7071 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7072 7073 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7074 cphci = cphci->cphci_next) { 7075 if (strcmp(cphci->cphci_path, phci_path) == 0) 7076 return (cphci); 7077 } 7078 7079 return (NULL); 7080 } 7081 7082 /* 7083 * Lookup vhcache phci structure for the specified phci. 
7084 */ 7085 static mdi_vhcache_phci_t * 7086 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7087 { 7088 mdi_vhcache_phci_t *cphci; 7089 7090 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7091 7092 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7093 cphci = cphci->cphci_next) { 7094 if (cphci->cphci_phci == ph) 7095 return (cphci); 7096 } 7097 7098 return (NULL); 7099 } 7100 7101 /* 7102 * Add the specified phci to the vhci cache if not already present. 7103 */ 7104 static void 7105 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7106 { 7107 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7108 mdi_vhcache_phci_t *cphci; 7109 char *pathname; 7110 int cache_updated; 7111 7112 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7113 7114 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7115 (void) ddi_pathname(ph->ph_dip, pathname); 7116 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7117 != NULL) { 7118 cphci->cphci_phci = ph; 7119 cache_updated = 0; 7120 } else { 7121 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7122 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7123 cphci->cphci_phci = ph; 7124 enqueue_vhcache_phci(vhcache, cphci); 7125 cache_updated = 1; 7126 } 7127 rw_exit(&vhcache->vhcache_lock); 7128 7129 kmem_free(pathname, MAXPATHLEN); 7130 if (cache_updated) 7131 vhcache_dirty(vhc); 7132 } 7133 7134 /* 7135 * Remove the reference to the specified phci from the vhci cache. 
7136 */ 7137 static void 7138 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7139 { 7140 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7141 mdi_vhcache_phci_t *cphci; 7142 7143 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7144 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7145 /* do not remove the actual mdi_vhcache_phci structure */ 7146 cphci->cphci_phci = NULL; 7147 } 7148 rw_exit(&vhcache->vhcache_lock); 7149 } 7150 7151 static void 7152 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7153 mdi_vhcache_lookup_token_t *src) 7154 { 7155 if (src == NULL) { 7156 dst->lt_cct = NULL; 7157 dst->lt_cct_lookup_time = 0; 7158 } else { 7159 dst->lt_cct = src->lt_cct; 7160 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7161 } 7162 } 7163 7164 /* 7165 * Look up vhcache client for the specified client. 7166 */ 7167 static mdi_vhcache_client_t * 7168 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7169 mdi_vhcache_lookup_token_t *token) 7170 { 7171 mod_hash_val_t hv; 7172 char *name_addr; 7173 int len; 7174 7175 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7176 7177 /* 7178 * If no vhcache clean occurred since the last lookup, we can 7179 * simply return the cct from the last lookup operation. 7180 * It works because ccts are never freed except during the vhcache 7181 * cleanup operation. 
7182 */ 7183 if (token != NULL && 7184 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7185 return (token->lt_cct); 7186 7187 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7188 if (mod_hash_find(vhcache->vhcache_client_hash, 7189 (mod_hash_key_t)name_addr, &hv) == 0) { 7190 if (token) { 7191 token->lt_cct = (mdi_vhcache_client_t *)hv; 7192 token->lt_cct_lookup_time = lbolt64; 7193 } 7194 } else { 7195 if (token) { 7196 token->lt_cct = NULL; 7197 token->lt_cct_lookup_time = 0; 7198 } 7199 hv = NULL; 7200 } 7201 kmem_free(name_addr, len); 7202 return ((mdi_vhcache_client_t *)hv); 7203 } 7204 7205 /* 7206 * Add the specified path to the vhci cache if not already present. 7207 * Also add the vhcache client for the client corresponding to this path 7208 * if it doesn't already exist. 7209 */ 7210 static void 7211 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7212 { 7213 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7214 mdi_vhcache_client_t *cct; 7215 mdi_vhcache_pathinfo_t *cpi; 7216 mdi_phci_t *ph = pip->pi_phci; 7217 mdi_client_t *ct = pip->pi_client; 7218 int cache_updated = 0; 7219 7220 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7221 7222 /* if vhcache client for this pip doesn't already exist, add it */ 7223 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7224 NULL)) == NULL) { 7225 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7226 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7227 ct->ct_guid, NULL); 7228 enqueue_vhcache_client(vhcache, cct); 7229 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7230 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7231 cache_updated = 1; 7232 } 7233 7234 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7235 if (cpi->cpi_cphci->cphci_phci == ph && 7236 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7237 cpi->cpi_pip = pip; 7238 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7239 cpi->cpi_flags &= 7240 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7241 sort_vhcache_paths(cct); 7242 cache_updated = 1; 7243 } 7244 break; 7245 } 7246 } 7247 7248 if (cpi == NULL) { 7249 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7250 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7251 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7252 ASSERT(cpi->cpi_cphci != NULL); 7253 cpi->cpi_pip = pip; 7254 enqueue_vhcache_pathinfo(cct, cpi); 7255 cache_updated = 1; 7256 } 7257 7258 rw_exit(&vhcache->vhcache_lock); 7259 7260 if (cache_updated) 7261 vhcache_dirty(vhc); 7262 } 7263 7264 /* 7265 * Remove the reference to the specified path from the vhci cache. 7266 */ 7267 static void 7268 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7269 { 7270 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7271 mdi_client_t *ct = pip->pi_client; 7272 mdi_vhcache_client_t *cct; 7273 mdi_vhcache_pathinfo_t *cpi; 7274 7275 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7276 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7277 NULL)) != NULL) { 7278 for (cpi = cct->cct_cpi_head; cpi != NULL; 7279 cpi = cpi->cpi_next) { 7280 if (cpi->cpi_pip == pip) { 7281 cpi->cpi_pip = NULL; 7282 break; 7283 } 7284 } 7285 } 7286 rw_exit(&vhcache->vhcache_lock); 7287 } 7288 7289 /* 7290 * Flush the vhci cache to disk. 7291 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7292 */ 7293 static int 7294 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7295 { 7296 nvlist_t *nvl; 7297 int err; 7298 int rv; 7299 7300 /* 7301 * It is possible that the system may shutdown before 7302 * i_ddi_io_initialized (during stmsboot for example). To allow for 7303 * flushing the cache in this case do not check for 7304 * i_ddi_io_initialized when force flag is set. 
7305 */ 7306 if (force_flag == 0 && !i_ddi_io_initialized()) 7307 return (MDI_FAILURE); 7308 7309 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7310 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7311 nvlist_free(nvl); 7312 } else 7313 err = EFAULT; 7314 7315 rv = MDI_SUCCESS; 7316 mutex_enter(&vhc->vhc_lock); 7317 if (err != 0) { 7318 if (err == EROFS) { 7319 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7320 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7321 MDI_VHC_VHCACHE_DIRTY); 7322 } else { 7323 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7324 cmn_err(CE_CONT, "%s: update failed\n", 7325 vhc->vhc_vhcache_filename); 7326 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7327 } 7328 rv = MDI_FAILURE; 7329 } 7330 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7331 cmn_err(CE_CONT, 7332 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7333 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7334 } 7335 mutex_exit(&vhc->vhc_lock); 7336 7337 return (rv); 7338 } 7339 7340 /* 7341 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7342 * Exits itself if left idle for the idle timeout period. 
7343 */ 7344 static void 7345 vhcache_flush_thread(void *arg) 7346 { 7347 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7348 clock_t idle_time, quit_at_ticks; 7349 callb_cpr_t cprinfo; 7350 7351 /* number of seconds to sleep idle before exiting */ 7352 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7353 7354 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7355 "mdi_vhcache_flush"); 7356 mutex_enter(&vhc->vhc_lock); 7357 for (; ; ) { 7358 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7359 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7360 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7361 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7362 (void) cv_timedwait(&vhc->vhc_cv, 7363 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7364 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7365 } else { 7366 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7367 mutex_exit(&vhc->vhc_lock); 7368 7369 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7370 vhcache_dirty(vhc); 7371 7372 mutex_enter(&vhc->vhc_lock); 7373 } 7374 } 7375 7376 quit_at_ticks = ddi_get_lbolt() + idle_time; 7377 7378 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7379 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7380 ddi_get_lbolt() < quit_at_ticks) { 7381 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7382 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7383 quit_at_ticks); 7384 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7385 } 7386 7387 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7388 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7389 goto out; 7390 } 7391 7392 out: 7393 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7394 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7395 CALLB_CPR_EXIT(&cprinfo); 7396 } 7397 7398 /* 7399 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7400 */ 7401 static void 7402 vhcache_dirty(mdi_vhci_config_t *vhc) 7403 { 7404 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7405 int create_thread; 7406 7407 rw_enter(&vhcache->vhcache_lock, RW_READER); 7408 /* do not flush cache until the cache is fully built */ 7409 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7410 rw_exit(&vhcache->vhcache_lock); 7411 return; 7412 } 7413 rw_exit(&vhcache->vhcache_lock); 7414 7415 mutex_enter(&vhc->vhc_lock); 7416 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7417 mutex_exit(&vhc->vhc_lock); 7418 return; 7419 } 7420 7421 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7422 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7423 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7424 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7425 cv_broadcast(&vhc->vhc_cv); 7426 create_thread = 0; 7427 } else { 7428 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7429 create_thread = 1; 7430 } 7431 mutex_exit(&vhc->vhc_lock); 7432 7433 if (create_thread) 7434 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7435 0, &p0, TS_RUN, minclsyspri); 7436 } 7437 7438 /* 7439 * phci bus config structure - one for for each phci bus config operation that 7440 * we initiate on behalf of a vhci. 
7441 */ 7442 typedef struct mdi_phci_bus_config_s { 7443 char *phbc_phci_path; 7444 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7445 struct mdi_phci_bus_config_s *phbc_next; 7446 } mdi_phci_bus_config_t; 7447 7448 /* vhci bus config structure - one for each vhci bus config operation */ 7449 typedef struct mdi_vhci_bus_config_s { 7450 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7451 major_t vhbc_op_major; /* bus config op major */ 7452 uint_t vhbc_op_flags; /* bus config op flags */ 7453 kmutex_t vhbc_lock; 7454 kcondvar_t vhbc_cv; 7455 int vhbc_thr_count; 7456 } mdi_vhci_bus_config_t; 7457 7458 /* 7459 * bus config the specified phci 7460 */ 7461 static void 7462 bus_config_phci(void *arg) 7463 { 7464 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7465 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7466 dev_info_t *ph_dip; 7467 7468 /* 7469 * first configure all path components upto phci and then configure 7470 * the phci children. 7471 */ 7472 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7473 != NULL) { 7474 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7475 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7476 (void) ndi_devi_config_driver(ph_dip, 7477 vhbc->vhbc_op_flags, 7478 vhbc->vhbc_op_major); 7479 } else 7480 (void) ndi_devi_config(ph_dip, 7481 vhbc->vhbc_op_flags); 7482 7483 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7484 ndi_rele_devi(ph_dip); 7485 } 7486 7487 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7488 kmem_free(phbc, sizeof (*phbc)); 7489 7490 mutex_enter(&vhbc->vhbc_lock); 7491 vhbc->vhbc_thr_count--; 7492 if (vhbc->vhbc_thr_count == 0) 7493 cv_broadcast(&vhbc->vhbc_cv); 7494 mutex_exit(&vhbc->vhbc_lock); 7495 } 7496 7497 /* 7498 * Bus config all phcis associated with the vhci in parallel. 7499 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7500 */ 7501 static void 7502 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7503 ddi_bus_config_op_t op, major_t maj) 7504 { 7505 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7506 mdi_vhci_bus_config_t *vhbc; 7507 mdi_vhcache_phci_t *cphci; 7508 7509 rw_enter(&vhcache->vhcache_lock, RW_READER); 7510 if (vhcache->vhcache_phci_head == NULL) { 7511 rw_exit(&vhcache->vhcache_lock); 7512 return; 7513 } 7514 7515 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7516 7517 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7518 cphci = cphci->cphci_next) { 7519 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7520 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7521 KM_SLEEP); 7522 phbc->phbc_vhbusconfig = vhbc; 7523 phbc->phbc_next = phbc_head; 7524 phbc_head = phbc; 7525 vhbc->vhbc_thr_count++; 7526 } 7527 rw_exit(&vhcache->vhcache_lock); 7528 7529 vhbc->vhbc_op = op; 7530 vhbc->vhbc_op_major = maj; 7531 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7532 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7533 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7534 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7535 7536 /* now create threads to initiate bus config on all phcis in parallel */ 7537 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7538 phbc_next = phbc->phbc_next; 7539 if (mdi_mtc_off) 7540 bus_config_phci((void *)phbc); 7541 else 7542 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7543 0, &p0, TS_RUN, minclsyspri); 7544 } 7545 7546 mutex_enter(&vhbc->vhbc_lock); 7547 /* wait until all threads exit */ 7548 while (vhbc->vhbc_thr_count > 0) 7549 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7550 mutex_exit(&vhbc->vhbc_lock); 7551 7552 mutex_destroy(&vhbc->vhbc_lock); 7553 cv_destroy(&vhbc->vhbc_cv); 7554 kmem_free(vhbc, sizeof (*vhbc)); 7555 } 7556 7557 /* 7558 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7559 * The path includes the child component in addition to the phci path. 
7560 */ 7561 static int 7562 bus_config_one_phci_child(char *path) 7563 { 7564 dev_info_t *ph_dip, *child; 7565 char *devnm; 7566 int rv = MDI_FAILURE; 7567 7568 /* extract the child component of the phci */ 7569 devnm = strrchr(path, '/'); 7570 *devnm++ = '\0'; 7571 7572 /* 7573 * first configure all path components upto phci and then 7574 * configure the phci child. 7575 */ 7576 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7577 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7578 NDI_SUCCESS) { 7579 /* 7580 * release the hold that ndi_devi_config_one() placed 7581 */ 7582 ndi_rele_devi(child); 7583 rv = MDI_SUCCESS; 7584 } 7585 7586 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7587 ndi_rele_devi(ph_dip); 7588 } 7589 7590 devnm--; 7591 *devnm = '/'; 7592 return (rv); 7593 } 7594 7595 /* 7596 * Build a list of phci client paths for the specified vhci client. 7597 * The list includes only those phci client paths which aren't configured yet. 7598 */ 7599 static mdi_phys_path_t * 7600 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7601 { 7602 mdi_vhcache_pathinfo_t *cpi; 7603 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7604 int config_path, len; 7605 7606 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7607 /* 7608 * include only those paths that aren't configured. 
7609 */ 7610 config_path = 0; 7611 if (cpi->cpi_pip == NULL) 7612 config_path = 1; 7613 else { 7614 MDI_PI_LOCK(cpi->cpi_pip); 7615 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7616 config_path = 1; 7617 MDI_PI_UNLOCK(cpi->cpi_pip); 7618 } 7619 7620 if (config_path) { 7621 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7622 len = strlen(cpi->cpi_cphci->cphci_path) + 7623 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7624 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7625 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7626 cpi->cpi_cphci->cphci_path, ct_name, 7627 cpi->cpi_addr); 7628 pp->phys_path_next = NULL; 7629 7630 if (pp_head == NULL) 7631 pp_head = pp; 7632 else 7633 pp_tail->phys_path_next = pp; 7634 pp_tail = pp; 7635 } 7636 } 7637 7638 return (pp_head); 7639 } 7640 7641 /* 7642 * Free the memory allocated for phci client path list. 7643 */ 7644 static void 7645 free_phclient_path_list(mdi_phys_path_t *pp_head) 7646 { 7647 mdi_phys_path_t *pp, *pp_next; 7648 7649 for (pp = pp_head; pp != NULL; pp = pp_next) { 7650 pp_next = pp->phys_path_next; 7651 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7652 kmem_free(pp, sizeof (*pp)); 7653 } 7654 } 7655 7656 /* 7657 * Allocated async client structure and initialize with the specified values. 7658 */ 7659 static mdi_async_client_config_t * 7660 alloc_async_client_config(char *ct_name, char *ct_addr, 7661 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7662 { 7663 mdi_async_client_config_t *acc; 7664 7665 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7666 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7667 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7668 acc->acc_phclient_path_list_head = pp_head; 7669 init_vhcache_lookup_token(&acc->acc_token, tok); 7670 acc->acc_next = NULL; 7671 return (acc); 7672 } 7673 7674 /* 7675 * Free the memory allocated for the async client structure and their members. 
7676 */ 7677 static void 7678 free_async_client_config(mdi_async_client_config_t *acc) 7679 { 7680 if (acc->acc_phclient_path_list_head) 7681 free_phclient_path_list(acc->acc_phclient_path_list_head); 7682 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7683 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7684 kmem_free(acc, sizeof (*acc)); 7685 } 7686 7687 /* 7688 * Sort vhcache pathinfos (cpis) of the specified client. 7689 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7690 * flag set come at the beginning of the list. All cpis which have this 7691 * flag set come at the end of the list. 7692 */ 7693 static void 7694 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7695 { 7696 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7697 7698 cpi_head = cct->cct_cpi_head; 7699 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7700 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7701 cpi_next = cpi->cpi_next; 7702 enqueue_vhcache_pathinfo(cct, cpi); 7703 } 7704 } 7705 7706 /* 7707 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7708 * every vhcache pathinfo of the specified client. If not adjust the flag 7709 * setting appropriately. 7710 * 7711 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7712 * on-disk vhci cache. So every time this flag is updated the cache must be 7713 * flushed. 7714 */ 7715 static void 7716 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7717 mdi_vhcache_lookup_token_t *tok) 7718 { 7719 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7720 mdi_vhcache_client_t *cct; 7721 mdi_vhcache_pathinfo_t *cpi; 7722 7723 rw_enter(&vhcache->vhcache_lock, RW_READER); 7724 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7725 == NULL) { 7726 rw_exit(&vhcache->vhcache_lock); 7727 return; 7728 } 7729 7730 /* 7731 * to avoid unnecessary on-disk cache updates, first check if an 7732 * update is really needed. 
If no update is needed simply return. 7733 */ 7734 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7735 if ((cpi->cpi_pip != NULL && 7736 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7737 (cpi->cpi_pip == NULL && 7738 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7739 break; 7740 } 7741 } 7742 if (cpi == NULL) { 7743 rw_exit(&vhcache->vhcache_lock); 7744 return; 7745 } 7746 7747 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7748 rw_exit(&vhcache->vhcache_lock); 7749 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7750 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7751 tok)) == NULL) { 7752 rw_exit(&vhcache->vhcache_lock); 7753 return; 7754 } 7755 } 7756 7757 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7758 if (cpi->cpi_pip != NULL) 7759 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7760 else 7761 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7762 } 7763 sort_vhcache_paths(cct); 7764 7765 rw_exit(&vhcache->vhcache_lock); 7766 vhcache_dirty(vhc); 7767 } 7768 7769 /* 7770 * Configure all specified paths of the client. 7771 */ 7772 static void 7773 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7774 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7775 { 7776 mdi_phys_path_t *pp; 7777 7778 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 7779 (void) bus_config_one_phci_child(pp->phys_path); 7780 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 7781 } 7782 7783 /* 7784 * Dequeue elements from vhci async client config list and bus configure 7785 * their corresponding phci clients. 
7786 */ 7787 static void 7788 config_client_paths_thread(void *arg) 7789 { 7790 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7791 mdi_async_client_config_t *acc; 7792 clock_t quit_at_ticks; 7793 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 7794 callb_cpr_t cprinfo; 7795 7796 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7797 "mdi_config_client_paths"); 7798 7799 for (; ; ) { 7800 quit_at_ticks = ddi_get_lbolt() + idle_time; 7801 7802 mutex_enter(&vhc->vhc_lock); 7803 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7804 vhc->vhc_acc_list_head == NULL && 7805 ddi_get_lbolt() < quit_at_ticks) { 7806 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7807 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7808 quit_at_ticks); 7809 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7810 } 7811 7812 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7813 vhc->vhc_acc_list_head == NULL) 7814 goto out; 7815 7816 acc = vhc->vhc_acc_list_head; 7817 vhc->vhc_acc_list_head = acc->acc_next; 7818 if (vhc->vhc_acc_list_head == NULL) 7819 vhc->vhc_acc_list_tail = NULL; 7820 vhc->vhc_acc_count--; 7821 mutex_exit(&vhc->vhc_lock); 7822 7823 config_client_paths_sync(vhc, acc->acc_ct_name, 7824 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 7825 &acc->acc_token); 7826 7827 free_async_client_config(acc); 7828 } 7829 7830 out: 7831 vhc->vhc_acc_thrcount--; 7832 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7833 CALLB_CPR_EXIT(&cprinfo); 7834 } 7835 7836 /* 7837 * Arrange for all the phci client paths (pp_head) for the specified client 7838 * to be bus configured asynchronously by a thread. 
7839 */ 7840 static void 7841 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7842 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7843 { 7844 mdi_async_client_config_t *acc, *newacc; 7845 int create_thread; 7846 7847 if (pp_head == NULL) 7848 return; 7849 7850 if (mdi_mtc_off) { 7851 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7852 free_phclient_path_list(pp_head); 7853 return; 7854 } 7855 7856 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7857 ASSERT(newacc); 7858 7859 mutex_enter(&vhc->vhc_lock); 7860 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7861 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 7862 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 7863 free_async_client_config(newacc); 7864 mutex_exit(&vhc->vhc_lock); 7865 return; 7866 } 7867 } 7868 7869 if (vhc->vhc_acc_list_head == NULL) 7870 vhc->vhc_acc_list_head = newacc; 7871 else 7872 vhc->vhc_acc_list_tail->acc_next = newacc; 7873 vhc->vhc_acc_list_tail = newacc; 7874 vhc->vhc_acc_count++; 7875 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 7876 cv_broadcast(&vhc->vhc_cv); 7877 create_thread = 0; 7878 } else { 7879 vhc->vhc_acc_thrcount++; 7880 create_thread = 1; 7881 } 7882 mutex_exit(&vhc->vhc_lock); 7883 7884 if (create_thread) 7885 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 7886 0, &p0, TS_RUN, minclsyspri); 7887 } 7888 7889 /* 7890 * Return number of online paths for the specified client. 
7891 */ 7892 static int 7893 nonline_paths(mdi_vhcache_client_t *cct) 7894 { 7895 mdi_vhcache_pathinfo_t *cpi; 7896 int online_count = 0; 7897 7898 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7899 if (cpi->cpi_pip != NULL) { 7900 MDI_PI_LOCK(cpi->cpi_pip); 7901 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 7902 online_count++; 7903 MDI_PI_UNLOCK(cpi->cpi_pip); 7904 } 7905 } 7906 7907 return (online_count); 7908 } 7909 7910 /* 7911 * Bus configure all paths for the specified vhci client. 7912 * If at least one path for the client is already online, the remaining paths 7913 * will be configured asynchronously. Otherwise, it synchronously configures 7914 * the paths until at least one path is online and then rest of the paths 7915 * will be configured asynchronously. 7916 */ 7917 static void 7918 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 7919 { 7920 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7921 mdi_phys_path_t *pp_head, *pp; 7922 mdi_vhcache_client_t *cct; 7923 mdi_vhcache_lookup_token_t tok; 7924 7925 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7926 7927 init_vhcache_lookup_token(&tok, NULL); 7928 7929 if (ct_name == NULL || ct_addr == NULL || 7930 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 7931 == NULL || 7932 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 7933 rw_exit(&vhcache->vhcache_lock); 7934 return; 7935 } 7936 7937 /* if at least one path is online, configure the rest asynchronously */ 7938 if (nonline_paths(cct) > 0) { 7939 rw_exit(&vhcache->vhcache_lock); 7940 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 7941 return; 7942 } 7943 7944 rw_exit(&vhcache->vhcache_lock); 7945 7946 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 7947 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 7948 rw_enter(&vhcache->vhcache_lock, RW_READER); 7949 7950 if ((cct = lookup_vhcache_client(vhcache, ct_name, 7951 ct_addr, &tok)) 
== NULL) { 7952 rw_exit(&vhcache->vhcache_lock); 7953 goto out; 7954 } 7955 7956 if (nonline_paths(cct) > 0 && 7957 pp->phys_path_next != NULL) { 7958 rw_exit(&vhcache->vhcache_lock); 7959 config_client_paths_async(vhc, ct_name, ct_addr, 7960 pp->phys_path_next, &tok); 7961 pp->phys_path_next = NULL; 7962 goto out; 7963 } 7964 7965 rw_exit(&vhcache->vhcache_lock); 7966 } 7967 } 7968 7969 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 7970 out: 7971 free_phclient_path_list(pp_head); 7972 } 7973 7974 static void 7975 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 7976 { 7977 mutex_enter(&vhc->vhc_lock); 7978 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 7979 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 7980 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 7981 mutex_exit(&vhc->vhc_lock); 7982 } 7983 7984 static void 7985 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 7986 { 7987 mutex_enter(&vhc->vhc_lock); 7988 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 7989 cv_broadcast(&vhc->vhc_cv); 7990 mutex_exit(&vhc->vhc_lock); 7991 } 7992 7993 /* 7994 * Attach the phci driver instances associated with the vhci: 7995 * If root is mounted attach all phci driver instances. 7996 * If root is not mounted, attach the instances of only those phci 7997 * drivers that have the root support. 7998 */ 7999 static void 8000 attach_phci_drivers(mdi_vhci_config_t *vhc, int root_mounted) 8001 { 8002 int i; 8003 major_t m; 8004 8005 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 8006 if (root_mounted == 0 && 8007 vhc->vhc_phci_driver_list[i].phdriver_root_support == 0) 8008 continue; 8009 8010 m = ddi_name_to_major( 8011 vhc->vhc_phci_driver_list[i].phdriver_name); 8012 if (m != (major_t)-1) { 8013 if (ddi_hold_installed_driver(m) != NULL) 8014 ddi_rele_driver(m); 8015 } 8016 } 8017 } 8018 8019 /* 8020 * Build vhci cache: 8021 * 8022 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8023 * the phci driver instances. During this process the cache gets built. 
8024 * 8025 * Cache is built fully if the root is mounted (i.e., root_mounted is nonzero). 8026 * 8027 * If the root is not mounted, phci drivers that do not have root support 8028 * are not attached. As a result the cache is built partially. The entries 8029 * in the cache reflect only those phci drivers that have root support. 8030 */ 8031 static vhcache_build_status_t 8032 build_vhci_cache(mdi_vhci_config_t *vhc, int root_mounted) 8033 { 8034 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8035 8036 rw_enter(&vhcache->vhcache_lock, RW_READER); 8037 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8038 rw_exit(&vhcache->vhcache_lock); 8039 return (VHCACHE_NOT_REBUILT); 8040 } 8041 rw_exit(&vhcache->vhcache_lock); 8042 8043 attach_phci_drivers(vhc, root_mounted); 8044 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8045 BUS_CONFIG_ALL, (major_t)-1); 8046 8047 if (root_mounted) { 8048 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8049 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8050 rw_exit(&vhcache->vhcache_lock); 8051 vhcache_dirty(vhc); 8052 return (VHCACHE_FULLY_BUILT); 8053 } else 8054 return (VHCACHE_PARTIALLY_BUILT); 8055 } 8056 8057 /* 8058 * Wait until the root is mounted and then build the vhci cache. 8059 */ 8060 static void 8061 build_vhci_cache_thread(void *arg) 8062 { 8063 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8064 8065 mutex_enter(&vhc->vhc_lock); 8066 while (!modrootloaded && !(vhc->vhc_flags & MDI_VHC_EXIT)) { 8067 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8068 ddi_get_lbolt() + 10 * TICKS_PER_SECOND); 8069 } 8070 if (vhc->vhc_flags & MDI_VHC_EXIT) 8071 goto out; 8072 8073 mutex_exit(&vhc->vhc_lock); 8074 8075 /* 8076 * Now that the root is mounted. So build_vhci_cache() will build 8077 * the full cache. 
8078 */ 8079 (void) build_vhci_cache(vhc, 1); 8080 8081 mutex_enter(&vhc->vhc_lock); 8082 out: 8083 vhc->vhc_flags &= ~MDI_VHC_BUILD_VHCI_CACHE_THREAD; 8084 mutex_exit(&vhc->vhc_lock); 8085 } 8086 8087 /* 8088 * Build vhci cache - a wrapper for build_vhci_cache(). 8089 * 8090 * In a normal case on-disk vhci cache is read and setup during booting. 8091 * But if the on-disk vhci cache is not there or deleted or corrupted then 8092 * this function sets up the vhci cache. 8093 * 8094 * The cache is built fully if the root is mounted. 8095 * 8096 * If the root is not mounted, initially the cache is built reflecting only 8097 * those driver entries that have the root support. A separate thread is 8098 * created to handle the creation of full cache. This thread will wait 8099 * until the root is mounted and then rebuilds the cache. 8100 */ 8101 static int 8102 e_build_vhci_cache(mdi_vhci_config_t *vhc) 8103 { 8104 vhcache_build_status_t rv; 8105 8106 single_threaded_vhconfig_enter(vhc); 8107 8108 mutex_enter(&vhc->vhc_lock); 8109 if (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) { 8110 if (modrootloaded) { 8111 cv_broadcast(&vhc->vhc_cv); 8112 /* wait until build vhci cache thread exits */ 8113 while (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) 8114 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8115 rv = VHCACHE_FULLY_BUILT; 8116 } else { 8117 /* 8118 * The presense of MDI_VHC_BUILD_VHCI_CACHE_THREAD 8119 * flag indicates that the cache has already been 8120 * partially built. 8121 */ 8122 rv = VHCACHE_PARTIALLY_BUILT; 8123 } 8124 8125 mutex_exit(&vhc->vhc_lock); 8126 single_threaded_vhconfig_exit(vhc); 8127 return (rv); 8128 } 8129 mutex_exit(&vhc->vhc_lock); 8130 8131 rv = build_vhci_cache(vhc, modrootloaded); 8132 8133 if (rv == VHCACHE_PARTIALLY_BUILT) { 8134 /* 8135 * create a thread; this thread will wait until the root is 8136 * mounted and then fully rebuilds the cache. 
8137 */ 8138 mutex_enter(&vhc->vhc_lock); 8139 vhc->vhc_flags |= MDI_VHC_BUILD_VHCI_CACHE_THREAD; 8140 mutex_exit(&vhc->vhc_lock); 8141 (void) thread_create(NULL, 0, build_vhci_cache_thread, 8142 vhc, 0, &p0, TS_RUN, minclsyspri); 8143 } 8144 8145 single_threaded_vhconfig_exit(vhc); 8146 return (rv); 8147 } 8148 8149 /* 8150 * Generic vhci bus config implementation: 8151 * 8152 * Parameters 8153 * vdip vhci dip 8154 * flags bus config flags 8155 * op bus config operation 8156 * The remaining parameters are bus config operation specific 8157 * 8158 * for BUS_CONFIG_ONE 8159 * arg pointer to name@addr 8160 * child upon successful return from this function, *child will be 8161 * set to the configured and held devinfo child node of vdip. 8162 * ct_addr pointer to client address (i.e. GUID) 8163 * 8164 * for BUS_CONFIG_DRIVER 8165 * arg major number of the driver 8166 * child and ct_addr parameters are ignored 8167 * 8168 * for BUS_CONFIG_ALL 8169 * arg, child, and ct_addr parameters are ignored 8170 * 8171 * Note that for the rest of the bus config operations, this function simply 8172 * calls the framework provided default bus config routine. 8173 */ 8174 int 8175 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8176 void *arg, dev_info_t **child, char *ct_addr) 8177 { 8178 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8179 mdi_vhci_config_t *vhc = vh->vh_config; 8180 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8181 vhcache_build_status_t rv = VHCACHE_NOT_REBUILT; 8182 char *cp; 8183 8184 /* 8185 * While bus configuring phcis, the phci driver interactions with MDI 8186 * cause child nodes to be enumerated under the vhci node for which 8187 * they need to ndi_devi_enter the vhci node. 8188 * 8189 * Unfortunately, to avoid the deadlock, we ourself can not wait for 8190 * for the bus config operations on phcis to finish while holding the 8191 * ndi_devi_enter lock. 
To avoid this deadlock, skip bus configs on 8192 * phcis and call the default framework provided bus config function 8193 * if we are called with ndi_devi_enter lock held. 8194 */ 8195 if (DEVI_BUSY_OWNED(vdip)) { 8196 MDI_DEBUG(2, (CE_NOTE, vdip, 8197 "!MDI: vhci bus config: vhci dip is busy owned\n")); 8198 goto default_bus_config; 8199 } 8200 8201 rw_enter(&vhcache->vhcache_lock, RW_READER); 8202 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8203 rw_exit(&vhcache->vhcache_lock); 8204 rv = e_build_vhci_cache(vhc); 8205 rw_enter(&vhcache->vhcache_lock, RW_READER); 8206 } 8207 8208 switch (op) { 8209 case BUS_CONFIG_ONE: 8210 /* extract node name */ 8211 cp = (char *)arg; 8212 while (*cp != '\0' && *cp != '@') 8213 cp++; 8214 if (*cp == '@') { 8215 *cp = '\0'; 8216 config_client_paths(vhc, (char *)arg, ct_addr); 8217 /* config_client_paths() releases the cache_lock */ 8218 *cp = '@'; 8219 } else 8220 rw_exit(&vhcache->vhcache_lock); 8221 break; 8222 8223 case BUS_CONFIG_DRIVER: 8224 rw_exit(&vhcache->vhcache_lock); 8225 if (rv == VHCACHE_NOT_REBUILT) 8226 bus_config_all_phcis(vhcache, flags, op, 8227 (major_t)(uintptr_t)arg); 8228 break; 8229 8230 case BUS_CONFIG_ALL: 8231 rw_exit(&vhcache->vhcache_lock); 8232 if (rv == VHCACHE_NOT_REBUILT) 8233 bus_config_all_phcis(vhcache, flags, op, -1); 8234 break; 8235 8236 default: 8237 rw_exit(&vhcache->vhcache_lock); 8238 break; 8239 } 8240 8241 8242 default_bus_config: 8243 /* 8244 * All requested child nodes are enumerated under the vhci. 8245 * Now configure them. 8246 */ 8247 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8248 NDI_SUCCESS) { 8249 return (MDI_SUCCESS); 8250 } 8251 8252 return (MDI_FAILURE); 8253 } 8254 8255 /* 8256 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 
8257 */ 8258 static nvlist_t * 8259 read_on_disk_vhci_cache(char *vhci_class) 8260 { 8261 nvlist_t *nvl; 8262 int err; 8263 char *filename; 8264 8265 filename = vhclass2vhcache_filename(vhci_class); 8266 8267 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8268 kmem_free(filename, strlen(filename) + 1); 8269 return (nvl); 8270 } else if (err == EIO) 8271 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8272 else if (err == EINVAL) 8273 cmn_err(CE_WARN, 8274 "%s: data file corrupted, will recreate\n", filename); 8275 8276 kmem_free(filename, strlen(filename) + 1); 8277 return (NULL); 8278 } 8279 8280 /* 8281 * Read on-disk vhci cache into nvlists for all vhci classes. 8282 * Called during booting by i_ddi_read_devices_files(). 8283 */ 8284 void 8285 mdi_read_devices_files(void) 8286 { 8287 int i; 8288 8289 for (i = 0; i < N_VHCI_CLASSES; i++) 8290 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8291 } 8292 8293 /* 8294 * Remove all stale entries from vhci cache. 
8295 */ 8296 static void 8297 clean_vhcache(mdi_vhci_config_t *vhc) 8298 { 8299 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8300 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8301 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8302 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8303 8304 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8305 8306 cct_head = vhcache->vhcache_client_head; 8307 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8308 for (cct = cct_head; cct != NULL; cct = cct_next) { 8309 cct_next = cct->cct_next; 8310 8311 cpi_head = cct->cct_cpi_head; 8312 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8313 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8314 cpi_next = cpi->cpi_next; 8315 if (cpi->cpi_pip != NULL) { 8316 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8317 enqueue_tail_vhcache_pathinfo(cct, cpi); 8318 } else 8319 free_vhcache_pathinfo(cpi); 8320 } 8321 8322 if (cct->cct_cpi_head != NULL) 8323 enqueue_vhcache_client(vhcache, cct); 8324 else { 8325 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8326 (mod_hash_key_t)cct->cct_name_addr); 8327 free_vhcache_client(cct); 8328 } 8329 } 8330 8331 cphci_head = vhcache->vhcache_phci_head; 8332 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8333 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8334 cphci_next = cphci->cphci_next; 8335 if (cphci->cphci_phci != NULL) 8336 enqueue_vhcache_phci(vhcache, cphci); 8337 else 8338 free_vhcache_phci(cphci); 8339 } 8340 8341 vhcache->vhcache_clean_time = lbolt64; 8342 rw_exit(&vhcache->vhcache_lock); 8343 vhcache_dirty(vhc); 8344 } 8345 8346 /* 8347 * Remove all stale entries from vhci cache. 
8348 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8349 */ 8350 void 8351 mdi_clean_vhcache(void) 8352 { 8353 mdi_vhci_t *vh; 8354 8355 mutex_enter(&mdi_mutex); 8356 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8357 vh->vh_refcnt++; 8358 mutex_exit(&mdi_mutex); 8359 clean_vhcache(vh->vh_config); 8360 mutex_enter(&mdi_mutex); 8361 vh->vh_refcnt--; 8362 } 8363 mutex_exit(&mdi_mutex); 8364 } 8365