1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) 35 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 38 */ 39 40 #include <sys/note.h> 41 #include <sys/types.h> 42 #include <sys/varargs.h> 43 #include <sys/param.h> 44 #include <sys/errno.h> 45 #include <sys/uio.h> 46 #include <sys/buf.h> 47 #include <sys/modctl.h> 48 #include <sys/open.h> 49 #include <sys/kmem.h> 50 #include <sys/poll.h> 51 #include <sys/conf.h> 52 #include <sys/bootconf.h> 53 #include <sys/cmn_err.h> 54 #include <sys/stat.h> 55 #include <sys/ddi.h> 56 #include <sys/sunddi.h> 57 #include <sys/ddipropdefs.h> 58 #include <sys/sunndi.h> 59 #include <sys/ndi_impldefs.h> 60 #include <sys/promif.h> 61 #include <sys/sunmdi.h> 62 #include <sys/mdi_impldefs.h> 63 #include <sys/taskq.h> 64 #include <sys/epm.h> 65 #include <sys/sunpm.h> 66 #include <sys/modhash.h> 67 #include <sys/disp.h> 68 #include <sys/autoconf.h> 69 70 #ifdef DEBUG 71 #include <sys/debug.h> 72 int mdi_debug = 1; 73 #define MDI_DEBUG(level, stmnt) \ 74 if (mdi_debug >= (level)) i_mdi_log stmnt 75 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 76 #else /* !DEBUG */ 77 #define MDI_DEBUG(level, stmnt) 78 #endif /* DEBUG */ 79 80 extern pri_t minclsyspri; 81 extern int modrootloaded; 82 83 /* 84 * Global mutex: 85 * Protects vHCI list and structure members, pHCI and Client lists. 
86 */ 87 kmutex_t mdi_mutex; 88 89 /* 90 * Registered vHCI class driver lists 91 */ 92 int mdi_vhci_count; 93 mdi_vhci_t *mdi_vhci_head; 94 mdi_vhci_t *mdi_vhci_tail; 95 96 /* 97 * Client Hash Table size 98 */ 99 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 100 101 /* 102 * taskq interface definitions 103 */ 104 #define MDI_TASKQ_N_THREADS 8 105 #define MDI_TASKQ_PRI minclsyspri 106 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 107 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 108 109 taskq_t *mdi_taskq; 110 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 111 112 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 113 114 /* 115 * The data should be "quiet" for this interval (in seconds) before the 116 * vhci cached data is flushed to the disk. 117 */ 118 static int mdi_vhcache_flush_delay = 10; 119 120 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 121 static int mdi_vhcache_flush_daemon_idle_time = 60; 122 123 /* 124 * MDI falls back to discovery of all paths when a bus_config_one fails. 125 * The following parameters can be used to tune this operation. 126 * 127 * mdi_path_discovery_boot 128 * Number of times path discovery will be attempted during early boot. 129 * Probably there is no reason to ever set this value to greater than one. 130 * 131 * mdi_path_discovery_postboot 132 * Number of times path discovery will be attempted after early boot. 133 * Set it to a minimum of two to allow for discovery of iscsi paths which 134 * may happen very late during booting. 135 * 136 * mdi_path_discovery_interval 137 * Minimum number of seconds MDI will wait between successive discovery 138 * of all paths. Set it to -1 to disable discovery of all paths. 139 */ 140 static int mdi_path_discovery_boot = 1; 141 static int mdi_path_discovery_postboot = 2; 142 static int mdi_path_discovery_interval = 10; 143 144 /* 145 * number of seconds the asynchronous configuration thread will sleep idle 146 * before exiting. 
147 */ 148 static int mdi_async_config_idle_time = 600; 149 150 static int mdi_bus_config_cache_hash_size = 256; 151 152 /* turns off multithreaded configuration for certain operations */ 153 static int mdi_mtc_off = 0; 154 155 /* 156 * MDI component property name/value string definitions 157 */ 158 const char *mdi_component_prop = "mpxio-component"; 159 const char *mdi_component_prop_vhci = "vhci"; 160 const char *mdi_component_prop_phci = "phci"; 161 const char *mdi_component_prop_client = "client"; 162 163 /* 164 * MDI client global unique identifier property name 165 */ 166 const char *mdi_client_guid_prop = "client-guid"; 167 168 /* 169 * MDI client load balancing property name/value string definitions 170 */ 171 const char *mdi_load_balance = "load-balance"; 172 const char *mdi_load_balance_none = "none"; 173 const char *mdi_load_balance_rr = "round-robin"; 174 const char *mdi_load_balance_lba = "logical-block"; 175 176 /* 177 * Obsolete vHCI class definition; to be removed after Leadville update 178 */ 179 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 180 181 static char vhci_greeting[] = 182 "\tThere already exists one vHCI driver for class %s\n" 183 "\tOnly one vHCI driver for each class is allowed\n"; 184 185 /* 186 * Static function prototypes 187 */ 188 static int i_mdi_phci_offline(dev_info_t *, uint_t); 189 static int i_mdi_client_offline(dev_info_t *, uint_t); 190 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 191 static void i_mdi_phci_post_detach(dev_info_t *, 192 ddi_detach_cmd_t, int); 193 static int i_mdi_client_pre_detach(dev_info_t *, 194 ddi_detach_cmd_t); 195 static void i_mdi_client_post_detach(dev_info_t *, 196 ddi_detach_cmd_t, int); 197 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 198 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 199 static int i_mdi_lba_lb(mdi_client_t *ct, 200 mdi_pathinfo_t **ret_pip, struct buf *buf); 201 static void i_mdi_pm_hold_client(mdi_client_t *, int); 202 static void 
i_mdi_pm_rele_client(mdi_client_t *, int); 203 static void i_mdi_pm_reset_client(mdi_client_t *); 204 static void i_mdi_pm_hold_all_phci(mdi_client_t *); 205 static int i_mdi_power_all_phci(mdi_client_t *); 206 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 207 208 209 /* 210 * Internal mdi_pathinfo node functions 211 */ 212 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 213 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 214 215 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 216 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 217 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 218 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 219 static void i_mdi_phci_get_client_lock(mdi_phci_t *, 220 mdi_client_t *); 221 static void i_mdi_phci_unlock(mdi_phci_t *); 222 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 223 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 224 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 225 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 226 mdi_client_t *); 227 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 228 static void i_mdi_client_remove_path(mdi_client_t *, 229 mdi_pathinfo_t *); 230 231 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 232 mdi_pathinfo_state_t, int); 233 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 234 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 235 char **, int); 236 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 237 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 238 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 239 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 240 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 241 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 242 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, 
char *, char *); 243 static void i_mdi_client_update_state(mdi_client_t *); 244 static int i_mdi_client_compute_state(mdi_client_t *, 245 mdi_phci_t *); 246 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 247 static void i_mdi_client_unlock(mdi_client_t *); 248 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 249 static mdi_client_t *i_devi_get_client(dev_info_t *); 250 /* 251 * NOTE: this will be removed once the NWS files are changed to use the new 252 * mdi_{enable,disable}_path interfaces 253 */ 254 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 255 int, int); 256 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 257 mdi_vhci_t *vh, int flags, int op); 258 /* 259 * Failover related function prototypes 260 */ 261 static int i_mdi_failover(void *); 262 263 /* 264 * misc internal functions 265 */ 266 static int i_mdi_get_hash_key(char *); 267 static int i_map_nvlist_error_to_mdi(int); 268 static void i_mdi_report_path_state(mdi_client_t *, 269 mdi_pathinfo_t *); 270 271 static void setup_vhci_cache(mdi_vhci_t *); 272 static int destroy_vhci_cache(mdi_vhci_t *); 273 static void setup_phci_driver_list(mdi_vhci_t *); 274 static void free_phci_driver_list(mdi_vhci_config_t *); 275 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 276 static boolean_t stop_vhcache_flush_thread(void *, int); 277 static void free_string_array(char **, int); 278 static void free_vhcache_phci(mdi_vhcache_phci_t *); 279 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 280 static void free_vhcache_client(mdi_vhcache_client_t *); 281 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 282 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 283 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 284 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 285 static void vhcache_pi_add(mdi_vhci_config_t *, 286 struct mdi_pathinfo *); 287 static void 
vhcache_pi_remove(mdi_vhci_config_t *, 288 struct mdi_pathinfo *); 289 static void free_phclient_path_list(mdi_phys_path_t *); 290 static void sort_vhcache_paths(mdi_vhcache_client_t *); 291 static int flush_vhcache(mdi_vhci_config_t *, int); 292 static void vhcache_dirty(mdi_vhci_config_t *); 293 static void free_async_client_config(mdi_async_client_config_t *); 294 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 295 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 296 static nvlist_t *read_on_disk_vhci_cache(char *); 297 extern int fread_nvlist(char *, nvlist_t **); 298 extern int fwrite_nvlist(char *, nvlist_t *); 299 300 /* called once when first vhci registers with mdi */ 301 static void 302 i_mdi_init() 303 { 304 static int initialized = 0; 305 306 if (initialized) 307 return; 308 initialized = 1; 309 310 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 311 /* 312 * Create our taskq resources 313 */ 314 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 315 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 316 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 317 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 318 } 319 320 /* 321 * mdi_get_component_type(): 322 * Return mpxio component type 323 * Return Values: 324 * MDI_COMPONENT_NONE 325 * MDI_COMPONENT_VHCI 326 * MDI_COMPONENT_PHCI 327 * MDI_COMPONENT_CLIENT 328 * XXX This doesn't work under multi-level MPxIO and should be 329 * removed when clients migrate mdi_is_*() interfaces. 330 */ 331 int 332 mdi_get_component_type(dev_info_t *dip) 333 { 334 return (DEVI(dip)->devi_mdi_component); 335 } 336 337 /* 338 * mdi_vhci_register(): 339 * Register a vHCI module with the mpxio framework 340 * mdi_vhci_register() is called by vHCI drivers to register the 341 * 'class_driver' vHCI driver and its MDI entrypoints with the 342 * mpxio framework. The vHCI driver must call this interface as 343 * part of its attach(9e) handler. 
344 * Competing threads may try to attach mdi_vhci_register() as 345 * the vHCI drivers are loaded and attached as a result of pHCI 346 * driver instance registration (mdi_phci_register()) with the 347 * framework. 348 * Return Values: 349 * MDI_SUCCESS 350 * MDI_FAILURE 351 */ 352 353 /*ARGSUSED*/ 354 int 355 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 356 int flags) 357 { 358 mdi_vhci_t *vh = NULL; 359 360 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 361 362 i_mdi_init(); 363 364 mutex_enter(&mdi_mutex); 365 /* 366 * Scan for already registered vhci 367 */ 368 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 369 if (strcmp(vh->vh_class, class) == 0) { 370 /* 371 * vHCI has already been created. Check for valid 372 * vHCI ops registration. We only support one vHCI 373 * module per class 374 */ 375 if (vh->vh_ops != NULL) { 376 mutex_exit(&mdi_mutex); 377 cmn_err(CE_NOTE, vhci_greeting, class); 378 return (MDI_FAILURE); 379 } 380 break; 381 } 382 } 383 384 /* 385 * if not yet created, create the vHCI component 386 */ 387 if (vh == NULL) { 388 struct client_hash *hash = NULL; 389 char *load_balance; 390 391 /* 392 * Allocate and initialize the mdi extensions 393 */ 394 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 395 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 396 KM_SLEEP); 397 vh->vh_client_table = hash; 398 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 399 (void) strcpy(vh->vh_class, class); 400 vh->vh_lb = LOAD_BALANCE_RR; 401 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 402 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 403 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 404 vh->vh_lb = LOAD_BALANCE_NONE; 405 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 406 == 0) { 407 vh->vh_lb = LOAD_BALANCE_LBA; 408 } 409 ddi_prop_free(load_balance); 410 } 411 412 /* 413 * Store the vHCI ops vectors 414 */ 415 vh->vh_dip = vdip; 416 vh->vh_ops = vops; 417 418 
setup_vhci_cache(vh); 419 420 if (mdi_vhci_head == NULL) { 421 mdi_vhci_head = vh; 422 } 423 if (mdi_vhci_tail) { 424 mdi_vhci_tail->vh_next = vh; 425 } 426 mdi_vhci_tail = vh; 427 mdi_vhci_count++; 428 } 429 430 /* 431 * Claim the devfs node as a vhci component 432 */ 433 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 434 435 /* 436 * Initialize our back reference from dev_info node 437 */ 438 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 439 mutex_exit(&mdi_mutex); 440 return (MDI_SUCCESS); 441 } 442 443 /* 444 * mdi_vhci_unregister(): 445 * Unregister a vHCI module from mpxio framework 446 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 447 * of a vhci to unregister it from the framework. 448 * Return Values: 449 * MDI_SUCCESS 450 * MDI_FAILURE 451 */ 452 453 /*ARGSUSED*/ 454 int 455 mdi_vhci_unregister(dev_info_t *vdip, int flags) 456 { 457 mdi_vhci_t *found, *vh, *prev = NULL; 458 459 /* 460 * Check for invalid VHCI 461 */ 462 if ((vh = i_devi_get_vhci(vdip)) == NULL) 463 return (MDI_FAILURE); 464 465 mutex_enter(&mdi_mutex); 466 467 /* 468 * Scan the list of registered vHCIs for a match 469 */ 470 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 471 if (found == vh) 472 break; 473 prev = found; 474 } 475 476 if (found == NULL) { 477 mutex_exit(&mdi_mutex); 478 return (MDI_FAILURE); 479 } 480 481 /* 482 * Check the vHCI, pHCI and client count. All the pHCIs and clients 483 * should have been unregistered, before a vHCI can be 484 * unregistered. 
485 */ 486 if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) { 487 mutex_exit(&mdi_mutex); 488 return (MDI_FAILURE); 489 } 490 491 /* 492 * Remove the vHCI from the global list 493 */ 494 if (vh == mdi_vhci_head) { 495 mdi_vhci_head = vh->vh_next; 496 } else { 497 prev->vh_next = vh->vh_next; 498 } 499 if (vh == mdi_vhci_tail) { 500 mdi_vhci_tail = prev; 501 } 502 503 mdi_vhci_count--; 504 mutex_exit(&mdi_mutex); 505 506 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 507 /* add vhci to the global list */ 508 mutex_enter(&mdi_mutex); 509 if (mdi_vhci_head == NULL) 510 mdi_vhci_head = vh; 511 else 512 mdi_vhci_tail->vh_next = vh; 513 mdi_vhci_tail = vh; 514 mdi_vhci_count++; 515 mutex_exit(&mdi_mutex); 516 return (MDI_FAILURE); 517 } 518 519 vh->vh_ops = NULL; 520 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 521 DEVI(vdip)->devi_mdi_xhci = NULL; 522 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 523 kmem_free(vh->vh_client_table, 524 mdi_client_table_size * sizeof (struct client_hash)); 525 526 kmem_free(vh, sizeof (mdi_vhci_t)); 527 return (MDI_SUCCESS); 528 } 529 530 /* 531 * i_mdi_vhci_class2vhci(): 532 * Look for a matching vHCI module given a vHCI class name 533 * Return Values: 534 * Handle to a vHCI component 535 * NULL 536 */ 537 static mdi_vhci_t * 538 i_mdi_vhci_class2vhci(char *class) 539 { 540 mdi_vhci_t *vh = NULL; 541 542 ASSERT(!MUTEX_HELD(&mdi_mutex)); 543 544 mutex_enter(&mdi_mutex); 545 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 546 if (strcmp(vh->vh_class, class) == 0) { 547 break; 548 } 549 } 550 mutex_exit(&mdi_mutex); 551 return (vh); 552 } 553 554 /* 555 * i_devi_get_vhci(): 556 * Utility function to get the handle to a vHCI component 557 * Return Values: 558 * Handle to a vHCI component 559 * NULL 560 */ 561 mdi_vhci_t * 562 i_devi_get_vhci(dev_info_t *vdip) 563 { 564 mdi_vhci_t *vh = NULL; 565 if (MDI_VHCI(vdip)) { 566 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 567 } 568 return (vh); 569 } 570 571 /* 
572 * mdi_phci_register(): 573 * Register a pHCI module with mpxio framework 574 * mdi_phci_register() is called by pHCI drivers to register with 575 * the mpxio framework and a specific 'class_driver' vHCI. The 576 * pHCI driver must call this interface as part of its attach(9e) 577 * handler. 578 * Return Values: 579 * MDI_SUCCESS 580 * MDI_FAILURE 581 */ 582 583 /*ARGSUSED*/ 584 int 585 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 586 { 587 mdi_phci_t *ph; 588 mdi_vhci_t *vh; 589 char *data; 590 char *pathname; 591 592 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 593 (void) ddi_pathname(pdip, pathname); 594 595 /* 596 * Check for mpxio-disable property. Enable mpxio if the property is 597 * missing or not set to "yes". 598 * If the property is set to "yes" then emit a brief message. 599 */ 600 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 601 &data) == DDI_SUCCESS)) { 602 if (strcmp(data, "yes") == 0) { 603 MDI_DEBUG(1, (CE_CONT, pdip, 604 "?%s (%s%d) multipath capabilities " 605 "disabled via %s.conf.\n", pathname, 606 ddi_driver_name(pdip), ddi_get_instance(pdip), 607 ddi_driver_name(pdip))); 608 ddi_prop_free(data); 609 kmem_free(pathname, MAXPATHLEN); 610 return (MDI_FAILURE); 611 } 612 ddi_prop_free(data); 613 } 614 615 kmem_free(pathname, MAXPATHLEN); 616 617 /* 618 * Search for a matching vHCI 619 */ 620 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 621 if (vh == NULL) { 622 return (MDI_FAILURE); 623 } 624 625 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 626 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 627 ph->ph_dip = pdip; 628 ph->ph_vhci = vh; 629 ph->ph_next = NULL; 630 ph->ph_unstable = 0; 631 ph->ph_vprivate = 0; 632 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 633 cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL); 634 635 MDI_PHCI_SET_POWER_UP(ph); 636 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 637 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 638 639 
vhcache_phci_add(vh->vh_config, ph); 640 641 mutex_enter(&mdi_mutex); 642 if (vh->vh_phci_head == NULL) { 643 vh->vh_phci_head = ph; 644 } 645 if (vh->vh_phci_tail) { 646 vh->vh_phci_tail->ph_next = ph; 647 } 648 vh->vh_phci_tail = ph; 649 vh->vh_phci_count++; 650 mutex_exit(&mdi_mutex); 651 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 652 return (MDI_SUCCESS); 653 } 654 655 /* 656 * mdi_phci_unregister(): 657 * Unregister a pHCI module from mpxio framework 658 * mdi_phci_unregister() is called by the pHCI drivers from their 659 * detach(9E) handler to unregister their instances from the 660 * framework. 661 * Return Values: 662 * MDI_SUCCESS 663 * MDI_FAILURE 664 */ 665 666 /*ARGSUSED*/ 667 int 668 mdi_phci_unregister(dev_info_t *pdip, int flags) 669 { 670 mdi_vhci_t *vh; 671 mdi_phci_t *ph; 672 mdi_phci_t *tmp; 673 mdi_phci_t *prev = NULL; 674 675 ph = i_devi_get_phci(pdip); 676 if (ph == NULL) { 677 MDI_DEBUG(1, (CE_WARN, pdip, 678 "!pHCI unregister: Not a valid pHCI")); 679 return (MDI_FAILURE); 680 } 681 682 vh = ph->ph_vhci; 683 ASSERT(vh != NULL); 684 if (vh == NULL) { 685 MDI_DEBUG(1, (CE_WARN, pdip, 686 "!pHCI unregister: Not a valid vHCI")); 687 return (MDI_FAILURE); 688 } 689 690 mutex_enter(&mdi_mutex); 691 tmp = vh->vh_phci_head; 692 while (tmp) { 693 if (tmp == ph) { 694 break; 695 } 696 prev = tmp; 697 tmp = tmp->ph_next; 698 } 699 700 if (ph == vh->vh_phci_head) { 701 vh->vh_phci_head = ph->ph_next; 702 } else { 703 prev->ph_next = ph->ph_next; 704 } 705 706 if (ph == vh->vh_phci_tail) { 707 vh->vh_phci_tail = prev; 708 } 709 710 vh->vh_phci_count--; 711 712 mutex_exit(&mdi_mutex); 713 714 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 715 ESC_DDI_INITIATOR_UNREGISTER); 716 vhcache_phci_remove(vh->vh_config, ph); 717 cv_destroy(&ph->ph_unstable_cv); 718 cv_destroy(&ph->ph_powerchange_cv); 719 mutex_destroy(&ph->ph_mutex); 720 kmem_free(ph, sizeof (mdi_phci_t)); 721 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 722 
DEVI(pdip)->devi_mdi_xhci = NULL; 723 return (MDI_SUCCESS); 724 } 725 726 /* 727 * i_devi_get_phci(): 728 * Utility function to return the phci extensions. 729 */ 730 static mdi_phci_t * 731 i_devi_get_phci(dev_info_t *pdip) 732 { 733 mdi_phci_t *ph = NULL; 734 if (MDI_PHCI(pdip)) { 735 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 736 } 737 return (ph); 738 } 739 740 /* 741 * mdi_phci_path2devinfo(): 742 * Utility function to search for a valid phci device given 743 * the devfs pathname. 744 */ 745 746 dev_info_t * 747 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 748 { 749 char *temp_pathname; 750 mdi_vhci_t *vh; 751 mdi_phci_t *ph; 752 dev_info_t *pdip = NULL; 753 754 vh = i_devi_get_vhci(vdip); 755 ASSERT(vh != NULL); 756 757 if (vh == NULL) { 758 /* 759 * Invalid vHCI component, return failure 760 */ 761 return (NULL); 762 } 763 764 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 765 mutex_enter(&mdi_mutex); 766 ph = vh->vh_phci_head; 767 while (ph != NULL) { 768 pdip = ph->ph_dip; 769 ASSERT(pdip != NULL); 770 *temp_pathname = '\0'; 771 (void) ddi_pathname(pdip, temp_pathname); 772 if (strcmp(temp_pathname, pathname) == 0) { 773 break; 774 } 775 ph = ph->ph_next; 776 } 777 if (ph == NULL) { 778 pdip = NULL; 779 } 780 mutex_exit(&mdi_mutex); 781 kmem_free(temp_pathname, MAXPATHLEN); 782 return (pdip); 783 } 784 785 /* 786 * mdi_phci_get_path_count(): 787 * get number of path information nodes associated with a given 788 * pHCI device. 
789 */ 790 int 791 mdi_phci_get_path_count(dev_info_t *pdip) 792 { 793 mdi_phci_t *ph; 794 int count = 0; 795 796 ph = i_devi_get_phci(pdip); 797 if (ph != NULL) { 798 count = ph->ph_path_count; 799 } 800 return (count); 801 } 802 803 /* 804 * i_mdi_phci_lock(): 805 * Lock a pHCI device 806 * Return Values: 807 * None 808 * Note: 809 * The default locking order is: 810 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 811 * But there are number of situations where locks need to be 812 * grabbed in reverse order. This routine implements try and lock 813 * mechanism depending on the requested parameter option. 814 */ 815 static void 816 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 817 { 818 if (pip) { 819 /* Reverse locking is requested. */ 820 while (MDI_PHCI_TRYLOCK(ph) == 0) { 821 /* 822 * tryenter failed. Try to grab again 823 * after a small delay 824 */ 825 MDI_PI_HOLD(pip); 826 MDI_PI_UNLOCK(pip); 827 delay(1); 828 MDI_PI_LOCK(pip); 829 MDI_PI_RELE(pip); 830 } 831 } else { 832 MDI_PHCI_LOCK(ph); 833 } 834 } 835 836 /* 837 * i_mdi_phci_get_client_lock(): 838 * Lock a pHCI device 839 * Return Values: 840 * None 841 * Note: 842 * The default locking order is: 843 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 844 * But there are number of situations where locks need to be 845 * grabbed in reverse order. This routine implements try and lock 846 * mechanism depending on the requested parameter option. 847 */ 848 static void 849 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct) 850 { 851 if (ct) { 852 /* Reverse locking is requested. */ 853 while (MDI_PHCI_TRYLOCK(ph) == 0) { 854 /* 855 * tryenter failed. 
Try to grab again 856 * after a small delay 857 */ 858 MDI_CLIENT_UNLOCK(ct); 859 delay(1); 860 MDI_CLIENT_LOCK(ct); 861 } 862 } else { 863 MDI_PHCI_LOCK(ph); 864 } 865 } 866 867 /* 868 * i_mdi_phci_unlock(): 869 * Unlock the pHCI component 870 */ 871 static void 872 i_mdi_phci_unlock(mdi_phci_t *ph) 873 { 874 MDI_PHCI_UNLOCK(ph); 875 } 876 877 /* 878 * i_mdi_devinfo_create(): 879 * create client device's devinfo node 880 * Return Values: 881 * dev_info 882 * NULL 883 * Notes: 884 */ 885 static dev_info_t * 886 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 887 char **compatible, int ncompatible) 888 { 889 dev_info_t *cdip = NULL; 890 891 ASSERT(MUTEX_HELD(&mdi_mutex)); 892 893 /* Verify for duplicate entry */ 894 cdip = i_mdi_devinfo_find(vh, name, guid); 895 ASSERT(cdip == NULL); 896 if (cdip) { 897 cmn_err(CE_WARN, 898 "i_mdi_devinfo_create: client dip %p already exists", 899 (void *)cdip); 900 } 901 902 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 903 if (cdip == NULL) 904 goto fail; 905 906 /* 907 * Create component type and Global unique identifier 908 * properties 909 */ 910 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 911 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 912 goto fail; 913 } 914 915 /* Decorate the node with compatible property */ 916 if (compatible && 917 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 918 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 919 goto fail; 920 } 921 922 return (cdip); 923 924 fail: 925 if (cdip) { 926 (void) ndi_prop_remove_all(cdip); 927 (void) ndi_devi_free(cdip); 928 } 929 return (NULL); 930 } 931 932 /* 933 * i_mdi_devinfo_find(): 934 * Find a matching devinfo node for given client node name 935 * and its guid. 
936 * Return Values: 937 * Handle to a dev_info node or NULL 938 */ 939 940 static dev_info_t * 941 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 942 { 943 char *data; 944 dev_info_t *cdip = NULL; 945 dev_info_t *ndip = NULL; 946 int circular; 947 948 ndi_devi_enter(vh->vh_dip, &circular); 949 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 950 while ((cdip = ndip) != NULL) { 951 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 952 953 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 954 continue; 955 } 956 957 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 958 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 959 &data) != DDI_PROP_SUCCESS) { 960 continue; 961 } 962 963 if (strcmp(data, guid) != 0) { 964 ddi_prop_free(data); 965 continue; 966 } 967 ddi_prop_free(data); 968 break; 969 } 970 ndi_devi_exit(vh->vh_dip, circular); 971 return (cdip); 972 } 973 974 /* 975 * i_mdi_devinfo_remove(): 976 * Remove a client device node 977 */ 978 static int 979 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 980 { 981 int rv = MDI_SUCCESS; 982 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 983 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 984 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 985 if (rv != NDI_SUCCESS) { 986 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 987 " failed. 
cdip = %p\n", cdip)); 988 } 989 /* 990 * Convert to MDI error code 991 */ 992 switch (rv) { 993 case NDI_SUCCESS: 994 rv = MDI_SUCCESS; 995 break; 996 case NDI_BUSY: 997 rv = MDI_BUSY; 998 break; 999 default: 1000 rv = MDI_FAILURE; 1001 break; 1002 } 1003 } 1004 return (rv); 1005 } 1006 1007 /* 1008 * i_devi_get_client() 1009 * Utility function to get mpxio component extensions 1010 */ 1011 static mdi_client_t * 1012 i_devi_get_client(dev_info_t *cdip) 1013 { 1014 mdi_client_t *ct = NULL; 1015 if (MDI_CLIENT(cdip)) { 1016 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1017 } 1018 return (ct); 1019 } 1020 1021 /* 1022 * i_mdi_is_child_present(): 1023 * Search for the presence of client device dev_info node 1024 */ 1025 1026 static int 1027 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1028 { 1029 int rv = MDI_FAILURE; 1030 struct dev_info *dip; 1031 int circular; 1032 1033 ndi_devi_enter(vdip, &circular); 1034 dip = DEVI(vdip)->devi_child; 1035 while (dip) { 1036 if (dip == DEVI(cdip)) { 1037 rv = MDI_SUCCESS; 1038 break; 1039 } 1040 dip = dip->devi_sibling; 1041 } 1042 ndi_devi_exit(vdip, circular); 1043 return (rv); 1044 } 1045 1046 1047 /* 1048 * i_mdi_client_lock(): 1049 * Grab client component lock 1050 * Return Values: 1051 * None 1052 * Note: 1053 * The default locking order is: 1054 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1055 * But there are number of situations where locks need to be 1056 * grabbed in reverse order. This routine implements try and lock 1057 * mechanism depending on the requested parameter option. 1058 */ 1059 1060 static void 1061 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1062 { 1063 if (pip) { 1064 /* 1065 * Reverse locking is requested. 1066 */ 1067 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1068 /* 1069 * tryenter failed. 
Try to grab again 1070 * after a small delay 1071 */ 1072 MDI_PI_HOLD(pip); 1073 MDI_PI_UNLOCK(pip); 1074 delay(1); 1075 MDI_PI_LOCK(pip); 1076 MDI_PI_RELE(pip); 1077 } 1078 } else { 1079 MDI_CLIENT_LOCK(ct); 1080 } 1081 } 1082 1083 /* 1084 * i_mdi_client_unlock(): 1085 * Unlock a client component 1086 */ 1087 1088 static void 1089 i_mdi_client_unlock(mdi_client_t *ct) 1090 { 1091 MDI_CLIENT_UNLOCK(ct); 1092 } 1093 1094 /* 1095 * i_mdi_client_alloc(): 1096 * Allocate and initialize a client structure. Caller should 1097 * hold the global mdi_mutex. 1098 * Return Values: 1099 * Handle to a client component 1100 */ 1101 /*ARGSUSED*/ 1102 static mdi_client_t * 1103 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1104 { 1105 mdi_client_t *ct; 1106 1107 ASSERT(MUTEX_HELD(&mdi_mutex)); 1108 1109 /* 1110 * Allocate and initialize a component structure. 1111 */ 1112 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1113 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1114 ct->ct_hnext = NULL; 1115 ct->ct_hprev = NULL; 1116 ct->ct_dip = NULL; 1117 ct->ct_vhci = vh; 1118 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1119 (void) strcpy(ct->ct_drvname, name); 1120 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1121 (void) strcpy(ct->ct_guid, lguid); 1122 ct->ct_cprivate = NULL; 1123 ct->ct_vprivate = NULL; 1124 ct->ct_flags = 0; 1125 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1126 MDI_CLIENT_SET_OFFLINE(ct); 1127 MDI_CLIENT_SET_DETACH(ct); 1128 MDI_CLIENT_SET_POWER_UP(ct); 1129 ct->ct_failover_flags = 0; 1130 ct->ct_failover_status = 0; 1131 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1132 ct->ct_unstable = 0; 1133 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1134 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1135 ct->ct_lb = vh->vh_lb; 1136 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1137 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1138 ct->ct_path_count = 0; 1139 ct->ct_path_head 
= NULL; 1140 ct->ct_path_tail = NULL; 1141 ct->ct_path_last = NULL; 1142 1143 /* 1144 * Add this client component to our client hash queue 1145 */ 1146 i_mdi_client_enlist_table(vh, ct); 1147 return (ct); 1148 } 1149 1150 /* 1151 * i_mdi_client_enlist_table(): 1152 * Attach the client device to the client hash table. Caller 1153 * should hold the mdi_mutex 1154 */ 1155 1156 static void 1157 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1158 { 1159 int index; 1160 struct client_hash *head; 1161 1162 ASSERT(MUTEX_HELD(&mdi_mutex)); 1163 index = i_mdi_get_hash_key(ct->ct_guid); 1164 head = &vh->vh_client_table[index]; 1165 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1166 head->ct_hash_head = ct; 1167 head->ct_hash_count++; 1168 vh->vh_client_count++; 1169 } 1170 1171 /* 1172 * i_mdi_client_delist_table(): 1173 * Attach the client device to the client hash table. 1174 * Caller should hold the mdi_mutex 1175 */ 1176 1177 static void 1178 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1179 { 1180 int index; 1181 char *guid; 1182 struct client_hash *head; 1183 mdi_client_t *next; 1184 mdi_client_t *last; 1185 1186 ASSERT(MUTEX_HELD(&mdi_mutex)); 1187 guid = ct->ct_guid; 1188 index = i_mdi_get_hash_key(guid); 1189 head = &vh->vh_client_table[index]; 1190 1191 last = NULL; 1192 next = (mdi_client_t *)head->ct_hash_head; 1193 while (next != NULL) { 1194 if (next == ct) { 1195 break; 1196 } 1197 last = next; 1198 next = next->ct_hnext; 1199 } 1200 1201 if (next) { 1202 head->ct_hash_count--; 1203 if (last == NULL) { 1204 head->ct_hash_head = ct->ct_hnext; 1205 } else { 1206 last->ct_hnext = ct->ct_hnext; 1207 } 1208 ct->ct_hnext = NULL; 1209 vh->vh_client_count--; 1210 } 1211 } 1212 1213 1214 /* 1215 * i_mdi_client_free(): 1216 * Free a client component 1217 */ 1218 static int 1219 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1220 { 1221 int rv = MDI_SUCCESS; 1222 int flags = ct->ct_flags; 1223 dev_info_t *cdip; 1224 dev_info_t *vdip; 
1225 1226 ASSERT(MUTEX_HELD(&mdi_mutex)); 1227 vdip = vh->vh_dip; 1228 cdip = ct->ct_dip; 1229 1230 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1231 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1232 DEVI(cdip)->devi_mdi_client = NULL; 1233 1234 /* 1235 * Clear out back ref. to dev_info_t node 1236 */ 1237 ct->ct_dip = NULL; 1238 1239 /* 1240 * Remove this client from our hash queue 1241 */ 1242 i_mdi_client_delist_table(vh, ct); 1243 1244 /* 1245 * Uninitialize and free the component 1246 */ 1247 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1248 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1249 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1250 cv_destroy(&ct->ct_failover_cv); 1251 cv_destroy(&ct->ct_unstable_cv); 1252 cv_destroy(&ct->ct_powerchange_cv); 1253 mutex_destroy(&ct->ct_mutex); 1254 kmem_free(ct, sizeof (*ct)); 1255 1256 if (cdip != NULL) { 1257 mutex_exit(&mdi_mutex); 1258 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1259 mutex_enter(&mdi_mutex); 1260 } 1261 return (rv); 1262 } 1263 1264 /* 1265 * i_mdi_client_find(): 1266 * Find the client structure corresponding to a given guid 1267 * Caller should hold the mdi_mutex 1268 */ 1269 static mdi_client_t * 1270 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1271 { 1272 int index; 1273 struct client_hash *head; 1274 mdi_client_t *ct; 1275 1276 ASSERT(MUTEX_HELD(&mdi_mutex)); 1277 index = i_mdi_get_hash_key(guid); 1278 head = &vh->vh_client_table[index]; 1279 1280 ct = head->ct_hash_head; 1281 while (ct != NULL) { 1282 if (strcmp(ct->ct_guid, guid) == 0 && 1283 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1284 break; 1285 } 1286 ct = ct->ct_hnext; 1287 } 1288 return (ct); 1289 } 1290 1291 1292 1293 /* 1294 * i_mdi_client_update_state(): 1295 * Compute and update client device state 1296 * Notes: 1297 * A client device can be in any of three possible states: 1298 * 1299 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state 
with more 1300 * one online/standby paths. Can tolerate failures. 1301 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1302 * no alternate paths available as standby. A failure on the online 1303 * would result in loss of access to device data. 1304 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1305 * no paths available to access the device. 1306 */ 1307 static void 1308 i_mdi_client_update_state(mdi_client_t *ct) 1309 { 1310 int state; 1311 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1312 state = i_mdi_client_compute_state(ct, NULL); 1313 MDI_CLIENT_SET_STATE(ct, state); 1314 } 1315 1316 /* 1317 * i_mdi_client_compute_state(): 1318 * Compute client device state 1319 * 1320 * mdi_phci_t * Pointer to pHCI structure which should 1321 * while computing the new value. Used by 1322 * i_mdi_phci_offline() to find the new 1323 * client state after DR of a pHCI. 1324 */ 1325 static int 1326 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1327 { 1328 int state; 1329 int online_count = 0; 1330 int standby_count = 0; 1331 mdi_pathinfo_t *pip, *next; 1332 1333 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1334 pip = ct->ct_path_head; 1335 while (pip != NULL) { 1336 MDI_PI_LOCK(pip); 1337 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1338 if (MDI_PI(pip)->pi_phci == ph) { 1339 MDI_PI_UNLOCK(pip); 1340 pip = next; 1341 continue; 1342 } 1343 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1344 == MDI_PATHINFO_STATE_ONLINE) 1345 online_count++; 1346 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1347 == MDI_PATHINFO_STATE_STANDBY) 1348 standby_count++; 1349 MDI_PI_UNLOCK(pip); 1350 pip = next; 1351 } 1352 1353 if (online_count == 0) { 1354 if (standby_count == 0) { 1355 state = MDI_CLIENT_STATE_FAILED; 1356 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1357 " ct = %p\n", ct)); 1358 } else if (standby_count == 1) { 1359 state = MDI_CLIENT_STATE_DEGRADED; 1360 } else { 1361 state = MDI_CLIENT_STATE_OPTIMAL; 1362 } 1363 } 
else if (online_count == 1) { 1364 if (standby_count == 0) { 1365 state = MDI_CLIENT_STATE_DEGRADED; 1366 } else { 1367 state = MDI_CLIENT_STATE_OPTIMAL; 1368 } 1369 } else { 1370 state = MDI_CLIENT_STATE_OPTIMAL; 1371 } 1372 return (state); 1373 } 1374 1375 /* 1376 * i_mdi_client2devinfo(): 1377 * Utility function 1378 */ 1379 dev_info_t * 1380 i_mdi_client2devinfo(mdi_client_t *ct) 1381 { 1382 return (ct->ct_dip); 1383 } 1384 1385 /* 1386 * mdi_client_path2_devinfo(): 1387 * Given the parent devinfo and child devfs pathname, search for 1388 * a valid devfs node handle. 1389 */ 1390 dev_info_t * 1391 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1392 { 1393 dev_info_t *cdip = NULL; 1394 dev_info_t *ndip = NULL; 1395 char *temp_pathname; 1396 int circular; 1397 1398 /* 1399 * Allocate temp buffer 1400 */ 1401 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1402 1403 /* 1404 * Lock parent against changes 1405 */ 1406 ndi_devi_enter(vdip, &circular); 1407 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1408 while ((cdip = ndip) != NULL) { 1409 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1410 1411 *temp_pathname = '\0'; 1412 (void) ddi_pathname(cdip, temp_pathname); 1413 if (strcmp(temp_pathname, pathname) == 0) { 1414 break; 1415 } 1416 } 1417 /* 1418 * Release devinfo lock 1419 */ 1420 ndi_devi_exit(vdip, circular); 1421 1422 /* 1423 * Free the temp buffer 1424 */ 1425 kmem_free(temp_pathname, MAXPATHLEN); 1426 return (cdip); 1427 } 1428 1429 1430 /* 1431 * mdi_client_get_path_count(): 1432 * Utility function to get number of path information nodes 1433 * associated with a given client device. 
1434 */ 1435 int 1436 mdi_client_get_path_count(dev_info_t *cdip) 1437 { 1438 mdi_client_t *ct; 1439 int count = 0; 1440 1441 ct = i_devi_get_client(cdip); 1442 if (ct != NULL) { 1443 count = ct->ct_path_count; 1444 } 1445 return (count); 1446 } 1447 1448 1449 /* 1450 * i_mdi_get_hash_key(): 1451 * Create a hash using strings as keys 1452 * 1453 */ 1454 static int 1455 i_mdi_get_hash_key(char *str) 1456 { 1457 uint32_t g, hash = 0; 1458 char *p; 1459 1460 for (p = str; *p != '\0'; p++) { 1461 g = *p; 1462 hash += g; 1463 } 1464 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1465 } 1466 1467 /* 1468 * mdi_get_lb_policy(): 1469 * Get current load balancing policy for a given client device 1470 */ 1471 client_lb_t 1472 mdi_get_lb_policy(dev_info_t *cdip) 1473 { 1474 client_lb_t lb = LOAD_BALANCE_NONE; 1475 mdi_client_t *ct; 1476 1477 ct = i_devi_get_client(cdip); 1478 if (ct != NULL) { 1479 lb = ct->ct_lb; 1480 } 1481 return (lb); 1482 } 1483 1484 /* 1485 * mdi_set_lb_region_size(): 1486 * Set current region size for the load-balance 1487 */ 1488 int 1489 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1490 { 1491 mdi_client_t *ct; 1492 int rv = MDI_FAILURE; 1493 1494 ct = i_devi_get_client(cdip); 1495 if (ct != NULL && ct->ct_lb_args != NULL) { 1496 ct->ct_lb_args->region_size = region_size; 1497 rv = MDI_SUCCESS; 1498 } 1499 return (rv); 1500 } 1501 1502 /* 1503 * mdi_Set_lb_policy(): 1504 * Set current load balancing policy for a given client device 1505 */ 1506 int 1507 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1508 { 1509 mdi_client_t *ct; 1510 int rv = MDI_FAILURE; 1511 1512 ct = i_devi_get_client(cdip); 1513 if (ct != NULL) { 1514 ct->ct_lb = lb; 1515 rv = MDI_SUCCESS; 1516 } 1517 return (rv); 1518 } 1519 1520 /* 1521 * mdi_failover(): 1522 * failover function called by the vHCI drivers to initiate 1523 * a failover operation. This is typically due to non-availability 1524 * of online paths to route I/O requests. 
Failover can be
 *		triggered through user application also.
 *
 *		The vHCI driver calls mdi_failover() to initiate a failover
 *		operation. mdi_failover() calls back into the vHCI driver's
 *		vo_failover() entry point to perform the actual failover
 *		operation.  The reason for requiring the vHCI driver to
 *		initiate failover by calling mdi_failover(), instead of directly
 *		executing vo_failover() itself, is to ensure that the mdi
 *		framework can keep track of the client state properly.
 *		Additionally, mdi_failover() provides as a convenience the
 *		option of performing the failover operation synchronously or
 *		asynchronously
 *
 *		Upon successful completion of the failover operation, the
 *		paths that were previously ONLINE will be in the STANDBY state,
 *		and the newly activated paths will be in the ONLINE state.
 *
 *		The flags modifier determines whether the activation is done
 *		synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		MDI_ACCEPT	- asynchronous request (MDI_FAILOVER_ASYNC)
 *				  was handed to the mdi taskq
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int			rv;
	mdi_client_t		*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress.  An asynchronous request is refused
	 * with MDI_BUSY; any other request waits on ct_unstable_cv until
	 * ct_unstable drops to zero.
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again.
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(ct->ct_dip))) {
		/*
		 * Client is detached, failed, or its devinfo node is not
		 * attached. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.  The flags are
	 * saved in the client so that i_mdi_failover() can pass them on to
	 * the vHCI callback; the client lock is dropped before the callback
	 * is run or dispatched.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.  The dispatch result is not checked.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode.  Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *		internal failover function. Invokes vHCI drivers failover
 *		callback function and process the failover status
 * Return Values:
 *		Status returned by the vHCI vo_failover() callback, or
 *		MDI_SUCCESS when the vHCI does not provide one
 *
 * Note: A client device in failover state can not be detached or freed.
1648 */ 1649 static int 1650 i_mdi_failover(void *arg) 1651 { 1652 int rv = MDI_SUCCESS; 1653 mdi_client_t *ct = (mdi_client_t *)arg; 1654 mdi_vhci_t *vh = ct->ct_vhci; 1655 1656 ASSERT(!MUTEX_HELD(&ct->ct_mutex)); 1657 1658 if (vh->vh_ops->vo_failover != NULL) { 1659 /* 1660 * Call vHCI drivers callback routine 1661 */ 1662 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1663 ct->ct_failover_flags); 1664 } 1665 1666 MDI_CLIENT_LOCK(ct); 1667 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1668 1669 /* 1670 * Save the failover return status 1671 */ 1672 ct->ct_failover_status = rv; 1673 1674 /* 1675 * As a result of failover, client status would have been changed. 1676 * Update the client state and wake up anyone waiting on this client 1677 * device. 1678 */ 1679 i_mdi_client_update_state(ct); 1680 1681 cv_broadcast(&ct->ct_failover_cv); 1682 MDI_CLIENT_UNLOCK(ct); 1683 return (rv); 1684 } 1685 1686 /* 1687 * Load balancing is logical block. 1688 * IOs within the range described by region_size 1689 * would go on the same path. This would improve the 1690 * performance by cache-hit on some of the RAID devices. 1691 * Search only for online paths(At some point we 1692 * may want to balance across target ports). 1693 * If no paths are found then default to round-robin. 
1694 */ 1695 static int 1696 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1697 { 1698 int path_index = -1; 1699 int online_path_count = 0; 1700 int online_nonpref_path_count = 0; 1701 int region_size = ct->ct_lb_args->region_size; 1702 mdi_pathinfo_t *pip; 1703 mdi_pathinfo_t *next; 1704 int preferred, path_cnt; 1705 1706 pip = ct->ct_path_head; 1707 while (pip) { 1708 MDI_PI_LOCK(pip); 1709 if (MDI_PI(pip)->pi_state == 1710 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1711 online_path_count++; 1712 } else if (MDI_PI(pip)->pi_state == 1713 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1714 online_nonpref_path_count++; 1715 } 1716 next = (mdi_pathinfo_t *) 1717 MDI_PI(pip)->pi_client_link; 1718 MDI_PI_UNLOCK(pip); 1719 pip = next; 1720 } 1721 /* if found any online/preferred then use this type */ 1722 if (online_path_count > 0) { 1723 path_cnt = online_path_count; 1724 preferred = 1; 1725 } else if (online_nonpref_path_count > 0) { 1726 path_cnt = online_nonpref_path_count; 1727 preferred = 0; 1728 } else { 1729 path_cnt = 0; 1730 } 1731 if (path_cnt) { 1732 path_index = (bp->b_blkno >> region_size) % path_cnt; 1733 pip = ct->ct_path_head; 1734 while (pip && path_index != -1) { 1735 MDI_PI_LOCK(pip); 1736 if (path_index == 0 && 1737 (MDI_PI(pip)->pi_state == 1738 MDI_PATHINFO_STATE_ONLINE) && 1739 MDI_PI(pip)->pi_preferred == preferred) { 1740 MDI_PI_HOLD(pip); 1741 MDI_PI_UNLOCK(pip); 1742 *ret_pip = pip; 1743 return (MDI_SUCCESS); 1744 } 1745 path_index --; 1746 next = (mdi_pathinfo_t *) 1747 MDI_PI(pip)->pi_client_link; 1748 MDI_PI_UNLOCK(pip); 1749 pip = next; 1750 } 1751 if (pip == NULL) { 1752 MDI_DEBUG(4, (CE_NOTE, NULL, 1753 "!lba %p, no pip !!\n", 1754 bp->b_blkno)); 1755 } else { 1756 MDI_DEBUG(4, (CE_NOTE, NULL, 1757 "!lba %p, no pip for path_index, " 1758 "pip %p\n", pip)); 1759 } 1760 } 1761 return (MDI_FAILURE); 1762 } 1763 1764 /* 1765 * mdi_select_path(): 1766 * select a path to access a client 
device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online path is available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is configured through the vHCI driver's configuration
 *		file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  If start_pip is specified (non NULL) it is
 *		used as the start point to walk and find the next appropriate
 *		path.  The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select a STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a held state (ref_cnt).
 *		Caller should release the hold by calling mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path matched but at least one path examined
 *				  was in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;	/* set when a transient path is seen */

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	/*
	 * The client state checks below apply to the standard behavior
	 * only; they are skipped when the caller passed selection flags.
	 */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * The list is walked circularly at most twice: first for
		 * preferred paths and then, once the walk is back at
		 * 'start', for non-preferred paths.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE) &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * The search begins at the node following the previous
		 * selection: ct_path_last for the standard behavior,
		 * start_pip otherwise.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set to 1 when this path satisfies the
			 * request: ONLINE for the standard behavior,
			 * otherwise whichever states the flags ask for.
			 * The preference must match in every case.  Flag
			 * combinations not listed here never match.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * pip is locked whenever control reaches do_again,
			 * including via the goto further down.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path matched.  Report MDI_BUSY if any path examined was in a
	 * transient state, MDI_NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
2200 */ 2201 mdi_pathinfo_t * 2202 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2203 { 2204 mdi_client_t *ct; 2205 2206 if (!MDI_CLIENT(ct_dip)) 2207 return (NULL); 2208 2209 /* 2210 * Walk through client link 2211 */ 2212 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2213 ASSERT(ct != NULL); 2214 2215 if (pip == NULL) 2216 return ((mdi_pathinfo_t *)ct->ct_path_head); 2217 2218 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2219 } 2220 2221 /* 2222 * For a phci, return the next available path to any client 2223 * Note: ditto mdi_get_next_phci_path() 2224 */ 2225 mdi_pathinfo_t * 2226 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2227 { 2228 mdi_phci_t *ph; 2229 2230 if (!MDI_PHCI(ph_dip)) 2231 return (NULL); 2232 2233 /* 2234 * Walk through pHCI link 2235 */ 2236 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2237 ASSERT(ph != NULL); 2238 2239 if (pip == NULL) 2240 return ((mdi_pathinfo_t *)ph->ph_path_head); 2241 2242 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2243 } 2244 2245 /* 2246 * mdi_get_nextpath(): 2247 * mdi_pathinfo node walker function. Get the next node from the 2248 * client or pHCI device list. 2249 * 2250 * XXX This is wrapper function for compatibility purposes only. 2251 * 2252 * It doesn't work under Multi-level MPxIO, where a dip 2253 * is both client and phci (which link should next_path follow?). 2254 * Once Leadville is modified to call mdi_get_next_phci/client_path, 2255 * this interface should be removed. 2256 */ 2257 void 2258 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip, 2259 mdi_pathinfo_t **ret_pip) 2260 { 2261 if (MDI_CLIENT(dip)) { 2262 *ret_pip = mdi_get_next_phci_path(dip, pip); 2263 } else if (MDI_PHCI(dip)) { 2264 *ret_pip = mdi_get_next_client_path(dip, pip); 2265 } else { 2266 *ret_pip = NULL; 2267 } 2268 } 2269 2270 /* 2271 * mdi_hold_path(): 2272 * Hold the mdi_pathinfo node against unwanted unexpected free. 
2273 * Return Values: 2274 * None 2275 */ 2276 void 2277 mdi_hold_path(mdi_pathinfo_t *pip) 2278 { 2279 if (pip) { 2280 MDI_PI_LOCK(pip); 2281 MDI_PI_HOLD(pip); 2282 MDI_PI_UNLOCK(pip); 2283 } 2284 } 2285 2286 2287 /* 2288 * mdi_rele_path(): 2289 * Release the mdi_pathinfo node which was selected 2290 * through mdi_select_path() mechanism or manually held by 2291 * calling mdi_hold_path(). 2292 * Return Values: 2293 * None 2294 */ 2295 void 2296 mdi_rele_path(mdi_pathinfo_t *pip) 2297 { 2298 if (pip) { 2299 MDI_PI_LOCK(pip); 2300 MDI_PI_RELE(pip); 2301 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2302 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2303 } 2304 MDI_PI_UNLOCK(pip); 2305 } 2306 } 2307 2308 2309 /* 2310 * mdi_pi_lock(): 2311 * Lock the mdi_pathinfo node. 2312 * Note: 2313 * The caller should release the lock by calling mdi_pi_unlock() 2314 */ 2315 void 2316 mdi_pi_lock(mdi_pathinfo_t *pip) 2317 { 2318 ASSERT(pip != NULL); 2319 if (pip) { 2320 MDI_PI_LOCK(pip); 2321 } 2322 } 2323 2324 2325 /* 2326 * mdi_pi_unlock(): 2327 * Unlock the mdi_pathinfo node. 2328 * Note: 2329 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2330 */ 2331 void 2332 mdi_pi_unlock(mdi_pathinfo_t *pip) 2333 { 2334 ASSERT(pip != NULL); 2335 if (pip) { 2336 MDI_PI_UNLOCK(pip); 2337 } 2338 } 2339 2340 /* 2341 * mdi_pi_find(): 2342 * Search the list of mdi_pathinfo nodes attached to the 2343 * pHCI/Client device node whose path address matches "paddr". 2344 * Returns a pointer to the mdi_pathinfo node if a matching node is 2345 * found. 2346 * Return Values: 2347 * mdi_pathinfo node handle 2348 * NULL 2349 * Notes: 2350 * Caller need not hold any locks to call this function. 
2351 */ 2352 mdi_pathinfo_t * 2353 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2354 { 2355 mdi_phci_t *ph; 2356 mdi_vhci_t *vh; 2357 mdi_client_t *ct; 2358 mdi_pathinfo_t *pip = NULL; 2359 2360 if ((pdip == NULL) || (paddr == NULL)) { 2361 return (NULL); 2362 } 2363 ph = i_devi_get_phci(pdip); 2364 if (ph == NULL) { 2365 /* 2366 * Invalid pHCI device, Nothing more to do. 2367 */ 2368 MDI_DEBUG(2, (CE_WARN, NULL, 2369 "!mdi_pi_find: invalid phci")); 2370 return (NULL); 2371 } 2372 2373 vh = ph->ph_vhci; 2374 if (vh == NULL) { 2375 /* 2376 * Invalid vHCI device, Nothing more to do. 2377 */ 2378 MDI_DEBUG(2, (CE_WARN, NULL, 2379 "!mdi_pi_find: invalid phci")); 2380 return (NULL); 2381 } 2382 2383 /* 2384 * Look for client device identified by caddr (guid) 2385 */ 2386 if (caddr == NULL) { 2387 /* 2388 * Find a mdi_pathinfo node under pHCI list for a matching 2389 * unit address. 2390 */ 2391 mutex_enter(&ph->ph_mutex); 2392 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2393 2394 while (pip != NULL) { 2395 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2396 break; 2397 } 2398 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2399 } 2400 mutex_exit(&ph->ph_mutex); 2401 return (pip); 2402 } 2403 2404 /* 2405 * XXX - Is the rest of the code in this function really necessary? 2406 * The consumers of mdi_pi_find() can search for the desired pathinfo 2407 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2408 * whether the search is based on the pathinfo nodes attached to 2409 * the pHCI or the client node, the result will be the same. 2410 */ 2411 2412 /* 2413 * Find the client device corresponding to 'caddr' 2414 */ 2415 mutex_enter(&mdi_mutex); 2416 2417 /* 2418 * XXX - Passing NULL to the following function works as long as the 2419 * the client addresses (caddr) are unique per vhci basis. 
2420 */ 2421 ct = i_mdi_client_find(vh, NULL, caddr); 2422 if (ct == NULL) { 2423 /* 2424 * Client not found, Obviously mdi_pathinfo node has not been 2425 * created yet. 2426 */ 2427 mutex_exit(&mdi_mutex); 2428 return (pip); 2429 } 2430 2431 /* 2432 * Hold the client lock and look for a mdi_pathinfo node with matching 2433 * pHCI and paddr 2434 */ 2435 MDI_CLIENT_LOCK(ct); 2436 2437 /* 2438 * Release the global mutex as it is no more needed. Note: We always 2439 * respect the locking order while acquiring. 2440 */ 2441 mutex_exit(&mdi_mutex); 2442 2443 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2444 while (pip != NULL) { 2445 /* 2446 * Compare the unit address 2447 */ 2448 if ((MDI_PI(pip)->pi_phci == ph) && 2449 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2450 break; 2451 } 2452 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2453 } 2454 MDI_CLIENT_UNLOCK(ct); 2455 return (pip); 2456 } 2457 2458 /* 2459 * mdi_pi_alloc(): 2460 * Allocate and initialize a new instance of a mdi_pathinfo node. 2461 * The mdi_pathinfo node returned by this function identifies a 2462 * unique device path is capable of having properties attached 2463 * and passed to mdi_pi_online() to fully attach and online the 2464 * path and client device node. 2465 * The mdi_pathinfo node returned by this function must be 2466 * destroyed using mdi_pi_free() if the path is no longer 2467 * operational or if the caller fails to attach a client device 2468 * node when calling mdi_pi_online(). The framework will not free 2469 * the resources allocated. 2470 * This function can be called from both interrupt and kernel 2471 * contexts. DDI_NOSLEEP flag should be used while calling 2472 * from interrupt contexts. 
2473 * Return Values: 2474 * MDI_SUCCESS 2475 * MDI_FAILURE 2476 * MDI_NOMEM 2477 */ 2478 /*ARGSUSED*/ 2479 int 2480 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2481 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2482 { 2483 mdi_vhci_t *vh; 2484 mdi_phci_t *ph; 2485 mdi_client_t *ct; 2486 mdi_pathinfo_t *pip = NULL; 2487 dev_info_t *cdip; 2488 int rv = MDI_NOMEM; 2489 int path_allocated = 0; 2490 2491 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2492 ret_pip == NULL) { 2493 /* Nothing more to do */ 2494 return (MDI_FAILURE); 2495 } 2496 2497 *ret_pip = NULL; 2498 ph = i_devi_get_phci(pdip); 2499 ASSERT(ph != NULL); 2500 if (ph == NULL) { 2501 /* Invalid pHCI device, return failure */ 2502 MDI_DEBUG(1, (CE_WARN, NULL, 2503 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2504 return (MDI_FAILURE); 2505 } 2506 2507 MDI_PHCI_LOCK(ph); 2508 vh = ph->ph_vhci; 2509 if (vh == NULL) { 2510 /* Invalid vHCI device, return failure */ 2511 MDI_DEBUG(1, (CE_WARN, NULL, 2512 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2513 MDI_PHCI_UNLOCK(ph); 2514 return (MDI_FAILURE); 2515 } 2516 2517 if (MDI_PHCI_IS_READY(ph) == 0) { 2518 /* 2519 * Do not allow new node creation when pHCI is in 2520 * offline/suspended states 2521 */ 2522 MDI_DEBUG(1, (CE_WARN, NULL, 2523 "mdi_pi_alloc: pHCI=%p is not ready", ph)); 2524 MDI_PHCI_UNLOCK(ph); 2525 return (MDI_BUSY); 2526 } 2527 MDI_PHCI_UNSTABLE(ph); 2528 MDI_PHCI_UNLOCK(ph); 2529 2530 /* look for a matching client, create one if not found */ 2531 mutex_enter(&mdi_mutex); 2532 ct = i_mdi_client_find(vh, cname, caddr); 2533 if (ct == NULL) { 2534 ct = i_mdi_client_alloc(vh, cname, caddr); 2535 ASSERT(ct != NULL); 2536 } 2537 2538 if (ct->ct_dip == NULL) { 2539 /* 2540 * Allocate a devinfo node 2541 */ 2542 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2543 compatible, ncompatible); 2544 if (ct->ct_dip == NULL) { 2545 (void) i_mdi_client_free(vh, ct); 2546 goto 
fail; 2547 } 2548 } 2549 cdip = ct->ct_dip; 2550 2551 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2552 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2553 2554 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2555 while (pip != NULL) { 2556 /* 2557 * Compare the unit address 2558 */ 2559 if ((MDI_PI(pip)->pi_phci == ph) && 2560 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2561 break; 2562 } 2563 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2564 } 2565 2566 if (pip == NULL) { 2567 /* 2568 * This is a new path for this client device. Allocate and 2569 * initialize a new pathinfo node 2570 */ 2571 pip = i_mdi_pi_alloc(ph, paddr, ct); 2572 ASSERT(pip != NULL); 2573 path_allocated = 1; 2574 } 2575 rv = MDI_SUCCESS; 2576 2577 fail: 2578 /* 2579 * Release the global mutex. 2580 */ 2581 mutex_exit(&mdi_mutex); 2582 2583 /* 2584 * Mark the pHCI as stable 2585 */ 2586 MDI_PHCI_LOCK(ph); 2587 MDI_PHCI_STABLE(ph); 2588 MDI_PHCI_UNLOCK(ph); 2589 *ret_pip = pip; 2590 2591 if (path_allocated) 2592 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2593 2594 return (rv); 2595 } 2596 2597 /*ARGSUSED*/ 2598 int 2599 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2600 int flags, mdi_pathinfo_t **ret_pip) 2601 { 2602 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2603 flags, ret_pip)); 2604 } 2605 2606 /* 2607 * i_mdi_pi_alloc(): 2608 * Allocate a mdi_pathinfo node and add to the pHCI path list 2609 * Return Values: 2610 * mdi_pathinfo 2611 */ 2612 2613 /*ARGSUSED*/ 2614 static mdi_pathinfo_t * 2615 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2616 { 2617 mdi_pathinfo_t *pip; 2618 int ct_circular; 2619 int ph_circular; 2620 int se_flag; 2621 int kmem_flag; 2622 2623 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2624 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2625 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2626 MDI_PATHINFO_STATE_TRANSIENT; 2627 2628 if (MDI_PHCI_IS_USER_DISABLED(ph)) 
2629 MDI_PI_SET_USER_DISABLE(pip); 2630 2631 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2632 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2633 2634 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2635 MDI_PI_SET_DRV_DISABLE(pip); 2636 2637 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2638 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2639 MDI_PI(pip)->pi_client = ct; 2640 MDI_PI(pip)->pi_phci = ph; 2641 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2642 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2643 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2644 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2645 MDI_PI(pip)->pi_pprivate = NULL; 2646 MDI_PI(pip)->pi_cprivate = NULL; 2647 MDI_PI(pip)->pi_vprivate = NULL; 2648 MDI_PI(pip)->pi_client_link = NULL; 2649 MDI_PI(pip)->pi_phci_link = NULL; 2650 MDI_PI(pip)->pi_ref_cnt = 0; 2651 MDI_PI(pip)->pi_kstats = NULL; 2652 MDI_PI(pip)->pi_preferred = 1; 2653 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2654 2655 /* 2656 * Lock both dev_info nodes against changes in parallel. 2657 */ 2658 ndi_devi_enter(ct->ct_dip, &ct_circular); 2659 ndi_devi_enter(ph->ph_dip, &ph_circular); 2660 2661 i_mdi_phci_add_path(ph, pip); 2662 i_mdi_client_add_path(ct, pip); 2663 2664 ndi_devi_exit(ph->ph_dip, ph_circular); 2665 ndi_devi_exit(ct->ct_dip, ct_circular); 2666 2667 /* determine interrupt context */ 2668 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2669 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 2670 2671 i_ddi_di_cache_invalidate(kmem_flag); 2672 2673 return (pip); 2674 } 2675 2676 /* 2677 * i_mdi_phci_add_path(): 2678 * Add a mdi_pathinfo node to pHCI list. 
2679 * Notes: 2680 * Caller should per-pHCI mutex 2681 */ 2682 2683 static void 2684 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2685 { 2686 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2687 2688 if (ph->ph_path_head == NULL) { 2689 ph->ph_path_head = pip; 2690 } else { 2691 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 2692 } 2693 ph->ph_path_tail = pip; 2694 ph->ph_path_count++; 2695 } 2696 2697 /* 2698 * i_mdi_client_add_path(): 2699 * Add mdi_pathinfo node to client list 2700 */ 2701 2702 static void 2703 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2704 { 2705 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2706 2707 if (ct->ct_path_head == NULL) { 2708 ct->ct_path_head = pip; 2709 } else { 2710 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 2711 } 2712 ct->ct_path_tail = pip; 2713 ct->ct_path_count++; 2714 } 2715 2716 /* 2717 * mdi_pi_free(): 2718 * Free the mdi_pathinfo node and also client device node if this 2719 * is the last path to the device 2720 * Return Values: 2721 * MDI_SUCCESS 2722 * MDI_FAILURE 2723 * MDI_BUSY 2724 */ 2725 2726 /*ARGSUSED*/ 2727 int 2728 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 2729 { 2730 int rv = MDI_SUCCESS; 2731 mdi_vhci_t *vh; 2732 mdi_phci_t *ph; 2733 mdi_client_t *ct; 2734 int (*f)(); 2735 int client_held = 0; 2736 2737 MDI_PI_LOCK(pip); 2738 ph = MDI_PI(pip)->pi_phci; 2739 ASSERT(ph != NULL); 2740 if (ph == NULL) { 2741 /* 2742 * Invalid pHCI device, return failure 2743 */ 2744 MDI_DEBUG(1, (CE_WARN, NULL, 2745 "!mdi_pi_free: invalid pHCI")); 2746 MDI_PI_UNLOCK(pip); 2747 return (MDI_FAILURE); 2748 } 2749 2750 vh = ph->ph_vhci; 2751 ASSERT(vh != NULL); 2752 if (vh == NULL) { 2753 /* Invalid pHCI device, return failure */ 2754 MDI_DEBUG(1, (CE_WARN, NULL, 2755 "!mdi_pi_free: invalid vHCI")); 2756 MDI_PI_UNLOCK(pip); 2757 return (MDI_FAILURE); 2758 } 2759 2760 ct = MDI_PI(pip)->pi_client; 2761 ASSERT(ct != NULL); 2762 if (ct == NULL) { 2763 /* 2764 * Invalid Client device, return failure 2765 
*/ 2766 MDI_DEBUG(1, (CE_WARN, NULL, 2767 "!mdi_pi_free: invalid client")); 2768 MDI_PI_UNLOCK(pip); 2769 return (MDI_FAILURE); 2770 } 2771 2772 /* 2773 * Check to see for busy condition. A mdi_pathinfo can only be freed 2774 * if the node state is either offline or init and the reference count 2775 * is zero. 2776 */ 2777 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 2778 MDI_PI_IS_INITING(pip))) { 2779 /* 2780 * Node is busy 2781 */ 2782 MDI_DEBUG(1, (CE_WARN, NULL, 2783 "!mdi_pi_free: pathinfo node is busy pip=%p", pip)); 2784 MDI_PI_UNLOCK(pip); 2785 return (MDI_BUSY); 2786 } 2787 2788 while (MDI_PI(pip)->pi_ref_cnt != 0) { 2789 /* 2790 * Give a chance for pending I/Os to complete. 2791 */ 2792 MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: " 2793 "%d cmds still pending on path: %p\n", 2794 MDI_PI(pip)->pi_ref_cnt, pip)); 2795 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 2796 &MDI_PI(pip)->pi_mutex, 2797 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 2798 /* 2799 * The timeout time reached without ref_cnt being zero 2800 * being signaled. 2801 */ 2802 MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, 2803 "!mdi_pi_free: " 2804 "Timeout reached on path %p without the cond\n", 2805 pip)); 2806 MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, 2807 "!mdi_pi_free: " 2808 "%d cmds still pending on path: %p\n", 2809 MDI_PI(pip)->pi_ref_cnt, pip)); 2810 MDI_PI_UNLOCK(pip); 2811 return (MDI_BUSY); 2812 } 2813 } 2814 if (MDI_PI(pip)->pi_pm_held) { 2815 client_held = 1; 2816 } 2817 MDI_PI_UNLOCK(pip); 2818 2819 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 2820 2821 MDI_CLIENT_LOCK(ct); 2822 2823 /* Prevent further failovers till mdi_mutex is held */ 2824 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 2825 2826 /* 2827 * Wait till failover is complete before removing this node. 
2828 */ 2829 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 2830 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 2831 2832 MDI_CLIENT_UNLOCK(ct); 2833 mutex_enter(&mdi_mutex); 2834 MDI_CLIENT_LOCK(ct); 2835 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 2836 2837 if (!MDI_PI_IS_INITING(pip)) { 2838 f = vh->vh_ops->vo_pi_uninit; 2839 if (f != NULL) { 2840 rv = (*f)(vh->vh_dip, pip, 0); 2841 } 2842 } 2843 /* 2844 * If vo_pi_uninit() completed successfully. 2845 */ 2846 if (rv == MDI_SUCCESS) { 2847 if (client_held) { 2848 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 2849 "i_mdi_pm_rele_client\n")); 2850 i_mdi_pm_rele_client(ct, 1); 2851 } 2852 i_mdi_pi_free(ph, pip, ct); 2853 if (ct->ct_path_count == 0) { 2854 /* 2855 * Client lost its last path. 2856 * Clean up the client device 2857 */ 2858 MDI_CLIENT_UNLOCK(ct); 2859 (void) i_mdi_client_free(ct->ct_vhci, ct); 2860 mutex_exit(&mdi_mutex); 2861 return (rv); 2862 } 2863 } 2864 MDI_CLIENT_UNLOCK(ct); 2865 mutex_exit(&mdi_mutex); 2866 2867 if (rv == MDI_FAILURE) 2868 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2869 2870 return (rv); 2871 } 2872 2873 /* 2874 * i_mdi_pi_free(): 2875 * Free the mdi_pathinfo node 2876 */ 2877 static void 2878 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 2879 { 2880 int ct_circular; 2881 int ph_circular; 2882 int se_flag; 2883 int kmem_flag; 2884 2885 /* 2886 * remove any per-path kstats 2887 */ 2888 i_mdi_pi_kstat_destroy(pip); 2889 2890 ndi_devi_enter(ct->ct_dip, &ct_circular); 2891 ndi_devi_enter(ph->ph_dip, &ph_circular); 2892 2893 i_mdi_client_remove_path(ct, pip); 2894 i_mdi_phci_remove_path(ph, pip); 2895 2896 ndi_devi_exit(ph->ph_dip, ph_circular); 2897 ndi_devi_exit(ct->ct_dip, ct_circular); 2898 2899 /* determine interrupt context */ 2900 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2901 kmem_flag = (se_flag == SE_SLEEP) ? 
KM_SLEEP : KM_NOSLEEP; 2902 2903 i_ddi_di_cache_invalidate(kmem_flag); 2904 2905 mutex_destroy(&MDI_PI(pip)->pi_mutex); 2906 cv_destroy(&MDI_PI(pip)->pi_state_cv); 2907 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 2908 if (MDI_PI(pip)->pi_addr) { 2909 kmem_free(MDI_PI(pip)->pi_addr, 2910 strlen(MDI_PI(pip)->pi_addr) + 1); 2911 MDI_PI(pip)->pi_addr = NULL; 2912 } 2913 2914 if (MDI_PI(pip)->pi_prop) { 2915 (void) nvlist_free(MDI_PI(pip)->pi_prop); 2916 MDI_PI(pip)->pi_prop = NULL; 2917 } 2918 kmem_free(pip, sizeof (struct mdi_pathinfo)); 2919 } 2920 2921 2922 /* 2923 * i_mdi_phci_remove_path(): 2924 * Remove a mdi_pathinfo node from pHCI list. 2925 * Notes: 2926 * Caller should hold per-pHCI mutex 2927 */ 2928 2929 static void 2930 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2931 { 2932 mdi_pathinfo_t *prev = NULL; 2933 mdi_pathinfo_t *path = NULL; 2934 2935 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2936 2937 path = ph->ph_path_head; 2938 while (path != NULL) { 2939 if (path == pip) { 2940 break; 2941 } 2942 prev = path; 2943 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 2944 } 2945 2946 if (path) { 2947 ph->ph_path_count--; 2948 if (prev) { 2949 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 2950 } else { 2951 ph->ph_path_head = 2952 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 2953 } 2954 if (ph->ph_path_tail == path) { 2955 ph->ph_path_tail = prev; 2956 } 2957 } 2958 2959 /* 2960 * Clear the pHCI link 2961 */ 2962 MDI_PI(pip)->pi_phci_link = NULL; 2963 MDI_PI(pip)->pi_phci = NULL; 2964 } 2965 2966 /* 2967 * i_mdi_client_remove_path(): 2968 * Remove a mdi_pathinfo node from client path list. 
2969 */ 2970 2971 static void 2972 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2973 { 2974 mdi_pathinfo_t *prev = NULL; 2975 mdi_pathinfo_t *path; 2976 2977 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2978 2979 path = ct->ct_path_head; 2980 while (path != NULL) { 2981 if (path == pip) { 2982 break; 2983 } 2984 prev = path; 2985 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 2986 } 2987 2988 if (path) { 2989 ct->ct_path_count--; 2990 if (prev) { 2991 MDI_PI(prev)->pi_client_link = 2992 MDI_PI(path)->pi_client_link; 2993 } else { 2994 ct->ct_path_head = 2995 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 2996 } 2997 if (ct->ct_path_tail == path) { 2998 ct->ct_path_tail = prev; 2999 } 3000 if (ct->ct_path_last == path) { 3001 ct->ct_path_last = ct->ct_path_head; 3002 } 3003 } 3004 MDI_PI(pip)->pi_client_link = NULL; 3005 MDI_PI(pip)->pi_client = NULL; 3006 } 3007 3008 /* 3009 * i_mdi_pi_state_change(): 3010 * online a mdi_pathinfo node 3011 * 3012 * Return Values: 3013 * MDI_SUCCESS 3014 * MDI_FAILURE 3015 */ 3016 /*ARGSUSED*/ 3017 static int 3018 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3019 { 3020 int rv = MDI_SUCCESS; 3021 mdi_vhci_t *vh; 3022 mdi_phci_t *ph; 3023 mdi_client_t *ct; 3024 int (*f)(); 3025 dev_info_t *cdip; 3026 3027 MDI_PI_LOCK(pip); 3028 3029 ph = MDI_PI(pip)->pi_phci; 3030 ASSERT(ph); 3031 if (ph == NULL) { 3032 /* 3033 * Invalid pHCI device, fail the request 3034 */ 3035 MDI_PI_UNLOCK(pip); 3036 MDI_DEBUG(1, (CE_WARN, NULL, 3037 "!mdi_pi_state_change: invalid phci")); 3038 return (MDI_FAILURE); 3039 } 3040 3041 vh = ph->ph_vhci; 3042 ASSERT(vh); 3043 if (vh == NULL) { 3044 /* 3045 * Invalid vHCI device, fail the request 3046 */ 3047 MDI_PI_UNLOCK(pip); 3048 MDI_DEBUG(1, (CE_WARN, NULL, 3049 "!mdi_pi_state_change: invalid vhci")); 3050 return (MDI_FAILURE); 3051 } 3052 3053 ct = MDI_PI(pip)->pi_client; 3054 ASSERT(ct != NULL); 3055 if (ct == NULL) { 3056 /* 3057 * Invalid client device, 
fail the request 3058 */ 3059 MDI_PI_UNLOCK(pip); 3060 MDI_DEBUG(1, (CE_WARN, NULL, 3061 "!mdi_pi_state_change: invalid client")); 3062 return (MDI_FAILURE); 3063 } 3064 3065 /* 3066 * If this path has not been initialized yet, Callback vHCI driver's 3067 * pathinfo node initialize entry point 3068 */ 3069 3070 if (MDI_PI_IS_INITING(pip)) { 3071 MDI_PI_UNLOCK(pip); 3072 f = vh->vh_ops->vo_pi_init; 3073 if (f != NULL) { 3074 rv = (*f)(vh->vh_dip, pip, 0); 3075 if (rv != MDI_SUCCESS) { 3076 MDI_DEBUG(1, (CE_WARN, vh->vh_dip, 3077 "!vo_pi_init: failed vHCI=0x%p, pip=0x%p", 3078 vh, pip)); 3079 return (MDI_FAILURE); 3080 } 3081 } 3082 MDI_PI_LOCK(pip); 3083 MDI_PI_CLEAR_TRANSIENT(pip); 3084 } 3085 3086 /* 3087 * Do not allow state transition when pHCI is in offline/suspended 3088 * states 3089 */ 3090 i_mdi_phci_lock(ph, pip); 3091 if (MDI_PHCI_IS_READY(ph) == 0) { 3092 MDI_DEBUG(1, (CE_WARN, NULL, 3093 "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph)); 3094 MDI_PI_UNLOCK(pip); 3095 i_mdi_phci_unlock(ph); 3096 return (MDI_BUSY); 3097 } 3098 MDI_PHCI_UNSTABLE(ph); 3099 i_mdi_phci_unlock(ph); 3100 3101 /* 3102 * Check if mdi_pathinfo state is in transient state. 3103 * If yes, offlining is in progress and wait till transient state is 3104 * cleared. 3105 */ 3106 if (MDI_PI_IS_TRANSIENT(pip)) { 3107 while (MDI_PI_IS_TRANSIENT(pip)) { 3108 cv_wait(&MDI_PI(pip)->pi_state_cv, 3109 &MDI_PI(pip)->pi_mutex); 3110 } 3111 } 3112 3113 /* 3114 * Grab the client lock in reverse order sequence and release the 3115 * mdi_pathinfo mutex. 
3116 */ 3117 i_mdi_client_lock(ct, pip); 3118 MDI_PI_UNLOCK(pip); 3119 3120 /* 3121 * Wait till failover state is cleared 3122 */ 3123 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3124 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3125 3126 /* 3127 * Mark the mdi_pathinfo node state as transient 3128 */ 3129 MDI_PI_LOCK(pip); 3130 switch (state) { 3131 case MDI_PATHINFO_STATE_ONLINE: 3132 MDI_PI_SET_ONLINING(pip); 3133 break; 3134 3135 case MDI_PATHINFO_STATE_STANDBY: 3136 MDI_PI_SET_STANDBYING(pip); 3137 break; 3138 3139 case MDI_PATHINFO_STATE_FAULT: 3140 /* 3141 * Mark the pathinfo state as FAULTED 3142 */ 3143 MDI_PI_SET_FAULTING(pip); 3144 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3145 break; 3146 3147 case MDI_PATHINFO_STATE_OFFLINE: 3148 /* 3149 * ndi_devi_offline() cannot hold pip or ct locks. 3150 */ 3151 MDI_PI_UNLOCK(pip); 3152 /* 3153 * Do not offline if path will become last path and path 3154 * is busy for user initiated events. 3155 */ 3156 cdip = ct->ct_dip; 3157 if ((flag & NDI_DEVI_REMOVE) && 3158 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3159 i_mdi_client_unlock(ct); 3160 rv = ndi_devi_offline(cdip, 0); 3161 if (rv != NDI_SUCCESS) { 3162 /* 3163 * Convert to MDI error code 3164 */ 3165 switch (rv) { 3166 case NDI_BUSY: 3167 rv = MDI_BUSY; 3168 break; 3169 default: 3170 rv = MDI_FAILURE; 3171 break; 3172 } 3173 goto state_change_exit; 3174 } else { 3175 i_mdi_client_lock(ct, NULL); 3176 } 3177 } 3178 /* 3179 * Mark the mdi_pathinfo node state as transient 3180 */ 3181 MDI_PI_LOCK(pip); 3182 MDI_PI_SET_OFFLINING(pip); 3183 break; 3184 } 3185 MDI_PI_UNLOCK(pip); 3186 MDI_CLIENT_UNSTABLE(ct); 3187 i_mdi_client_unlock(ct); 3188 3189 f = vh->vh_ops->vo_pi_state_change; 3190 if (f != NULL) { 3191 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3192 if (rv == MDI_NOT_SUPPORTED) { 3193 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3194 } 3195 if (rv != MDI_SUCCESS) { 3196 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 3197 "!vo_pi_state_change: failed rv = %x", rv)); 
3198 } 3199 } 3200 MDI_CLIENT_LOCK(ct); 3201 MDI_PI_LOCK(pip); 3202 if (MDI_PI_IS_TRANSIENT(pip)) { 3203 if (rv == MDI_SUCCESS) { 3204 MDI_PI_CLEAR_TRANSIENT(pip); 3205 } else { 3206 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3207 } 3208 } 3209 3210 /* 3211 * Wake anyone waiting for this mdi_pathinfo node 3212 */ 3213 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3214 MDI_PI_UNLOCK(pip); 3215 3216 /* 3217 * Mark the client device as stable 3218 */ 3219 MDI_CLIENT_STABLE(ct); 3220 if (rv == MDI_SUCCESS) { 3221 if (ct->ct_unstable == 0) { 3222 cdip = ct->ct_dip; 3223 3224 /* 3225 * Onlining the mdi_pathinfo node will impact the 3226 * client state Update the client and dev_info node 3227 * state accordingly 3228 */ 3229 rv = NDI_SUCCESS; 3230 i_mdi_client_update_state(ct); 3231 switch (MDI_CLIENT_STATE(ct)) { 3232 case MDI_CLIENT_STATE_OPTIMAL: 3233 case MDI_CLIENT_STATE_DEGRADED: 3234 if (cdip && !i_ddi_devi_attached(cdip) && 3235 ((state == MDI_PATHINFO_STATE_ONLINE) || 3236 (state == MDI_PATHINFO_STATE_STANDBY))) { 3237 3238 i_mdi_client_unlock(ct); 3239 /* 3240 * Must do ndi_devi_online() through 3241 * hotplug thread for deferred 3242 * attach mechanism to work 3243 */ 3244 rv = ndi_devi_online(cdip, 0); 3245 i_mdi_client_lock(ct, NULL); 3246 if ((rv != NDI_SUCCESS) && 3247 (MDI_CLIENT_STATE(ct) == 3248 MDI_CLIENT_STATE_DEGRADED)) { 3249 /* 3250 * ndi_devi_online failed. 3251 * Reset client flags to 3252 * offline. 3253 */ 3254 MDI_DEBUG(1, (CE_WARN, cdip, 3255 "!ndi_devi_online: failed " 3256 " Error: %x", rv)); 3257 MDI_CLIENT_SET_OFFLINE(ct); 3258 } 3259 if (rv != NDI_SUCCESS) { 3260 /* Reset the path state */ 3261 MDI_PI_LOCK(pip); 3262 MDI_PI(pip)->pi_state = 3263 MDI_PI_OLD_STATE(pip); 3264 MDI_PI_UNLOCK(pip); 3265 } 3266 } 3267 break; 3268 3269 case MDI_CLIENT_STATE_FAILED: 3270 /* 3271 * This is the last path case for 3272 * non-user initiated events. 
3273 */ 3274 if (((flag & NDI_DEVI_REMOVE) == 0) && 3275 cdip && (i_ddi_node_state(cdip) >= 3276 DS_INITIALIZED)) { 3277 i_mdi_client_unlock(ct); 3278 rv = ndi_devi_offline(cdip, 0); 3279 i_mdi_client_lock(ct, NULL); 3280 3281 if (rv != NDI_SUCCESS) { 3282 /* 3283 * ndi_devi_offline failed. 3284 * Reset client flags to 3285 * online as the path could not 3286 * be offlined. 3287 */ 3288 MDI_DEBUG(1, (CE_WARN, cdip, 3289 "!ndi_devi_offline: failed " 3290 " Error: %x", rv)); 3291 MDI_CLIENT_SET_ONLINE(ct); 3292 } 3293 } 3294 break; 3295 } 3296 /* 3297 * Convert to MDI error code 3298 */ 3299 switch (rv) { 3300 case NDI_SUCCESS: 3301 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3302 i_mdi_report_path_state(ct, pip); 3303 rv = MDI_SUCCESS; 3304 break; 3305 case NDI_BUSY: 3306 rv = MDI_BUSY; 3307 break; 3308 default: 3309 rv = MDI_FAILURE; 3310 break; 3311 } 3312 } 3313 } 3314 MDI_CLIENT_UNLOCK(ct); 3315 3316 state_change_exit: 3317 /* 3318 * Mark the pHCI as stable again. 3319 */ 3320 MDI_PHCI_LOCK(ph); 3321 MDI_PHCI_STABLE(ph); 3322 MDI_PHCI_UNLOCK(ph); 3323 return (rv); 3324 } 3325 3326 /* 3327 * mdi_pi_online(): 3328 * Place the path_info node in the online state. The path is 3329 * now available to be selected by mdi_select_path() for 3330 * transporting I/O requests to client devices. 
3331 * Return Values: 3332 * MDI_SUCCESS 3333 * MDI_FAILURE 3334 */ 3335 int 3336 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3337 { 3338 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3339 dev_info_t *cdip; 3340 int client_held = 0; 3341 int rv; 3342 3343 ASSERT(ct != NULL); 3344 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3345 if (rv != MDI_SUCCESS) 3346 return (rv); 3347 3348 MDI_PI_LOCK(pip); 3349 if (MDI_PI(pip)->pi_pm_held == 0) { 3350 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3351 "i_mdi_pm_hold_pip\n")); 3352 i_mdi_pm_hold_pip(pip); 3353 client_held = 1; 3354 } 3355 MDI_PI_UNLOCK(pip); 3356 3357 if (client_held) { 3358 MDI_CLIENT_LOCK(ct); 3359 if (ct->ct_power_cnt == 0) { 3360 rv = i_mdi_power_all_phci(ct); 3361 } 3362 3363 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3364 "i_mdi_pm_hold_client\n")); 3365 i_mdi_pm_hold_client(ct, 1); 3366 MDI_CLIENT_UNLOCK(ct); 3367 } 3368 3369 /* 3370 * Create the per-path (pathinfo) IO and error kstats which 3371 * are reported via iostat(1m). 3372 * 3373 * Defer creating the per-path kstats if device is not yet 3374 * attached; the names of the kstats are constructed in part 3375 * using the devices instance number which is assigned during 3376 * process of attaching the client device. 3377 * 3378 * The framework post_attach handler, mdi_post_attach(), is 3379 * is responsible for initializing the client's pathinfo list 3380 * once successfully attached. 
3381 */ 3382 cdip = ct->ct_dip; 3383 ASSERT(cdip); 3384 if (cdip == NULL || !i_ddi_devi_attached(cdip)) 3385 return (rv); 3386 3387 MDI_CLIENT_LOCK(ct); 3388 rv = i_mdi_pi_kstat_create(pip); 3389 MDI_CLIENT_UNLOCK(ct); 3390 return (rv); 3391 } 3392 3393 /* 3394 * mdi_pi_standby(): 3395 * Place the mdi_pathinfo node in standby state 3396 * 3397 * Return Values: 3398 * MDI_SUCCESS 3399 * MDI_FAILURE 3400 */ 3401 int 3402 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3403 { 3404 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3405 } 3406 3407 /* 3408 * mdi_pi_fault(): 3409 * Place the mdi_pathinfo node in fault'ed state 3410 * Return Values: 3411 * MDI_SUCCESS 3412 * MDI_FAILURE 3413 */ 3414 int 3415 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3416 { 3417 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3418 } 3419 3420 /* 3421 * mdi_pi_offline(): 3422 * Offline a mdi_pathinfo node. 3423 * Return Values: 3424 * MDI_SUCCESS 3425 * MDI_FAILURE 3426 */ 3427 int 3428 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3429 { 3430 int ret, client_held = 0; 3431 mdi_client_t *ct; 3432 3433 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3434 3435 if (ret == MDI_SUCCESS) { 3436 MDI_PI_LOCK(pip); 3437 if (MDI_PI(pip)->pi_pm_held) { 3438 client_held = 1; 3439 } 3440 MDI_PI_UNLOCK(pip); 3441 3442 if (client_held) { 3443 ct = MDI_PI(pip)->pi_client; 3444 MDI_CLIENT_LOCK(ct); 3445 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3446 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3447 i_mdi_pm_rele_client(ct, 1); 3448 MDI_CLIENT_UNLOCK(ct); 3449 } 3450 } 3451 3452 return (ret); 3453 } 3454 3455 /* 3456 * i_mdi_pi_offline(): 3457 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3458 */ 3459 static int 3460 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3461 { 3462 dev_info_t *vdip = NULL; 3463 mdi_vhci_t *vh = NULL; 3464 mdi_client_t *ct = NULL; 3465 int (*f)(); 3466 int rv; 3467 3468 MDI_PI_LOCK(pip); 3469 ct = 
MDI_PI(pip)->pi_client; 3470 ASSERT(ct != NULL); 3471 3472 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3473 /* 3474 * Give a chance for pending I/Os to complete. 3475 */ 3476 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3477 "%d cmds still pending on path: %p\n", 3478 MDI_PI(pip)->pi_ref_cnt, pip)); 3479 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3480 &MDI_PI(pip)->pi_mutex, 3481 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3482 /* 3483 * The timeout time reached without ref_cnt being zero 3484 * being signaled. 3485 */ 3486 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3487 "Timeout reached on path %p without the cond\n", 3488 pip)); 3489 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3490 "%d cmds still pending on path: %p\n", 3491 MDI_PI(pip)->pi_ref_cnt, pip)); 3492 } 3493 } 3494 vh = ct->ct_vhci; 3495 vdip = vh->vh_dip; 3496 3497 /* 3498 * Notify vHCI that has registered this event 3499 */ 3500 ASSERT(vh->vh_ops); 3501 f = vh->vh_ops->vo_pi_state_change; 3502 3503 if (f != NULL) { 3504 MDI_PI_UNLOCK(pip); 3505 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3506 flags)) != MDI_SUCCESS) { 3507 MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed " 3508 "vdip 0x%x, pip 0x%x", vdip, pip)); 3509 } 3510 MDI_PI_LOCK(pip); 3511 } 3512 3513 /* 3514 * Set the mdi_pathinfo node state and clear the transient condition 3515 */ 3516 MDI_PI_SET_OFFLINE(pip); 3517 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3518 MDI_PI_UNLOCK(pip); 3519 3520 MDI_CLIENT_LOCK(ct); 3521 if (rv == MDI_SUCCESS) { 3522 if (ct->ct_unstable == 0) { 3523 dev_info_t *cdip = ct->ct_dip; 3524 3525 /* 3526 * Onlining the mdi_pathinfo node will impact the 3527 * client state Update the client and dev_info node 3528 * state accordingly 3529 */ 3530 i_mdi_client_update_state(ct); 3531 rv = NDI_SUCCESS; 3532 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3533 if (cdip && 3534 (i_ddi_node_state(cdip) >= 3535 DS_INITIALIZED)) { 3536 MDI_CLIENT_UNLOCK(ct); 3537 rv = 
ndi_devi_offline(cdip, 0); 3538 MDI_CLIENT_LOCK(ct); 3539 if (rv != NDI_SUCCESS) { 3540 /* 3541 * ndi_devi_offline failed. 3542 * Reset client flags to 3543 * online. 3544 */ 3545 MDI_DEBUG(4, (CE_WARN, cdip, 3546 "!ndi_devi_offline: failed " 3547 " Error: %x", rv)); 3548 MDI_CLIENT_SET_ONLINE(ct); 3549 } 3550 } 3551 } 3552 /* 3553 * Convert to MDI error code 3554 */ 3555 switch (rv) { 3556 case NDI_SUCCESS: 3557 rv = MDI_SUCCESS; 3558 break; 3559 case NDI_BUSY: 3560 rv = MDI_BUSY; 3561 break; 3562 default: 3563 rv = MDI_FAILURE; 3564 break; 3565 } 3566 } 3567 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3568 i_mdi_report_path_state(ct, pip); 3569 } 3570 3571 MDI_CLIENT_UNLOCK(ct); 3572 3573 /* 3574 * Change in the mdi_pathinfo node state will impact the client state 3575 */ 3576 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3577 ct, pip)); 3578 return (rv); 3579 } 3580 3581 3582 /* 3583 * mdi_pi_get_addr(): 3584 * Get the unit address associated with a mdi_pathinfo node 3585 * 3586 * Return Values: 3587 * char * 3588 */ 3589 char * 3590 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3591 { 3592 if (pip == NULL) 3593 return (NULL); 3594 3595 return (MDI_PI(pip)->pi_addr); 3596 } 3597 3598 /* 3599 * mdi_pi_get_client(): 3600 * Get the client devinfo associated with a mdi_pathinfo node 3601 * 3602 * Return Values: 3603 * Handle to client device dev_info node 3604 */ 3605 dev_info_t * 3606 mdi_pi_get_client(mdi_pathinfo_t *pip) 3607 { 3608 dev_info_t *dip = NULL; 3609 if (pip) { 3610 dip = MDI_PI(pip)->pi_client->ct_dip; 3611 } 3612 return (dip); 3613 } 3614 3615 /* 3616 * mdi_pi_get_phci(): 3617 * Get the pHCI devinfo associated with the mdi_pathinfo node 3618 * Return Values: 3619 * Handle to dev_info node 3620 */ 3621 dev_info_t * 3622 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3623 { 3624 dev_info_t *dip = NULL; 3625 if (pip) { 3626 dip = MDI_PI(pip)->pi_phci->ph_dip; 3627 } 3628 return (dip); 3629 } 3630 3631 /* 3632 * mdi_pi_get_client_private(): 3633 * Get the 
client private information associated with the 3634 * mdi_pathinfo node 3635 */ 3636 void * 3637 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3638 { 3639 void *cprivate = NULL; 3640 if (pip) { 3641 cprivate = MDI_PI(pip)->pi_cprivate; 3642 } 3643 return (cprivate); 3644 } 3645 3646 /* 3647 * mdi_pi_set_client_private(): 3648 * Set the client private information in the mdi_pathinfo node 3649 */ 3650 void 3651 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3652 { 3653 if (pip) { 3654 MDI_PI(pip)->pi_cprivate = priv; 3655 } 3656 } 3657 3658 /* 3659 * mdi_pi_get_phci_private(): 3660 * Get the pHCI private information associated with the 3661 * mdi_pathinfo node 3662 */ 3663 caddr_t 3664 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3665 { 3666 caddr_t pprivate = NULL; 3667 if (pip) { 3668 pprivate = MDI_PI(pip)->pi_pprivate; 3669 } 3670 return (pprivate); 3671 } 3672 3673 /* 3674 * mdi_pi_set_phci_private(): 3675 * Set the pHCI private information in the mdi_pathinfo node 3676 */ 3677 void 3678 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3679 { 3680 if (pip) { 3681 MDI_PI(pip)->pi_pprivate = priv; 3682 } 3683 } 3684 3685 /* 3686 * mdi_pi_get_state(): 3687 * Get the mdi_pathinfo node state. Transient states are internal 3688 * and not provided to the users 3689 */ 3690 mdi_pathinfo_state_t 3691 mdi_pi_get_state(mdi_pathinfo_t *pip) 3692 { 3693 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3694 3695 if (pip) { 3696 if (MDI_PI_IS_TRANSIENT(pip)) { 3697 /* 3698 * mdi_pathinfo is in state transition. Return the 3699 * last good state. 3700 */ 3701 state = MDI_PI_OLD_STATE(pip); 3702 } else { 3703 state = MDI_PI_STATE(pip); 3704 } 3705 } 3706 return (state); 3707 } 3708 3709 /* 3710 * Note that the following function needs to be the new interface for 3711 * mdi_pi_get_state when mpxio gets integrated to ON. 
3712 */ 3713 int 3714 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3715 uint32_t *ext_state) 3716 { 3717 *state = MDI_PATHINFO_STATE_INIT; 3718 3719 if (pip) { 3720 if (MDI_PI_IS_TRANSIENT(pip)) { 3721 /* 3722 * mdi_pathinfo is in state transition. Return the 3723 * last good state. 3724 */ 3725 *state = MDI_PI_OLD_STATE(pip); 3726 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3727 } else { 3728 *state = MDI_PI_STATE(pip); 3729 *ext_state = MDI_PI_EXT_STATE(pip); 3730 } 3731 } 3732 return (MDI_SUCCESS); 3733 } 3734 3735 /* 3736 * mdi_pi_get_preferred: 3737 * Get the preferred path flag 3738 */ 3739 int 3740 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3741 { 3742 if (pip) { 3743 return (MDI_PI(pip)->pi_preferred); 3744 } 3745 return (0); 3746 } 3747 3748 /* 3749 * mdi_pi_set_preferred: 3750 * Set the preferred path flag 3751 */ 3752 void 3753 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3754 { 3755 if (pip) { 3756 MDI_PI(pip)->pi_preferred = preferred; 3757 } 3758 } 3759 3760 3761 /* 3762 * mdi_pi_set_state(): 3763 * Set the mdi_pathinfo node state 3764 */ 3765 void 3766 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3767 { 3768 uint32_t ext_state; 3769 3770 if (pip) { 3771 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3772 MDI_PI(pip)->pi_state = state; 3773 MDI_PI(pip)->pi_state |= ext_state; 3774 } 3775 } 3776 3777 /* 3778 * Property functions: 3779 */ 3780 3781 int 3782 i_map_nvlist_error_to_mdi(int val) 3783 { 3784 int rv; 3785 3786 switch (val) { 3787 case 0: 3788 rv = DDI_PROP_SUCCESS; 3789 break; 3790 case EINVAL: 3791 case ENOTSUP: 3792 rv = DDI_PROP_INVAL_ARG; 3793 break; 3794 case ENOMEM: 3795 rv = DDI_PROP_NO_MEMORY; 3796 break; 3797 default: 3798 rv = DDI_PROP_NOT_FOUND; 3799 break; 3800 } 3801 return (rv); 3802 } 3803 3804 /* 3805 * mdi_pi_get_next_prop(): 3806 * Property walk function. 
The caller should hold mdi_pi_lock() 3807 * and release by calling mdi_pi_unlock() at the end of walk to 3808 * get a consistent value. 3809 */ 3810 3811 nvpair_t * 3812 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3813 { 3814 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3815 return (NULL); 3816 } 3817 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3818 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3819 } 3820 3821 /* 3822 * mdi_prop_remove(): 3823 * Remove the named property from the named list. 3824 */ 3825 3826 int 3827 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3828 { 3829 if (pip == NULL) { 3830 return (DDI_PROP_NOT_FOUND); 3831 } 3832 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3833 MDI_PI_LOCK(pip); 3834 if (MDI_PI(pip)->pi_prop == NULL) { 3835 MDI_PI_UNLOCK(pip); 3836 return (DDI_PROP_NOT_FOUND); 3837 } 3838 if (name) { 3839 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3840 } else { 3841 char nvp_name[MAXNAMELEN]; 3842 nvpair_t *nvp; 3843 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3844 while (nvp) { 3845 nvpair_t *next; 3846 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3847 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3848 nvpair_name(nvp)); 3849 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3850 nvp_name); 3851 nvp = next; 3852 } 3853 } 3854 MDI_PI_UNLOCK(pip); 3855 return (DDI_PROP_SUCCESS); 3856 } 3857 3858 /* 3859 * mdi_prop_size(): 3860 * Get buffer size needed to pack the property data. 3861 * Caller should hold the mdi_pathinfo_t lock to get a consistent 3862 * buffer size. 
3863 */ 3864 3865 int 3866 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3867 { 3868 int rv; 3869 size_t bufsize; 3870 3871 *buflenp = 0; 3872 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3873 return (DDI_PROP_NOT_FOUND); 3874 } 3875 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3876 rv = nvlist_size(MDI_PI(pip)->pi_prop, 3877 &bufsize, NV_ENCODE_NATIVE); 3878 *buflenp = bufsize; 3879 return (i_map_nvlist_error_to_mdi(rv)); 3880 } 3881 3882 /* 3883 * mdi_prop_pack(): 3884 * pack the property list. The caller should hold the 3885 * mdi_pathinfo_t node to get a consistent data 3886 */ 3887 3888 int 3889 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 3890 { 3891 int rv; 3892 size_t bufsize; 3893 3894 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 3895 return (DDI_PROP_NOT_FOUND); 3896 } 3897 3898 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3899 3900 bufsize = buflen; 3901 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 3902 NV_ENCODE_NATIVE, KM_SLEEP); 3903 3904 return (i_map_nvlist_error_to_mdi(rv)); 3905 } 3906 3907 /* 3908 * mdi_prop_update_byte(): 3909 * Create/Update a byte property 3910 */ 3911 int 3912 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 3913 { 3914 int rv; 3915 3916 if (pip == NULL) { 3917 return (DDI_PROP_INVAL_ARG); 3918 } 3919 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3920 MDI_PI_LOCK(pip); 3921 if (MDI_PI(pip)->pi_prop == NULL) { 3922 MDI_PI_UNLOCK(pip); 3923 return (DDI_PROP_NOT_FOUND); 3924 } 3925 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 3926 MDI_PI_UNLOCK(pip); 3927 return (i_map_nvlist_error_to_mdi(rv)); 3928 } 3929 3930 /* 3931 * mdi_prop_update_byte_array(): 3932 * Create/Update a byte array property 3933 */ 3934 int 3935 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 3936 uint_t nelements) 3937 { 3938 int rv; 3939 3940 if (pip == NULL) { 3941 return (DDI_PROP_INVAL_ARG); 3942 } 3943 
ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3944 MDI_PI_LOCK(pip); 3945 if (MDI_PI(pip)->pi_prop == NULL) { 3946 MDI_PI_UNLOCK(pip); 3947 return (DDI_PROP_NOT_FOUND); 3948 } 3949 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 3950 MDI_PI_UNLOCK(pip); 3951 return (i_map_nvlist_error_to_mdi(rv)); 3952 } 3953 3954 /* 3955 * mdi_prop_update_int(): 3956 * Create/Update a 32 bit integer property 3957 */ 3958 int 3959 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 3960 { 3961 int rv; 3962 3963 if (pip == NULL) { 3964 return (DDI_PROP_INVAL_ARG); 3965 } 3966 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3967 MDI_PI_LOCK(pip); 3968 if (MDI_PI(pip)->pi_prop == NULL) { 3969 MDI_PI_UNLOCK(pip); 3970 return (DDI_PROP_NOT_FOUND); 3971 } 3972 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 3973 MDI_PI_UNLOCK(pip); 3974 return (i_map_nvlist_error_to_mdi(rv)); 3975 } 3976 3977 /* 3978 * mdi_prop_update_int64(): 3979 * Create/Update a 64 bit integer property 3980 */ 3981 int 3982 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 3983 { 3984 int rv; 3985 3986 if (pip == NULL) { 3987 return (DDI_PROP_INVAL_ARG); 3988 } 3989 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3990 MDI_PI_LOCK(pip); 3991 if (MDI_PI(pip)->pi_prop == NULL) { 3992 MDI_PI_UNLOCK(pip); 3993 return (DDI_PROP_NOT_FOUND); 3994 } 3995 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 3996 MDI_PI_UNLOCK(pip); 3997 return (i_map_nvlist_error_to_mdi(rv)); 3998 } 3999 4000 /* 4001 * mdi_prop_update_int_array(): 4002 * Create/Update a int array property 4003 */ 4004 int 4005 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4006 uint_t nelements) 4007 { 4008 int rv; 4009 4010 if (pip == NULL) { 4011 return (DDI_PROP_INVAL_ARG); 4012 } 4013 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4014 MDI_PI_LOCK(pip); 4015 if (MDI_PI(pip)->pi_prop == NULL) { 4016 MDI_PI_UNLOCK(pip); 4017 return (DDI_PROP_NOT_FOUND); 4018 } 4019 rv 
= nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4020 nelements); 4021 MDI_PI_UNLOCK(pip); 4022 return (i_map_nvlist_error_to_mdi(rv)); 4023 } 4024 4025 /* 4026 * mdi_prop_update_string(): 4027 * Create/Update a string property 4028 */ 4029 int 4030 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4031 { 4032 int rv; 4033 4034 if (pip == NULL) { 4035 return (DDI_PROP_INVAL_ARG); 4036 } 4037 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4038 MDI_PI_LOCK(pip); 4039 if (MDI_PI(pip)->pi_prop == NULL) { 4040 MDI_PI_UNLOCK(pip); 4041 return (DDI_PROP_NOT_FOUND); 4042 } 4043 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4044 MDI_PI_UNLOCK(pip); 4045 return (i_map_nvlist_error_to_mdi(rv)); 4046 } 4047 4048 /* 4049 * mdi_prop_update_string_array(): 4050 * Create/Update a string array property 4051 */ 4052 int 4053 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4054 uint_t nelements) 4055 { 4056 int rv; 4057 4058 if (pip == NULL) { 4059 return (DDI_PROP_INVAL_ARG); 4060 } 4061 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4062 MDI_PI_LOCK(pip); 4063 if (MDI_PI(pip)->pi_prop == NULL) { 4064 MDI_PI_UNLOCK(pip); 4065 return (DDI_PROP_NOT_FOUND); 4066 } 4067 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4068 nelements); 4069 MDI_PI_UNLOCK(pip); 4070 return (i_map_nvlist_error_to_mdi(rv)); 4071 } 4072 4073 /* 4074 * mdi_prop_lookup_byte(): 4075 * Look for byte property identified by name. The data returned 4076 * is the actual property and valid as long as mdi_pathinfo_t node 4077 * is alive. 
4078 */ 4079 int 4080 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4081 { 4082 int rv; 4083 4084 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4085 return (DDI_PROP_NOT_FOUND); 4086 } 4087 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4088 return (i_map_nvlist_error_to_mdi(rv)); 4089 } 4090 4091 4092 /* 4093 * mdi_prop_lookup_byte_array(): 4094 * Look for byte array property identified by name. The data 4095 * returned is the actual property and valid as long as 4096 * mdi_pathinfo_t node is alive. 4097 */ 4098 int 4099 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4100 uint_t *nelements) 4101 { 4102 int rv; 4103 4104 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4105 return (DDI_PROP_NOT_FOUND); 4106 } 4107 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4108 nelements); 4109 return (i_map_nvlist_error_to_mdi(rv)); 4110 } 4111 4112 /* 4113 * mdi_prop_lookup_int(): 4114 * Look for int property identified by name. The data returned 4115 * is the actual property and valid as long as mdi_pathinfo_t 4116 * node is alive. 4117 */ 4118 int 4119 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4120 { 4121 int rv; 4122 4123 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4124 return (DDI_PROP_NOT_FOUND); 4125 } 4126 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4127 return (i_map_nvlist_error_to_mdi(rv)); 4128 } 4129 4130 /* 4131 * mdi_prop_lookup_int64(): 4132 * Look for int64 property identified by name. The data returned 4133 * is the actual property and valid as long as mdi_pathinfo_t node 4134 * is alive. 
4135 */ 4136 int 4137 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4138 { 4139 int rv; 4140 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4141 return (DDI_PROP_NOT_FOUND); 4142 } 4143 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4144 return (i_map_nvlist_error_to_mdi(rv)); 4145 } 4146 4147 /* 4148 * mdi_prop_lookup_int_array(): 4149 * Look for int array property identified by name. The data 4150 * returned is the actual property and valid as long as 4151 * mdi_pathinfo_t node is alive. 4152 */ 4153 int 4154 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4155 uint_t *nelements) 4156 { 4157 int rv; 4158 4159 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4160 return (DDI_PROP_NOT_FOUND); 4161 } 4162 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4163 (int32_t **)data, nelements); 4164 return (i_map_nvlist_error_to_mdi(rv)); 4165 } 4166 4167 /* 4168 * mdi_prop_lookup_string(): 4169 * Look for string property identified by name. The data 4170 * returned is the actual property and valid as long as 4171 * mdi_pathinfo_t node is alive. 4172 */ 4173 int 4174 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4175 { 4176 int rv; 4177 4178 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4179 return (DDI_PROP_NOT_FOUND); 4180 } 4181 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4182 return (i_map_nvlist_error_to_mdi(rv)); 4183 } 4184 4185 /* 4186 * mdi_prop_lookup_string_array(): 4187 * Look for string array property identified by name. The data 4188 * returned is the actual property and valid as long as 4189 * mdi_pathinfo_t node is alive. 
4190 */ 4191 4192 int 4193 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4194 uint_t *nelements) 4195 { 4196 int rv; 4197 4198 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4199 return (DDI_PROP_NOT_FOUND); 4200 } 4201 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4202 nelements); 4203 return (i_map_nvlist_error_to_mdi(rv)); 4204 } 4205 4206 /* 4207 * mdi_prop_free(): 4208 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4209 * functions return the pointer to actual property data and not a 4210 * copy of it. So the data returned is valid as long as 4211 * mdi_pathinfo_t node is valid. 4212 */ 4213 4214 /*ARGSUSED*/ 4215 int 4216 mdi_prop_free(void *data) 4217 { 4218 return (DDI_PROP_SUCCESS); 4219 } 4220 4221 /*ARGSUSED*/ 4222 static void 4223 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4224 { 4225 char *phci_path, *ct_path; 4226 char *ct_status; 4227 char *status; 4228 dev_info_t *dip = ct->ct_dip; 4229 char lb_buf[64]; 4230 4231 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 4232 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4233 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4234 return; 4235 } 4236 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4237 ct_status = "optimal"; 4238 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4239 ct_status = "degraded"; 4240 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4241 ct_status = "failed"; 4242 } else { 4243 ct_status = "unknown"; 4244 } 4245 4246 if (MDI_PI_IS_OFFLINE(pip)) { 4247 status = "offline"; 4248 } else if (MDI_PI_IS_ONLINE(pip)) { 4249 status = "online"; 4250 } else if (MDI_PI_IS_STANDBY(pip)) { 4251 status = "standby"; 4252 } else if (MDI_PI_IS_FAULT(pip)) { 4253 status = "faulted"; 4254 } else { 4255 status = "unknown"; 4256 } 4257 4258 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4259 (void) snprintf(lb_buf, sizeof (lb_buf), 4260 "%s, region-size: %d", mdi_load_balance_lba, 4261 
ct->ct_lb_args->region_size); 4262 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4263 (void) snprintf(lb_buf, sizeof (lb_buf), 4264 "%s", mdi_load_balance_none); 4265 } else { 4266 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4267 mdi_load_balance_rr); 4268 } 4269 4270 if (dip) { 4271 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4272 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4273 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4274 "path %s (%s%d) to target address: %s is %s" 4275 " Load balancing: %s\n", 4276 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4277 ddi_get_instance(dip), ct_status, 4278 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4279 ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4280 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4281 MDI_PI(pip)->pi_addr, status, lb_buf); 4282 kmem_free(phci_path, MAXPATHLEN); 4283 kmem_free(ct_path, MAXPATHLEN); 4284 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4285 } 4286 } 4287 4288 #ifdef DEBUG 4289 /* 4290 * i_mdi_log(): 4291 * Utility function for error message management 4292 * 4293 */ 4294 4295 /*VARARGS3*/ 4296 static void 4297 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 
4298 { 4299 char buf[MAXNAMELEN]; 4300 char name[MAXNAMELEN]; 4301 va_list ap; 4302 int log_only = 0; 4303 int boot_only = 0; 4304 int console_only = 0; 4305 4306 if (dip) { 4307 if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) { 4308 (void) snprintf(name, MAXNAMELEN, "%s%d:\n", 4309 ddi_node_name(dip), ddi_get_instance(dip)); 4310 } else { 4311 (void) snprintf(name, MAXNAMELEN, "%s%d:", 4312 ddi_node_name(dip), ddi_get_instance(dip)); 4313 } 4314 } else { 4315 name[0] = '\0'; 4316 } 4317 4318 va_start(ap, fmt); 4319 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4320 va_end(ap); 4321 4322 switch (buf[0]) { 4323 case '!': 4324 log_only = 1; 4325 break; 4326 case '?': 4327 boot_only = 1; 4328 break; 4329 case '^': 4330 console_only = 1; 4331 break; 4332 } 4333 4334 switch (level) { 4335 case CE_NOTE: 4336 level = CE_CONT; 4337 /* FALLTHROUGH */ 4338 case CE_CONT: 4339 case CE_WARN: 4340 case CE_PANIC: 4341 if (boot_only) { 4342 cmn_err(level, "?%s\t%s", name, &buf[1]); 4343 } else if (console_only) { 4344 cmn_err(level, "^%s\t%s", name, &buf[1]); 4345 } else if (log_only) { 4346 cmn_err(level, "!%s\t%s", name, &buf[1]); 4347 } else { 4348 cmn_err(level, "%s\t%s", name, buf); 4349 } 4350 break; 4351 default: 4352 cmn_err(level, "%s\t%s", name, buf); 4353 break; 4354 } 4355 } 4356 #endif /* DEBUG */ 4357 4358 void 4359 i_mdi_client_online(dev_info_t *ct_dip) 4360 { 4361 mdi_client_t *ct; 4362 4363 /* 4364 * Client online notification. 
Mark client state as online 4365 * restore our binding with dev_info node 4366 */ 4367 ct = i_devi_get_client(ct_dip); 4368 ASSERT(ct != NULL); 4369 MDI_CLIENT_LOCK(ct); 4370 MDI_CLIENT_SET_ONLINE(ct); 4371 /* catch for any memory leaks */ 4372 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4373 ct->ct_dip = ct_dip; 4374 4375 if (ct->ct_power_cnt == 0) 4376 (void) i_mdi_power_all_phci(ct); 4377 4378 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4379 "i_mdi_pm_hold_client\n")); 4380 i_mdi_pm_hold_client(ct, 1); 4381 4382 MDI_CLIENT_UNLOCK(ct); 4383 } 4384 4385 void 4386 i_mdi_phci_online(dev_info_t *ph_dip) 4387 { 4388 mdi_phci_t *ph; 4389 4390 /* pHCI online notification. Mark state accordingly */ 4391 ph = i_devi_get_phci(ph_dip); 4392 ASSERT(ph != NULL); 4393 MDI_PHCI_LOCK(ph); 4394 MDI_PHCI_SET_ONLINE(ph); 4395 MDI_PHCI_UNLOCK(ph); 4396 } 4397 4398 /* 4399 * mdi_devi_online(): 4400 * Online notification from NDI framework on pHCI/client 4401 * device online. 4402 * Return Values: 4403 * NDI_SUCCESS 4404 * MDI_FAILURE 4405 */ 4406 4407 /*ARGSUSED*/ 4408 int 4409 mdi_devi_online(dev_info_t *dip, uint_t flags) 4410 { 4411 if (MDI_PHCI(dip)) { 4412 i_mdi_phci_online(dip); 4413 } 4414 4415 if (MDI_CLIENT(dip)) { 4416 i_mdi_client_online(dip); 4417 } 4418 return (NDI_SUCCESS); 4419 } 4420 4421 /* 4422 * mdi_devi_offline(): 4423 * Offline notification from NDI framework on pHCI/Client device 4424 * offline. 
4425 * 4426 * Return Values: 4427 * NDI_SUCCESS 4428 * NDI_FAILURE 4429 */ 4430 4431 /*ARGSUSED*/ 4432 int 4433 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4434 { 4435 int rv = NDI_SUCCESS; 4436 4437 if (MDI_CLIENT(dip)) { 4438 rv = i_mdi_client_offline(dip, flags); 4439 if (rv != NDI_SUCCESS) 4440 return (rv); 4441 } 4442 4443 if (MDI_PHCI(dip)) { 4444 rv = i_mdi_phci_offline(dip, flags); 4445 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4446 /* set client back online */ 4447 i_mdi_client_online(dip); 4448 } 4449 } 4450 4451 return (rv); 4452 } 4453 4454 /*ARGSUSED*/ 4455 static int 4456 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4457 { 4458 int rv = NDI_SUCCESS; 4459 mdi_phci_t *ph; 4460 mdi_client_t *ct; 4461 mdi_pathinfo_t *pip; 4462 mdi_pathinfo_t *next; 4463 mdi_pathinfo_t *failed_pip = NULL; 4464 dev_info_t *cdip; 4465 4466 /* 4467 * pHCI component offline notification 4468 * Make sure that this pHCI instance is free to be offlined. 4469 * If it is OK to proceed, Offline and remove all the child 4470 * mdi_pathinfo nodes. This process automatically offlines 4471 * corresponding client devices, for which this pHCI provides 4472 * critical services. 4473 */ 4474 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n", 4475 dip)); 4476 4477 ph = i_devi_get_phci(dip); 4478 if (ph == NULL) { 4479 return (rv); 4480 } 4481 4482 MDI_PHCI_LOCK(ph); 4483 4484 if (MDI_PHCI_IS_OFFLINE(ph)) { 4485 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph)); 4486 MDI_PHCI_UNLOCK(ph); 4487 return (NDI_SUCCESS); 4488 } 4489 4490 /* 4491 * Check to see if the pHCI can be offlined 4492 */ 4493 if (ph->ph_unstable) { 4494 MDI_DEBUG(1, (CE_WARN, dip, 4495 "!One or more target devices are in transient " 4496 "state. This device can not be removed at " 4497 "this moment. 
Please try again later.")); 4498 MDI_PHCI_UNLOCK(ph); 4499 return (NDI_BUSY); 4500 } 4501 4502 pip = ph->ph_path_head; 4503 while (pip != NULL) { 4504 MDI_PI_LOCK(pip); 4505 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4506 /* 4507 * The mdi_pathinfo state is OK. Check the client state. 4508 * If failover in progress fail the pHCI from offlining 4509 */ 4510 ct = MDI_PI(pip)->pi_client; 4511 i_mdi_client_lock(ct, pip); 4512 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4513 (ct->ct_unstable)) { 4514 /* 4515 * Failover is in progress, Fail the DR 4516 */ 4517 MDI_DEBUG(1, (CE_WARN, dip, 4518 "!pHCI device (%s%d) is Busy. %s", 4519 ddi_driver_name(dip), ddi_get_instance(dip), 4520 "This device can not be removed at " 4521 "this moment. Please try again later.")); 4522 MDI_PI_UNLOCK(pip); 4523 MDI_CLIENT_UNLOCK(ct); 4524 MDI_PHCI_UNLOCK(ph); 4525 return (NDI_BUSY); 4526 } 4527 MDI_PI_UNLOCK(pip); 4528 4529 /* 4530 * Check to see of we are removing the last path of this 4531 * client device... 4532 */ 4533 cdip = ct->ct_dip; 4534 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4535 (i_mdi_client_compute_state(ct, ph) == 4536 MDI_CLIENT_STATE_FAILED)) { 4537 i_mdi_client_unlock(ct); 4538 MDI_PHCI_UNLOCK(ph); 4539 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4540 /* 4541 * ndi_devi_offline() failed. 4542 * This pHCI provides the critical path 4543 * to one or more client devices. 4544 * Return busy. 4545 */ 4546 MDI_PHCI_LOCK(ph); 4547 MDI_DEBUG(1, (CE_WARN, dip, 4548 "!pHCI device (%s%d) is Busy. %s", 4549 ddi_driver_name(dip), ddi_get_instance(dip), 4550 "This device can not be removed at " 4551 "this moment. 
Please try again later.")); 4552 failed_pip = pip; 4553 break; 4554 } else { 4555 MDI_PHCI_LOCK(ph); 4556 pip = next; 4557 } 4558 } else { 4559 i_mdi_client_unlock(ct); 4560 pip = next; 4561 } 4562 } 4563 4564 if (failed_pip) { 4565 pip = ph->ph_path_head; 4566 while (pip != failed_pip) { 4567 MDI_PI_LOCK(pip); 4568 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4569 ct = MDI_PI(pip)->pi_client; 4570 i_mdi_client_lock(ct, pip); 4571 cdip = ct->ct_dip; 4572 switch (MDI_CLIENT_STATE(ct)) { 4573 case MDI_CLIENT_STATE_OPTIMAL: 4574 case MDI_CLIENT_STATE_DEGRADED: 4575 if (cdip) { 4576 MDI_PI_UNLOCK(pip); 4577 i_mdi_client_unlock(ct); 4578 MDI_PHCI_UNLOCK(ph); 4579 (void) ndi_devi_online(cdip, 0); 4580 MDI_PHCI_LOCK(ph); 4581 pip = next; 4582 continue; 4583 } 4584 break; 4585 4586 case MDI_CLIENT_STATE_FAILED: 4587 if (cdip) { 4588 MDI_PI_UNLOCK(pip); 4589 i_mdi_client_unlock(ct); 4590 MDI_PHCI_UNLOCK(ph); 4591 (void) ndi_devi_offline(cdip, 0); 4592 MDI_PHCI_LOCK(ph); 4593 pip = next; 4594 continue; 4595 } 4596 break; 4597 } 4598 MDI_PI_UNLOCK(pip); 4599 i_mdi_client_unlock(ct); 4600 pip = next; 4601 } 4602 MDI_PHCI_UNLOCK(ph); 4603 return (NDI_BUSY); 4604 } 4605 4606 /* 4607 * Mark the pHCI as offline 4608 */ 4609 MDI_PHCI_SET_OFFLINE(ph); 4610 4611 /* 4612 * Mark the child mdi_pathinfo nodes as transient 4613 */ 4614 pip = ph->ph_path_head; 4615 while (pip != NULL) { 4616 MDI_PI_LOCK(pip); 4617 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4618 MDI_PI_SET_OFFLINING(pip); 4619 MDI_PI_UNLOCK(pip); 4620 pip = next; 4621 } 4622 MDI_PHCI_UNLOCK(ph); 4623 /* 4624 * Give a chance for any pending commands to execute 4625 */ 4626 delay(1); 4627 MDI_PHCI_LOCK(ph); 4628 pip = ph->ph_path_head; 4629 while (pip != NULL) { 4630 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4631 (void) i_mdi_pi_offline(pip, flags); 4632 MDI_PI_LOCK(pip); 4633 ct = MDI_PI(pip)->pi_client; 4634 if (!MDI_PI_IS_OFFLINE(pip)) { 4635 MDI_DEBUG(1, (CE_WARN, dip, 4636 "!pHCI device (%s%d) 
is Busy. %s", 4637 ddi_driver_name(dip), ddi_get_instance(dip), 4638 "This device can not be removed at " 4639 "this moment. Please try again later.")); 4640 MDI_PI_UNLOCK(pip); 4641 MDI_PHCI_SET_ONLINE(ph); 4642 MDI_PHCI_UNLOCK(ph); 4643 return (NDI_BUSY); 4644 } 4645 MDI_PI_UNLOCK(pip); 4646 pip = next; 4647 } 4648 MDI_PHCI_UNLOCK(ph); 4649 4650 return (rv); 4651 } 4652 4653 /*ARGSUSED*/ 4654 static int 4655 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4656 { 4657 int rv = NDI_SUCCESS; 4658 mdi_client_t *ct; 4659 4660 /* 4661 * Client component to go offline. Make sure that we are 4662 * not in failing over state and update client state 4663 * accordingly 4664 */ 4665 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n", 4666 dip)); 4667 ct = i_devi_get_client(dip); 4668 if (ct != NULL) { 4669 MDI_CLIENT_LOCK(ct); 4670 if (ct->ct_unstable) { 4671 /* 4672 * One or more paths are in transient state, 4673 * Dont allow offline of a client device 4674 */ 4675 MDI_DEBUG(1, (CE_WARN, dip, 4676 "!One or more paths to this device is " 4677 "in transient state. This device can not " 4678 "be removed at this moment. " 4679 "Please try again later.")); 4680 MDI_CLIENT_UNLOCK(ct); 4681 return (NDI_BUSY); 4682 } 4683 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4684 /* 4685 * Failover is in progress, Dont allow DR of 4686 * a client device 4687 */ 4688 MDI_DEBUG(1, (CE_WARN, dip, 4689 "!Client device (%s%d) is Busy. %s", 4690 ddi_driver_name(dip), ddi_get_instance(dip), 4691 "This device can not be removed at " 4692 "this moment. 
Please try again later.")); 4693 MDI_CLIENT_UNLOCK(ct); 4694 return (NDI_BUSY); 4695 } 4696 MDI_CLIENT_SET_OFFLINE(ct); 4697 4698 /* 4699 * Unbind our relationship with the dev_info node 4700 */ 4701 if (flags & NDI_DEVI_REMOVE) { 4702 ct->ct_dip = NULL; 4703 } 4704 MDI_CLIENT_UNLOCK(ct); 4705 } 4706 return (rv); 4707 } 4708 4709 /* 4710 * mdi_pre_attach(): 4711 * Pre attach() notification handler 4712 */ 4713 4714 /*ARGSUSED*/ 4715 int 4716 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4717 { 4718 /* don't support old DDI_PM_RESUME */ 4719 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4720 (cmd == DDI_PM_RESUME)) 4721 return (DDI_FAILURE); 4722 4723 return (DDI_SUCCESS); 4724 } 4725 4726 /* 4727 * mdi_post_attach(): 4728 * Post attach() notification handler 4729 */ 4730 4731 /*ARGSUSED*/ 4732 void 4733 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4734 { 4735 mdi_phci_t *ph; 4736 mdi_client_t *ct; 4737 mdi_pathinfo_t *pip; 4738 4739 if (MDI_PHCI(dip)) { 4740 ph = i_devi_get_phci(dip); 4741 ASSERT(ph != NULL); 4742 4743 MDI_PHCI_LOCK(ph); 4744 switch (cmd) { 4745 case DDI_ATTACH: 4746 MDI_DEBUG(2, (CE_NOTE, dip, 4747 "!pHCI post_attach: called %p\n", ph)); 4748 if (error == DDI_SUCCESS) { 4749 MDI_PHCI_SET_ATTACH(ph); 4750 } else { 4751 MDI_DEBUG(1, (CE_NOTE, dip, 4752 "!pHCI post_attach: failed error=%d\n", 4753 error)); 4754 MDI_PHCI_SET_DETACH(ph); 4755 } 4756 break; 4757 4758 case DDI_RESUME: 4759 MDI_DEBUG(2, (CE_NOTE, dip, 4760 "!pHCI post_resume: called %p\n", ph)); 4761 if (error == DDI_SUCCESS) { 4762 MDI_PHCI_SET_RESUME(ph); 4763 } else { 4764 MDI_DEBUG(1, (CE_NOTE, dip, 4765 "!pHCI post_resume: failed error=%d\n", 4766 error)); 4767 MDI_PHCI_SET_SUSPEND(ph); 4768 } 4769 break; 4770 } 4771 MDI_PHCI_UNLOCK(ph); 4772 } 4773 4774 if (MDI_CLIENT(dip)) { 4775 ct = i_devi_get_client(dip); 4776 ASSERT(ct != NULL); 4777 4778 MDI_CLIENT_LOCK(ct); 4779 switch (cmd) { 4780 case DDI_ATTACH: 4781 MDI_DEBUG(2, (CE_NOTE, dip, 
4782 "!Client post_attach: called %p\n", ct)); 4783 if (error != DDI_SUCCESS) { 4784 MDI_DEBUG(1, (CE_NOTE, dip, 4785 "!Client post_attach: failed error=%d\n", 4786 error)); 4787 MDI_CLIENT_SET_DETACH(ct); 4788 MDI_DEBUG(4, (CE_WARN, dip, 4789 "mdi_post_attach i_mdi_pm_reset_client\n")); 4790 i_mdi_pm_reset_client(ct); 4791 break; 4792 } 4793 4794 /* 4795 * Client device has successfully attached. 4796 * Create kstats for any pathinfo structures 4797 * initially associated with this client. 4798 */ 4799 for (pip = ct->ct_path_head; pip != NULL; 4800 pip = (mdi_pathinfo_t *) 4801 MDI_PI(pip)->pi_client_link) { 4802 (void) i_mdi_pi_kstat_create(pip); 4803 i_mdi_report_path_state(ct, pip); 4804 } 4805 MDI_CLIENT_SET_ATTACH(ct); 4806 break; 4807 4808 case DDI_RESUME: 4809 MDI_DEBUG(2, (CE_NOTE, dip, 4810 "!Client post_attach: called %p\n", ct)); 4811 if (error == DDI_SUCCESS) { 4812 MDI_CLIENT_SET_RESUME(ct); 4813 } else { 4814 MDI_DEBUG(1, (CE_NOTE, dip, 4815 "!Client post_resume: failed error=%d\n", 4816 error)); 4817 MDI_CLIENT_SET_SUSPEND(ct); 4818 } 4819 break; 4820 } 4821 MDI_CLIENT_UNLOCK(ct); 4822 } 4823 } 4824 4825 /* 4826 * mdi_pre_detach(): 4827 * Pre detach notification handler 4828 */ 4829 4830 /*ARGSUSED*/ 4831 int 4832 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4833 { 4834 int rv = DDI_SUCCESS; 4835 4836 if (MDI_CLIENT(dip)) { 4837 (void) i_mdi_client_pre_detach(dip, cmd); 4838 } 4839 4840 if (MDI_PHCI(dip)) { 4841 rv = i_mdi_phci_pre_detach(dip, cmd); 4842 } 4843 4844 return (rv); 4845 } 4846 4847 /*ARGSUSED*/ 4848 static int 4849 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4850 { 4851 int rv = DDI_SUCCESS; 4852 mdi_phci_t *ph; 4853 mdi_client_t *ct; 4854 mdi_pathinfo_t *pip; 4855 mdi_pathinfo_t *failed_pip = NULL; 4856 mdi_pathinfo_t *next; 4857 4858 ph = i_devi_get_phci(dip); 4859 if (ph == NULL) { 4860 return (rv); 4861 } 4862 4863 MDI_PHCI_LOCK(ph); 4864 switch (cmd) { 4865 case DDI_DETACH: 4866 MDI_DEBUG(2, (CE_NOTE, 
dip, 4867 "!pHCI pre_detach: called %p\n", ph)); 4868 if (!MDI_PHCI_IS_OFFLINE(ph)) { 4869 /* 4870 * mdi_pathinfo nodes are still attached to 4871 * this pHCI. Fail the detach for this pHCI. 4872 */ 4873 MDI_DEBUG(2, (CE_WARN, dip, 4874 "!pHCI pre_detach: " 4875 "mdi_pathinfo nodes are still attached " 4876 "%p\n", ph)); 4877 rv = DDI_FAILURE; 4878 break; 4879 } 4880 MDI_PHCI_SET_DETACH(ph); 4881 break; 4882 4883 case DDI_SUSPEND: 4884 /* 4885 * pHCI is getting suspended. Since mpxio client 4886 * devices may not be suspended at this point, to avoid 4887 * a potential stack overflow, it is important to suspend 4888 * client devices before pHCI can be suspended. 4889 */ 4890 4891 MDI_DEBUG(2, (CE_NOTE, dip, 4892 "!pHCI pre_suspend: called %p\n", ph)); 4893 /* 4894 * Suspend all the client devices accessible through this pHCI 4895 */ 4896 pip = ph->ph_path_head; 4897 while (pip != NULL && rv == DDI_SUCCESS) { 4898 dev_info_t *cdip; 4899 MDI_PI_LOCK(pip); 4900 next = 4901 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4902 ct = MDI_PI(pip)->pi_client; 4903 i_mdi_client_lock(ct, pip); 4904 cdip = ct->ct_dip; 4905 MDI_PI_UNLOCK(pip); 4906 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 4907 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 4908 i_mdi_client_unlock(ct); 4909 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 4910 DDI_SUCCESS) { 4911 /* 4912 * Suspend of one of the client 4913 * device has failed. 4914 */ 4915 MDI_DEBUG(1, (CE_WARN, dip, 4916 "!Suspend of device (%s%d) failed.", 4917 ddi_driver_name(cdip), 4918 ddi_get_instance(cdip))); 4919 failed_pip = pip; 4920 break; 4921 } 4922 } else { 4923 i_mdi_client_unlock(ct); 4924 } 4925 pip = next; 4926 } 4927 4928 if (rv == DDI_SUCCESS) { 4929 /* 4930 * Suspend of client devices is complete. Proceed 4931 * with pHCI suspend. 4932 */ 4933 MDI_PHCI_SET_SUSPEND(ph); 4934 } else { 4935 /* 4936 * Revert back all the suspended client device states 4937 * to converse. 
4938 */ 4939 pip = ph->ph_path_head; 4940 while (pip != failed_pip) { 4941 dev_info_t *cdip; 4942 MDI_PI_LOCK(pip); 4943 next = 4944 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4945 ct = MDI_PI(pip)->pi_client; 4946 i_mdi_client_lock(ct, pip); 4947 cdip = ct->ct_dip; 4948 MDI_PI_UNLOCK(pip); 4949 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 4950 i_mdi_client_unlock(ct); 4951 (void) devi_attach(cdip, DDI_RESUME); 4952 } else { 4953 i_mdi_client_unlock(ct); 4954 } 4955 pip = next; 4956 } 4957 } 4958 break; 4959 4960 default: 4961 rv = DDI_FAILURE; 4962 break; 4963 } 4964 MDI_PHCI_UNLOCK(ph); 4965 return (rv); 4966 } 4967 4968 /*ARGSUSED*/ 4969 static int 4970 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4971 { 4972 int rv = DDI_SUCCESS; 4973 mdi_client_t *ct; 4974 4975 ct = i_devi_get_client(dip); 4976 if (ct == NULL) { 4977 return (rv); 4978 } 4979 4980 MDI_CLIENT_LOCK(ct); 4981 switch (cmd) { 4982 case DDI_DETACH: 4983 MDI_DEBUG(2, (CE_NOTE, dip, 4984 "!Client pre_detach: called %p\n", ct)); 4985 MDI_CLIENT_SET_DETACH(ct); 4986 break; 4987 4988 case DDI_SUSPEND: 4989 MDI_DEBUG(2, (CE_NOTE, dip, 4990 "!Client pre_suspend: called %p\n", ct)); 4991 MDI_CLIENT_SET_SUSPEND(ct); 4992 break; 4993 4994 default: 4995 rv = DDI_FAILURE; 4996 break; 4997 } 4998 MDI_CLIENT_UNLOCK(ct); 4999 return (rv); 5000 } 5001 5002 /* 5003 * mdi_post_detach(): 5004 * Post detach notification handler 5005 */ 5006 5007 /*ARGSUSED*/ 5008 void 5009 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5010 { 5011 /* 5012 * Detach/Suspend of mpxio component failed. Update our state 5013 * too 5014 */ 5015 if (MDI_PHCI(dip)) 5016 i_mdi_phci_post_detach(dip, cmd, error); 5017 5018 if (MDI_CLIENT(dip)) 5019 i_mdi_client_post_detach(dip, cmd, error); 5020 } 5021 5022 /*ARGSUSED*/ 5023 static void 5024 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5025 { 5026 mdi_phci_t *ph; 5027 5028 /* 5029 * Detach/Suspend of phci component failed. 
Update our state 5030 * too 5031 */ 5032 ph = i_devi_get_phci(dip); 5033 if (ph == NULL) { 5034 return; 5035 } 5036 5037 MDI_PHCI_LOCK(ph); 5038 /* 5039 * Detach of pHCI failed. Restore back converse 5040 * state 5041 */ 5042 switch (cmd) { 5043 case DDI_DETACH: 5044 MDI_DEBUG(2, (CE_NOTE, dip, 5045 "!pHCI post_detach: called %p\n", ph)); 5046 if (error != DDI_SUCCESS) 5047 MDI_PHCI_SET_ATTACH(ph); 5048 break; 5049 5050 case DDI_SUSPEND: 5051 MDI_DEBUG(2, (CE_NOTE, dip, 5052 "!pHCI post_suspend: called %p\n", ph)); 5053 if (error != DDI_SUCCESS) 5054 MDI_PHCI_SET_RESUME(ph); 5055 break; 5056 } 5057 MDI_PHCI_UNLOCK(ph); 5058 } 5059 5060 /*ARGSUSED*/ 5061 static void 5062 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5063 { 5064 mdi_client_t *ct; 5065 5066 ct = i_devi_get_client(dip); 5067 if (ct == NULL) { 5068 return; 5069 } 5070 MDI_CLIENT_LOCK(ct); 5071 /* 5072 * Detach of Client failed. Restore back converse 5073 * state 5074 */ 5075 switch (cmd) { 5076 case DDI_DETACH: 5077 MDI_DEBUG(2, (CE_NOTE, dip, 5078 "!Client post_detach: called %p\n", ct)); 5079 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5080 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5081 "i_mdi_pm_rele_client\n")); 5082 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5083 } else { 5084 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5085 "i_mdi_pm_reset_client\n")); 5086 i_mdi_pm_reset_client(ct); 5087 } 5088 if (error != DDI_SUCCESS) 5089 MDI_CLIENT_SET_ATTACH(ct); 5090 break; 5091 5092 case DDI_SUSPEND: 5093 MDI_DEBUG(2, (CE_NOTE, dip, 5094 "!Client post_suspend: called %p\n", ct)); 5095 if (error != DDI_SUCCESS) 5096 MDI_CLIENT_SET_RESUME(ct); 5097 break; 5098 } 5099 MDI_CLIENT_UNLOCK(ct); 5100 } 5101 5102 /* 5103 * create and install per-path (client - pHCI) statistics 5104 * I/O stats supported: nread, nwritten, reads, and writes 5105 * Error stats - hard errors, soft errors, & transport errors 5106 */ 5107 static int 5108 
i_mdi_pi_kstat_create(mdi_pathinfo_t *pip) 5109 { 5110 5111 dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip; 5112 dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip; 5113 char ksname[KSTAT_STRLEN]; 5114 mdi_pathinfo_t *cpip; 5115 const char *err_postfix = ",err"; 5116 kstat_t *kiosp, *kerrsp; 5117 struct pi_errs *nsp; 5118 struct mdi_pi_kstats *mdi_statp; 5119 5120 ASSERT(client != NULL && ppath != NULL); 5121 5122 ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex))); 5123 5124 if (MDI_PI(pip)->pi_kstats != NULL) 5125 return (MDI_SUCCESS); 5126 5127 for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL; 5128 cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) { 5129 if (cpip == pip) 5130 continue; 5131 /* 5132 * We have found a different path with same parent 5133 * kstats for a given client-pHCI are common 5134 */ 5135 if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) && 5136 (MDI_PI(cpip)->pi_kstats != NULL)) { 5137 MDI_PI(cpip)->pi_kstats->pi_kstat_ref++; 5138 MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats; 5139 return (MDI_SUCCESS); 5140 } 5141 } 5142 5143 /* 5144 * stats are named as follows: TGTx.HBAy, e.g. 
"ssd0.fp0" 5145 * clamp length of name against max length of error kstat name 5146 */ 5147 if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d", 5148 ddi_driver_name(client), ddi_get_instance(client), 5149 ddi_driver_name(ppath), ddi_get_instance(ppath)) > 5150 (KSTAT_STRLEN - strlen(err_postfix))) { 5151 return (MDI_FAILURE); 5152 } 5153 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5154 KSTAT_TYPE_IO, 1, 0)) == NULL) { 5155 return (MDI_FAILURE); 5156 } 5157 5158 (void) strcat(ksname, err_postfix); 5159 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5160 KSTAT_TYPE_NAMED, 5161 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5162 5163 if (kerrsp == NULL) { 5164 kstat_delete(kiosp); 5165 return (MDI_FAILURE); 5166 } 5167 5168 nsp = (struct pi_errs *)kerrsp->ks_data; 5169 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5170 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5171 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5172 KSTAT_DATA_UINT32); 5173 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5174 KSTAT_DATA_UINT32); 5175 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5176 KSTAT_DATA_UINT32); 5177 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5178 KSTAT_DATA_UINT32); 5179 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5180 KSTAT_DATA_UINT32); 5181 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5182 KSTAT_DATA_UINT32); 5183 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5184 KSTAT_DATA_UINT32); 5185 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5186 5187 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5188 mdi_statp->pi_kstat_ref = 1; 5189 mdi_statp->pi_kstat_iostats = kiosp; 5190 mdi_statp->pi_kstat_errstats = kerrsp; 5191 kstat_install(kiosp); 5192 kstat_install(kerrsp); 5193 MDI_PI(pip)->pi_kstats = mdi_statp; 5194 return (MDI_SUCCESS); 5195 } 5196 5197 /* 5198 * destroy per-path 
properties 5199 */ 5200 static void 5201 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5202 { 5203 5204 struct mdi_pi_kstats *mdi_statp; 5205 5206 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5207 return; 5208 5209 MDI_PI(pip)->pi_kstats = NULL; 5210 5211 /* 5212 * the kstat may be shared between multiple pathinfo nodes 5213 * decrement this pathinfo's usage, removing the kstats 5214 * themselves when the last pathinfo reference is removed. 5215 */ 5216 ASSERT(mdi_statp->pi_kstat_ref > 0); 5217 if (--mdi_statp->pi_kstat_ref != 0) 5218 return; 5219 5220 kstat_delete(mdi_statp->pi_kstat_iostats); 5221 kstat_delete(mdi_statp->pi_kstat_errstats); 5222 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5223 } 5224 5225 /* 5226 * update I/O paths KSTATS 5227 */ 5228 void 5229 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5230 { 5231 kstat_t *iostatp; 5232 size_t xfer_cnt; 5233 5234 ASSERT(pip != NULL); 5235 5236 /* 5237 * I/O can be driven across a path prior to having path 5238 * statistics available, i.e. probe(9e). 5239 */ 5240 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5241 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5242 xfer_cnt = bp->b_bcount - bp->b_resid; 5243 if (bp->b_flags & B_READ) { 5244 KSTAT_IO_PTR(iostatp)->reads++; 5245 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5246 } else { 5247 KSTAT_IO_PTR(iostatp)->writes++; 5248 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5249 } 5250 } 5251 } 5252 5253 /* 5254 * Enable the path(specific client/target/initiator) 5255 * Enabling a path means that MPxIO may select the enabled path for routing 5256 * future I/O requests, subject to other path state constraints. 5257 */ 5258 int 5259 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5260 { 5261 mdi_phci_t *ph; 5262 5263 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5264 if (ph == NULL) { 5265 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5266 " failed. 
pip: %p ph = NULL\n", pip)); 5267 return (MDI_FAILURE); 5268 } 5269 5270 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5271 MDI_ENABLE_OP); 5272 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5273 " Returning success pip = %p. ph = %p\n", pip, ph)); 5274 return (MDI_SUCCESS); 5275 5276 } 5277 5278 /* 5279 * Disable the path (specific client/target/initiator) 5280 * Disabling a path means that MPxIO will not select the disabled path for 5281 * routing any new I/O requests. 5282 */ 5283 int 5284 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5285 { 5286 mdi_phci_t *ph; 5287 5288 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5289 if (ph == NULL) { 5290 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5291 " failed. pip: %p ph = NULL\n", pip)); 5292 return (MDI_FAILURE); 5293 } 5294 5295 (void) i_mdi_enable_disable_path(pip, 5296 ph->ph_vhci, flags, MDI_DISABLE_OP); 5297 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5298 "Returning success pip = %p. ph = %p", pip, ph)); 5299 return (MDI_SUCCESS); 5300 } 5301 5302 /* 5303 * disable the path to a particular pHCI (pHCI specified in the phci_path 5304 * argument) for a particular client (specified in the client_path argument). 5305 * Disabling a path means that MPxIO will not select the disabled path for 5306 * routing any new I/O requests. 5307 * NOTE: this will be removed once the NWS files are changed to use the new 5308 * mdi_{enable,disable}_path interfaces 5309 */ 5310 int 5311 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5312 { 5313 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5314 } 5315 5316 /* 5317 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5318 * argument) for a particular client (specified in the client_path argument). 5319 * Enabling a path means that MPxIO may select the enabled path for routing 5320 * future I/O requests, subject to other path state constraints. 
5321 * NOTE: this will be removed once the NWS files are changed to use the new 5322 * mdi_{enable,disable}_path interfaces 5323 */ 5324 5325 int 5326 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5327 { 5328 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5329 } 5330 5331 /* 5332 * Common routine for doing enable/disable. 5333 */ 5334 static mdi_pathinfo_t * 5335 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5336 int op) 5337 { 5338 int sync_flag = 0; 5339 int rv; 5340 mdi_pathinfo_t *next; 5341 int (*f)() = NULL; 5342 5343 f = vh->vh_ops->vo_pi_state_change; 5344 5345 sync_flag = (flags << 8) & 0xf00; 5346 5347 /* 5348 * Do a callback into the mdi consumer to let it 5349 * know that path is about to get enabled/disabled. 5350 */ 5351 if (f != NULL) { 5352 rv = (*f)(vh->vh_dip, pip, 0, 5353 MDI_PI_EXT_STATE(pip), 5354 MDI_EXT_STATE_CHANGE | sync_flag | 5355 op | MDI_BEFORE_STATE_CHANGE); 5356 if (rv != MDI_SUCCESS) { 5357 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5358 "!vo_pi_state_change: failed rv = %x", rv)); 5359 } 5360 } 5361 MDI_PI_LOCK(pip); 5362 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5363 5364 switch (flags) { 5365 case USER_DISABLE: 5366 if (op == MDI_DISABLE_OP) 5367 MDI_PI_SET_USER_DISABLE(pip); 5368 else 5369 MDI_PI_SET_USER_ENABLE(pip); 5370 break; 5371 case DRIVER_DISABLE: 5372 if (op == MDI_DISABLE_OP) 5373 MDI_PI_SET_DRV_DISABLE(pip); 5374 else 5375 MDI_PI_SET_DRV_ENABLE(pip); 5376 break; 5377 case DRIVER_DISABLE_TRANSIENT: 5378 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5379 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5380 else 5381 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5382 break; 5383 } 5384 MDI_PI_UNLOCK(pip); 5385 /* 5386 * Do a callback into the mdi consumer to let it 5387 * know that path is now enabled/disabled. 
5388 */ 5389 if (f != NULL) { 5390 rv = (*f)(vh->vh_dip, pip, 0, 5391 MDI_PI_EXT_STATE(pip), 5392 MDI_EXT_STATE_CHANGE | sync_flag | 5393 op | MDI_AFTER_STATE_CHANGE); 5394 if (rv != MDI_SUCCESS) { 5395 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5396 "!vo_pi_state_change: failed rv = %x", rv)); 5397 } 5398 } 5399 return (next); 5400 } 5401 5402 /* 5403 * Common routine for doing enable/disable. 5404 * NOTE: this will be removed once the NWS files are changed to use the new 5405 * mdi_{enable,disable}_path has been putback 5406 */ 5407 int 5408 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5409 { 5410 5411 mdi_phci_t *ph; 5412 mdi_vhci_t *vh = NULL; 5413 mdi_client_t *ct; 5414 mdi_pathinfo_t *next, *pip; 5415 int found_it; 5416 5417 ph = i_devi_get_phci(pdip); 5418 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5419 " Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip)); 5420 if (ph == NULL) { 5421 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5422 " failed. ph = NULL operation = %d\n", op)); 5423 return (MDI_FAILURE); 5424 } 5425 5426 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5427 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5428 " Invalid operation = %d\n", op)); 5429 return (MDI_FAILURE); 5430 } 5431 5432 vh = ph->ph_vhci; 5433 5434 if (cdip == NULL) { 5435 /* 5436 * Need to mark the Phci as enabled/disabled. 
5437 */ 5438 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5439 "Operation %d for the phci\n", op)); 5440 MDI_PHCI_LOCK(ph); 5441 switch (flags) { 5442 case USER_DISABLE: 5443 if (op == MDI_DISABLE_OP) 5444 MDI_PHCI_SET_USER_DISABLE(ph); 5445 else 5446 MDI_PHCI_SET_USER_ENABLE(ph); 5447 break; 5448 case DRIVER_DISABLE: 5449 if (op == MDI_DISABLE_OP) 5450 MDI_PHCI_SET_DRV_DISABLE(ph); 5451 else 5452 MDI_PHCI_SET_DRV_ENABLE(ph); 5453 break; 5454 case DRIVER_DISABLE_TRANSIENT: 5455 if (op == MDI_DISABLE_OP) 5456 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5457 else 5458 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5459 break; 5460 default: 5461 MDI_PHCI_UNLOCK(ph); 5462 MDI_DEBUG(1, (CE_NOTE, NULL, 5463 "!i_mdi_pi_enable_disable:" 5464 " Invalid flag argument= %d\n", flags)); 5465 } 5466 5467 /* 5468 * Phci has been disabled. Now try to enable/disable 5469 * path info's to each client. 5470 */ 5471 pip = ph->ph_path_head; 5472 while (pip != NULL) { 5473 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5474 } 5475 MDI_PHCI_UNLOCK(ph); 5476 } else { 5477 5478 /* 5479 * Disable a specific client. 5480 */ 5481 ct = i_devi_get_client(cdip); 5482 if (ct == NULL) { 5483 MDI_DEBUG(1, (CE_NOTE, NULL, 5484 "!i_mdi_pi_enable_disable:" 5485 " failed. ct = NULL operation = %d\n", op)); 5486 return (MDI_FAILURE); 5487 } 5488 5489 MDI_CLIENT_LOCK(ct); 5490 pip = ct->ct_path_head; 5491 found_it = 0; 5492 while (pip != NULL) { 5493 MDI_PI_LOCK(pip); 5494 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5495 if (MDI_PI(pip)->pi_phci == ph) { 5496 MDI_PI_UNLOCK(pip); 5497 found_it = 1; 5498 break; 5499 } 5500 MDI_PI_UNLOCK(pip); 5501 pip = next; 5502 } 5503 5504 5505 MDI_CLIENT_UNLOCK(ct); 5506 if (found_it == 0) { 5507 MDI_DEBUG(1, (CE_NOTE, NULL, 5508 "!i_mdi_pi_enable_disable:" 5509 " failed. 
Could not find corresponding pip\n")); 5510 return (MDI_FAILURE); 5511 } 5512 5513 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5514 } 5515 5516 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5517 " Returning success op: %x pdip = %p cdip = %p\n", op, 5518 pdip, cdip)); 5519 return (MDI_SUCCESS); 5520 } 5521 5522 /*ARGSUSED3*/ 5523 int 5524 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp, 5525 int flags, clock_t timeout) 5526 { 5527 mdi_pathinfo_t *pip; 5528 dev_info_t *dip; 5529 clock_t interval = drv_usectohz(100000); /* 0.1 sec */ 5530 char *paddr; 5531 5532 MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm)); 5533 5534 if (!MDI_PHCI(pdip)) 5535 return (MDI_FAILURE); 5536 5537 paddr = strchr(devnm, '@'); 5538 if (paddr == NULL) 5539 return (MDI_FAILURE); 5540 5541 paddr++; /* skip '@' */ 5542 pip = mdi_pi_find(pdip, NULL, paddr); 5543 while (pip == NULL && timeout > 0) { 5544 if (interval > timeout) 5545 interval = timeout; 5546 if (flags & NDI_DEVI_DEBUG) { 5547 cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n", 5548 ddi_driver_name(pdip), ddi_get_instance(pdip), 5549 paddr, interval, timeout); 5550 } 5551 delay(interval); 5552 timeout -= interval; 5553 interval += interval; 5554 pip = mdi_pi_find(pdip, NULL, paddr); 5555 } 5556 5557 if (pip == NULL) 5558 return (MDI_FAILURE); 5559 dip = mdi_pi_get_client(pip); 5560 if (ndi_devi_online(dip, flags) != NDI_SUCCESS) 5561 return (MDI_FAILURE); 5562 *cdipp = dip; 5563 5564 /* TODO: holding should happen inside search functions */ 5565 ndi_hold_devi(dip); 5566 return (MDI_SUCCESS); 5567 } 5568 5569 /* 5570 * Ensure phci powered up 5571 */ 5572 static void 5573 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5574 { 5575 dev_info_t *ph_dip; 5576 5577 ASSERT(pip != NULL); 5578 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5579 5580 if (MDI_PI(pip)->pi_pm_held) { 5581 return; 5582 } 5583 5584 ph_dip = mdi_pi_get_phci(pip); 5585 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for 
%s%d\n", 5586 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5587 if (ph_dip == NULL) { 5588 return; 5589 } 5590 5591 MDI_PI_UNLOCK(pip); 5592 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5593 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5594 pm_hold_power(ph_dip); 5595 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5596 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5597 MDI_PI_LOCK(pip); 5598 5599 MDI_PI(pip)->pi_pm_held = 1; 5600 } 5601 5602 /* 5603 * Allow phci powered down 5604 */ 5605 static void 5606 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5607 { 5608 dev_info_t *ph_dip = NULL; 5609 5610 ASSERT(pip != NULL); 5611 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5612 5613 if (MDI_PI(pip)->pi_pm_held == 0) { 5614 return; 5615 } 5616 5617 ph_dip = mdi_pi_get_phci(pip); 5618 ASSERT(ph_dip != NULL); 5619 5620 MDI_PI_UNLOCK(pip); 5621 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n", 5622 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5623 5624 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5625 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5626 pm_rele_power(ph_dip); 5627 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5628 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5629 5630 MDI_PI_LOCK(pip); 5631 MDI_PI(pip)->pi_pm_held = 0; 5632 } 5633 5634 static void 5635 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5636 { 5637 ASSERT(ct); 5638 5639 ct->ct_power_cnt += incr; 5640 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client " 5641 "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr)); 5642 ASSERT(ct->ct_power_cnt >= 0); 5643 } 5644 5645 static void 5646 i_mdi_rele_all_phci(mdi_client_t *ct) 5647 { 5648 mdi_pathinfo_t *pip; 5649 5650 ASSERT(mutex_owned(&ct->ct_mutex)); 5651 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5652 while (pip != NULL) { 5653 mdi_hold_path(pip); 5654 MDI_PI_LOCK(pip); 5655 i_mdi_pm_rele_pip(pip); 5656 MDI_PI_UNLOCK(pip); 5657 mdi_rele_path(pip); 5658 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5659 } 5660 } 5661 5662 static void 
/*
 * Drop decr holds from the client's power count (only while the client
 * node is attached) and, once the count is zero, release the pHCI power
 * hold of every path of the client.
 */
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(ct);

	if (i_ddi_devi_attached(ct->ct_dip)) {
		ct->ct_power_cnt -= decr;
		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/*
 * Forget all power holds of the client: zero the hold count, release
 * every path's pHCI power hold, clear the config/unconfig hold flags and
 * record that a reset happened (ct_powercnt_reset).
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	ct->ct_powercnt_reset = 1;
}

/*
 * Take a pHCI power hold for every path of the client.  The client lock
 * must be owned by the caller; each path is held while its own lock is
 * taken for the hold.
 */
static void
i_mdi_pm_hold_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t  *pip;
	ASSERT(mutex_owned(&ct->ct_mutex));

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_hold_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * Take a power hold on the path's pHCI and power it up with pm_powerup().
 * If pm_powerup() returns DDI_FAILURE the hold is released again and
 * MDI_FAILURE is returned; otherwise MDI_SUCCESS.
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
	    ddi_get_instance(ph_dip)));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
		    "pm_powerup FAILED for %s%d\n",
		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));

		/* undo the hold taken above */
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * Power up the pHCI of every path of the client.  Entered with the client
 * lock held; the lock is dropped around each power-up (the path is held
 * meanwhile) and reacquired before moving to the next path.
 *
 * Returns MDI_SUCCESS if at least one pHCI was powered up, MDI_FAILURE
 * otherwise.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t  *pip;
	int		succeeded = 0;

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_CLIENT_UNLOCK(ct);
		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
			succeeded = 1;

		ASSERT(ct == MDI_PI(pip)->pi_client);
		MDI_CLIENT_LOCK(ct);
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		/* everything else is handled by the generic bus_power op */
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/* cleared again in the POST_NOTIFICATION case below */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the client's new power state on success */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* let waiters serialized on the power transition proceed */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* power level was raised: power up and hold the pHCI(s) */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* power level dropped to zero: release the holds */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config power handling for one client node: after any in-progress
 * power transition has finished, power up the client's pHCI(s) (when no
 * hold exists yet) and take one hold per path, remembering the hold in
 * ct_powercnt_config.  Clients that are not in the failed state, or that
 * already carry a config hold, are left alone and MDI_SUCCESS is returned.
 *
 * Returns MDI_FAILURE if child is not a known client, else the result of
 * powering up the pHCI(s).
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one ALREADY held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config power handling under vHCI parent: for the single named child
 * when one is given, otherwise for every child of parent, stopping at the
 * first child that fails.
 */
static int
i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(parent));

	/* ndi_devi_config_one */
	if (child) {
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
	return (ret);
}

/*
 * Pre-unconfig power handling for one client node: after any in-progress
 * power transition has finished, power up the client's pHCI(s) (when no
 * hold exists yet) and take one hold per path, remembering the hold in
 * ct_powercnt_unconfig.  *held is set to 1 when a hold is (or already
 * was) in place.
 *
 * Returns MDI_SUCCESS without touching *held for a client that is not
 * attached, and MDI_FAILURE for an unknown client or for a powered-down
 * client when NDI_AUTODETACH is set in flags.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(ct->ct_dip)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * Pre-unconfig power handling under vHCI parent: for the single named
 * child when one is given, otherwise for every child of parent.  *held is
 * cleared on entry; when walking all children the result is MDI_SUCCESS
 * as soon as any child ended up held, otherwise it is the status of the
 * last child visited.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
    int flags)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(parent));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);

	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/*
 * Post-config power handling for one client node: undo the hold taken by
 * the pre-config step.  Nothing is done when no config hold is recorded,
 * when the holds were reset in the meantime, or when the client is still
 * in the failed state.  Otherwise the holds are either reset wholesale or
 * released once per ONLINE/STANDBY path, and the config hold flag is
 * cleared.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int		valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		/* count the paths that are currently ONLINE or STANDBY */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-config power handling under vHCI parent: for the single named
 * child when one is given, otherwise for every child of parent.
 */
static void
i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
{
	int		circ;
	dev_info_t	*cdip;
	ASSERT(MDI_VHCI(parent));

	/* ndi_devi_config_one */
	if (child) {
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
}

/*
 * Post-unconfig power handling for one client node: undo the hold taken
 * by the pre-unconfig step.  Nothing is done when no unconfig hold is
 * recorded or the holds were reset in the meantime.  Otherwise the holds
 * are either reset wholesale, or released once per ONLINE/STANDBY path
 * with the unconfig hold flag cleared.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		/* the reset also clears ct_powercnt_unconfig */
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int		valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
		/* count the paths that are currently ONLINE or STANDBY */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-unconfig power handling under vHCI parent.  Does nothing unless
 * the matching pre-unconfig step reported a hold (held != 0); otherwise
 * handles the single named child when one is given, or every child of
 * parent.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
{
	int			circ;
	dev_info_t		*cdip;

	ASSERT(MDI_VHCI(parent));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, parent,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

ndi_devi_enter(parent, &circ); 6227 cdip = ddi_get_child(parent); 6228 while (cdip) { 6229 dev_info_t *next = ddi_get_next_sibling(cdip); 6230 6231 i_mdi_pm_post_unconfig_one(cdip); 6232 cdip = next; 6233 } 6234 ndi_devi_exit(parent, circ); 6235 } 6236 6237 int 6238 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6239 { 6240 int circ, ret = MDI_SUCCESS; 6241 dev_info_t *client_dip = NULL; 6242 mdi_client_t *ct; 6243 6244 /* 6245 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6246 * Power up pHCI for the named client device. 6247 * Note: Before the client is enumerated under vhci by phci, 6248 * client_dip can be NULL. Then proceed to power up all the 6249 * pHCIs. 6250 */ 6251 if (devnm != NULL) { 6252 ndi_devi_enter(vdip, &circ); 6253 client_dip = ndi_devi_findchild(vdip, devnm); 6254 ndi_devi_exit(vdip, circ); 6255 } 6256 6257 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op)); 6258 6259 switch (op) { 6260 case MDI_PM_PRE_CONFIG: 6261 ret = i_mdi_pm_pre_config(vdip, client_dip); 6262 6263 break; 6264 case MDI_PM_PRE_UNCONFIG: 6265 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6266 flags); 6267 6268 break; 6269 case MDI_PM_POST_CONFIG: 6270 i_mdi_pm_post_config(vdip, client_dip); 6271 6272 break; 6273 case MDI_PM_POST_UNCONFIG: 6274 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6275 6276 break; 6277 case MDI_PM_HOLD_POWER: 6278 case MDI_PM_RELE_POWER: 6279 ASSERT(args); 6280 6281 client_dip = (dev_info_t *)args; 6282 ASSERT(MDI_CLIENT(client_dip)); 6283 6284 ct = i_devi_get_client(client_dip); 6285 MDI_CLIENT_LOCK(ct); 6286 6287 if (op == MDI_PM_HOLD_POWER) { 6288 if (ct->ct_power_cnt == 0) { 6289 (void) i_mdi_power_all_phci(ct); 6290 MDI_DEBUG(4, (CE_NOTE, client_dip, 6291 "mdi_power i_mdi_pm_hold_client\n")); 6292 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6293 } 6294 } else { 6295 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6296 MDI_DEBUG(4, (CE_NOTE, client_dip, 6297 "mdi_power 
i_mdi_pm_rele_client\n")); 6298 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6299 } else { 6300 MDI_DEBUG(4, (CE_NOTE, client_dip, 6301 "mdi_power i_mdi_pm_reset_client\n")); 6302 i_mdi_pm_reset_client(ct); 6303 } 6304 } 6305 6306 MDI_CLIENT_UNLOCK(ct); 6307 break; 6308 default: 6309 break; 6310 } 6311 6312 return (ret); 6313 } 6314 6315 int 6316 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6317 { 6318 mdi_vhci_t *vhci; 6319 6320 if (!MDI_VHCI(dip)) 6321 return (MDI_FAILURE); 6322 6323 if (mdi_class) { 6324 vhci = DEVI(dip)->devi_mdi_xhci; 6325 ASSERT(vhci); 6326 *mdi_class = vhci->vh_class; 6327 } 6328 6329 return (MDI_SUCCESS); 6330 } 6331 6332 int 6333 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6334 { 6335 mdi_phci_t *phci; 6336 6337 if (!MDI_PHCI(dip)) 6338 return (MDI_FAILURE); 6339 6340 if (mdi_class) { 6341 phci = DEVI(dip)->devi_mdi_xhci; 6342 ASSERT(phci); 6343 *mdi_class = phci->ph_vhci->vh_class; 6344 } 6345 6346 return (MDI_SUCCESS); 6347 } 6348 6349 int 6350 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6351 { 6352 mdi_client_t *client; 6353 6354 if (!MDI_CLIENT(dip)) 6355 return (MDI_FAILURE); 6356 6357 if (mdi_class) { 6358 client = DEVI(dip)->devi_mdi_client; 6359 ASSERT(client); 6360 *mdi_class = client->ct_vhci->vh_class; 6361 } 6362 6363 return (MDI_SUCCESS); 6364 } 6365 6366 void * 6367 mdi_client_get_vhci_private(dev_info_t *dip) 6368 { 6369 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6370 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6371 mdi_client_t *ct; 6372 ct = i_devi_get_client(dip); 6373 return (ct->ct_vprivate); 6374 } 6375 return (NULL); 6376 } 6377 6378 void 6379 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6380 { 6381 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6382 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6383 mdi_client_t *ct; 6384 ct = i_devi_get_client(dip); 6385 ct->ct_vprivate = data; 6386 } 6387 } 6388 
/* 6389 * mdi_pi_get_vhci_private(): 6390 * Get the vhci private information associated with the 6391 * mdi_pathinfo node 6392 */ 6393 void * 6394 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6395 { 6396 caddr_t vprivate = NULL; 6397 if (pip) { 6398 vprivate = MDI_PI(pip)->pi_vprivate; 6399 } 6400 return (vprivate); 6401 } 6402 6403 /* 6404 * mdi_pi_set_vhci_private(): 6405 * Set the vhci private information in the mdi_pathinfo node 6406 */ 6407 void 6408 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6409 { 6410 if (pip) { 6411 MDI_PI(pip)->pi_vprivate = priv; 6412 } 6413 } 6414 6415 /* 6416 * mdi_phci_get_vhci_private(): 6417 * Get the vhci private information associated with the 6418 * mdi_phci node 6419 */ 6420 void * 6421 mdi_phci_get_vhci_private(dev_info_t *dip) 6422 { 6423 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6424 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6425 mdi_phci_t *ph; 6426 ph = i_devi_get_phci(dip); 6427 return (ph->ph_vprivate); 6428 } 6429 return (NULL); 6430 } 6431 6432 /* 6433 * mdi_phci_set_vhci_private(): 6434 * Set the vhci private information in the mdi_phci node 6435 */ 6436 void 6437 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6438 { 6439 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6440 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6441 mdi_phci_t *ph; 6442 ph = i_devi_get_phci(dip); 6443 ph->ph_vprivate = priv; 6444 } 6445 } 6446 6447 /* 6448 * List of vhci class names: 6449 * A vhci class name must be in this list only if the corresponding vhci 6450 * driver intends to use the mdi provided bus config implementation 6451 * (i.e., mdi_vhci_bus_config()). 6452 */ 6453 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6454 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6455 6456 /* 6457 * Built-in list of phci drivers for every vhci class. 6458 * All phci drivers expect iscsi have root device support. 
6459 */ 6460 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 6461 { "fp", 1 }, 6462 { "iscsi", 0 }, 6463 { "ibsrp", 1 } 6464 }; 6465 6466 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 6467 6468 /* 6469 * During boot time, the on-disk vhci cache for every vhci class is read 6470 * in the form of an nvlist and stored here. 6471 */ 6472 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6473 6474 /* nvpair names in vhci cache nvlist */ 6475 #define MDI_VHCI_CACHE_VERSION 1 6476 #define MDI_NVPNAME_VERSION "version" 6477 #define MDI_NVPNAME_PHCIS "phcis" 6478 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6479 6480 /* 6481 * Given vhci class name, return its on-disk vhci cache filename. 6482 * Memory for the returned filename which includes the full path is allocated 6483 * by this function. 6484 */ 6485 static char * 6486 vhclass2vhcache_filename(char *vhclass) 6487 { 6488 char *filename; 6489 int len; 6490 static char *fmt = "/etc/devices/mdi_%s_cache"; 6491 6492 /* 6493 * fmt contains the on-disk vhci cache file name format; 6494 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 6495 */ 6496 6497 /* the -1 below is to account for "%s" in the format string */ 6498 len = strlen(fmt) + strlen(vhclass) - 1; 6499 filename = kmem_alloc(len, KM_SLEEP); 6500 (void) snprintf(filename, len, fmt, vhclass); 6501 ASSERT(len == (strlen(filename) + 1)); 6502 return (filename); 6503 } 6504 6505 /* 6506 * initialize the vhci cache related data structures and read the on-disk 6507 * vhci cached data into memory. 
6508 */ 6509 static void 6510 setup_vhci_cache(mdi_vhci_t *vh) 6511 { 6512 mdi_vhci_config_t *vhc; 6513 mdi_vhci_cache_t *vhcache; 6514 int i; 6515 nvlist_t *nvl = NULL; 6516 6517 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6518 vh->vh_config = vhc; 6519 vhcache = &vhc->vhc_vhcache; 6520 6521 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6522 6523 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6524 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6525 6526 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6527 6528 /* 6529 * Create string hash; same as mod_hash_create_strhash() except that 6530 * we use NULL key destructor. 6531 */ 6532 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6533 mdi_bus_config_cache_hash_size, 6534 mod_hash_null_keydtor, mod_hash_null_valdtor, 6535 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6536 6537 setup_phci_driver_list(vh); 6538 6539 /* 6540 * The on-disk vhci cache is read during booting prior to the 6541 * lights-out period by mdi_read_devices_files(). 6542 */ 6543 for (i = 0; i < N_VHCI_CLASSES; i++) { 6544 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6545 nvl = vhcache_nvl[i]; 6546 vhcache_nvl[i] = NULL; 6547 break; 6548 } 6549 } 6550 6551 /* 6552 * this is to cover the case of some one manually causing unloading 6553 * (or detaching) and reloading (or attaching) of a vhci driver. 
6554 */ 6555 if (nvl == NULL && modrootloaded) 6556 nvl = read_on_disk_vhci_cache(vh->vh_class); 6557 6558 if (nvl != NULL) { 6559 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6560 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6561 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6562 else { 6563 cmn_err(CE_WARN, 6564 "%s: data file corrupted, will recreate\n", 6565 vhc->vhc_vhcache_filename); 6566 } 6567 rw_exit(&vhcache->vhcache_lock); 6568 nvlist_free(nvl); 6569 } 6570 6571 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6572 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6573 6574 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 6575 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 6576 } 6577 6578 /* 6579 * free all vhci cache related resources 6580 */ 6581 static int 6582 destroy_vhci_cache(mdi_vhci_t *vh) 6583 { 6584 mdi_vhci_config_t *vhc = vh->vh_config; 6585 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6586 mdi_vhcache_phci_t *cphci, *cphci_next; 6587 mdi_vhcache_client_t *cct, *cct_next; 6588 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6589 6590 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6591 return (MDI_FAILURE); 6592 6593 kmem_free(vhc->vhc_vhcache_filename, 6594 strlen(vhc->vhc_vhcache_filename) + 1); 6595 6596 if (vhc->vhc_phci_driver_list) 6597 free_phci_driver_list(vhc); 6598 6599 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6600 6601 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6602 cphci = cphci_next) { 6603 cphci_next = cphci->cphci_next; 6604 free_vhcache_phci(cphci); 6605 } 6606 6607 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 6608 cct_next = cct->cct_next; 6609 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6610 cpi_next = cpi->cpi_next; 6611 free_vhcache_pathinfo(cpi); 6612 } 6613 free_vhcache_client(cct); 6614 } 6615 6616 rw_destroy(&vhcache->vhcache_lock); 6617 6618 mutex_destroy(&vhc->vhc_lock); 6619 cv_destroy(&vhc->vhc_cv); 
6620 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6621 return (MDI_SUCCESS); 6622 } 6623 6624 /* 6625 * Setup the list of phci drivers associated with the specified vhci class. 6626 * MDI uses this information to rebuild bus config cache if in case the 6627 * cache is not available or corrupted. 6628 */ 6629 static void 6630 setup_phci_driver_list(mdi_vhci_t *vh) 6631 { 6632 mdi_vhci_config_t *vhc = vh->vh_config; 6633 mdi_phci_driver_info_t *driver_list; 6634 char **driver_list1; 6635 uint_t ndrivers, ndrivers1; 6636 int i, j; 6637 6638 if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) { 6639 driver_list = scsi_phci_driver_list; 6640 ndrivers = sizeof (scsi_phci_driver_list) / 6641 sizeof (mdi_phci_driver_info_t); 6642 } else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) { 6643 driver_list = ib_phci_driver_list; 6644 ndrivers = sizeof (ib_phci_driver_list) / 6645 sizeof (mdi_phci_driver_info_t); 6646 } else { 6647 driver_list = NULL; 6648 ndrivers = 0; 6649 } 6650 6651 /* 6652 * The driver.conf file of a vhci driver can specify additional 6653 * phci drivers using a project private "phci-drivers" property. 
6654 */ 6655 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip, 6656 DDI_PROP_DONTPASS, "phci-drivers", &driver_list1, 6657 &ndrivers1) != DDI_PROP_SUCCESS) 6658 ndrivers1 = 0; 6659 6660 vhc->vhc_nphci_drivers = ndrivers + ndrivers1; 6661 if (vhc->vhc_nphci_drivers == 0) 6662 return; 6663 6664 vhc->vhc_phci_driver_list = kmem_alloc( 6665 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP); 6666 6667 for (i = 0; i < ndrivers; i++) { 6668 vhc->vhc_phci_driver_list[i].phdriver_name = 6669 i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP); 6670 vhc->vhc_phci_driver_list[i].phdriver_root_support = 6671 driver_list[i].phdriver_root_support; 6672 } 6673 6674 for (j = 0; j < ndrivers1; j++, i++) { 6675 vhc->vhc_phci_driver_list[i].phdriver_name = 6676 i_ddi_strdup(driver_list1[j], KM_SLEEP); 6677 vhc->vhc_phci_driver_list[i].phdriver_root_support = 1; 6678 } 6679 6680 if (ndrivers1) 6681 ddi_prop_free(driver_list1); 6682 } 6683 6684 /* 6685 * Free the memory allocated for the phci driver list 6686 */ 6687 static void 6688 free_phci_driver_list(mdi_vhci_config_t *vhc) 6689 { 6690 int i; 6691 6692 if (vhc->vhc_phci_driver_list == NULL) 6693 return; 6694 6695 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 6696 kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name, 6697 strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1); 6698 } 6699 6700 kmem_free(vhc->vhc_phci_driver_list, 6701 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers); 6702 } 6703 6704 /* 6705 * Stop all vhci cache related async threads and free their resources. 
6706 */ 6707 static int 6708 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6709 { 6710 mdi_async_client_config_t *acc, *acc_next; 6711 6712 mutex_enter(&vhc->vhc_lock); 6713 vhc->vhc_flags |= MDI_VHC_EXIT; 6714 ASSERT(vhc->vhc_acc_thrcount >= 0); 6715 cv_broadcast(&vhc->vhc_cv); 6716 6717 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6718 vhc->vhc_acc_thrcount != 0) { 6719 mutex_exit(&vhc->vhc_lock); 6720 delay(1); 6721 mutex_enter(&vhc->vhc_lock); 6722 } 6723 6724 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6725 6726 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6727 acc_next = acc->acc_next; 6728 free_async_client_config(acc); 6729 } 6730 vhc->vhc_acc_list_head = NULL; 6731 vhc->vhc_acc_list_tail = NULL; 6732 vhc->vhc_acc_count = 0; 6733 6734 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6735 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6736 mutex_exit(&vhc->vhc_lock); 6737 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6738 vhcache_dirty(vhc); 6739 return (MDI_FAILURE); 6740 } 6741 } else 6742 mutex_exit(&vhc->vhc_lock); 6743 6744 if (callb_delete(vhc->vhc_cbid) != 0) 6745 return (MDI_FAILURE); 6746 6747 return (MDI_SUCCESS); 6748 } 6749 6750 /* 6751 * Stop vhci cache flush thread 6752 */ 6753 /* ARGSUSED */ 6754 static boolean_t 6755 stop_vhcache_flush_thread(void *arg, int code) 6756 { 6757 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 6758 6759 mutex_enter(&vhc->vhc_lock); 6760 vhc->vhc_flags |= MDI_VHC_EXIT; 6761 cv_broadcast(&vhc->vhc_cv); 6762 6763 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6764 mutex_exit(&vhc->vhc_lock); 6765 delay(1); 6766 mutex_enter(&vhc->vhc_lock); 6767 } 6768 6769 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6770 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6771 mutex_exit(&vhc->vhc_lock); 6772 (void) flush_vhcache(vhc, 1); 6773 } else 6774 mutex_exit(&vhc->vhc_lock); 6775 6776 return (B_TRUE); 6777 } 6778 6779 /* 6780 * Enqueue the vhcache phci (cphci) at the tail of the list 6781 */ 6782 static 
void 6783 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6784 { 6785 cphci->cphci_next = NULL; 6786 if (vhcache->vhcache_phci_head == NULL) 6787 vhcache->vhcache_phci_head = cphci; 6788 else 6789 vhcache->vhcache_phci_tail->cphci_next = cphci; 6790 vhcache->vhcache_phci_tail = cphci; 6791 } 6792 6793 /* 6794 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6795 */ 6796 static void 6797 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6798 mdi_vhcache_pathinfo_t *cpi) 6799 { 6800 cpi->cpi_next = NULL; 6801 if (cct->cct_cpi_head == NULL) 6802 cct->cct_cpi_head = cpi; 6803 else 6804 cct->cct_cpi_tail->cpi_next = cpi; 6805 cct->cct_cpi_tail = cpi; 6806 } 6807 6808 /* 6809 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6810 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6811 * flag set come at the beginning of the list. All cpis which have this 6812 * flag set come at the end of the list. 6813 */ 6814 static void 6815 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6816 mdi_vhcache_pathinfo_t *newcpi) 6817 { 6818 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6819 6820 if (cct->cct_cpi_head == NULL || 6821 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6822 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6823 else { 6824 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6825 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6826 prev_cpi = cpi, cpi = cpi->cpi_next) 6827 ; 6828 6829 if (prev_cpi == NULL) 6830 cct->cct_cpi_head = newcpi; 6831 else 6832 prev_cpi->cpi_next = newcpi; 6833 6834 newcpi->cpi_next = cpi; 6835 6836 if (cpi == NULL) 6837 cct->cct_cpi_tail = newcpi; 6838 } 6839 } 6840 6841 /* 6842 * Enqueue the vhcache client (cct) at the tail of the list 6843 */ 6844 static void 6845 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6846 mdi_vhcache_client_t *cct) 6847 { 6848 cct->cct_next = NULL; 6849 if (vhcache->vhcache_client_head == NULL) 6850 
vhcache->vhcache_client_head = cct; 6851 else 6852 vhcache->vhcache_client_tail->cct_next = cct; 6853 vhcache->vhcache_client_tail = cct; 6854 } 6855 6856 static void 6857 free_string_array(char **str, int nelem) 6858 { 6859 int i; 6860 6861 if (str) { 6862 for (i = 0; i < nelem; i++) { 6863 if (str[i]) 6864 kmem_free(str[i], strlen(str[i]) + 1); 6865 } 6866 kmem_free(str, sizeof (char *) * nelem); 6867 } 6868 } 6869 6870 static void 6871 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6872 { 6873 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6874 kmem_free(cphci, sizeof (*cphci)); 6875 } 6876 6877 static void 6878 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6879 { 6880 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6881 kmem_free(cpi, sizeof (*cpi)); 6882 } 6883 6884 static void 6885 free_vhcache_client(mdi_vhcache_client_t *cct) 6886 { 6887 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6888 kmem_free(cct, sizeof (*cct)); 6889 } 6890 6891 static char * 6892 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6893 { 6894 char *name_addr; 6895 int len; 6896 6897 len = strlen(ct_name) + strlen(ct_addr) + 2; 6898 name_addr = kmem_alloc(len, KM_SLEEP); 6899 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6900 6901 if (ret_len) 6902 *ret_len = len; 6903 return (name_addr); 6904 } 6905 6906 /* 6907 * Copy the contents of paddrnvl to vhci cache. 6908 * paddrnvl nvlist contains path information for a vhci client. 6909 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 
6910 */ 6911 static void 6912 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6913 mdi_vhcache_client_t *cct) 6914 { 6915 nvpair_t *nvp = NULL; 6916 mdi_vhcache_pathinfo_t *cpi; 6917 uint_t nelem; 6918 uint32_t *val; 6919 6920 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6921 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6922 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6923 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6924 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6925 ASSERT(nelem == 2); 6926 cpi->cpi_cphci = cphci_list[val[0]]; 6927 cpi->cpi_flags = val[1]; 6928 enqueue_tail_vhcache_pathinfo(cct, cpi); 6929 } 6930 } 6931 6932 /* 6933 * Copy the contents of caddrmapnvl to vhci cache. 6934 * caddrmapnvl nvlist contains vhci client address to phci client address 6935 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6936 * this nvlist. 6937 */ 6938 static void 6939 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6940 mdi_vhcache_phci_t *cphci_list[]) 6941 { 6942 nvpair_t *nvp = NULL; 6943 nvlist_t *paddrnvl; 6944 mdi_vhcache_client_t *cct; 6945 6946 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6947 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6948 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6949 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6950 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6951 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6952 /* the client must contain at least one path */ 6953 ASSERT(cct->cct_cpi_head != NULL); 6954 6955 enqueue_vhcache_client(vhcache, cct); 6956 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6957 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6958 } 6959 } 6960 6961 /* 6962 * Copy the contents of the main nvlist to vhci cache. 6963 * 6964 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify the PHCIs to which the
 * bus specific address belongs. These integers are used as an index
 * into the phcis string array in the main nvlist to get the PHCI path.
6996 */ 6997 static int 6998 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 6999 { 7000 char **phcis, **phci_namep; 7001 uint_t nphcis; 7002 mdi_vhcache_phci_t *cphci, **cphci_list; 7003 nvlist_t *caddrmapnvl; 7004 int32_t ver; 7005 int i; 7006 size_t cphci_list_size; 7007 7008 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7009 7010 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7011 ver != MDI_VHCI_CACHE_VERSION) 7012 return (MDI_FAILURE); 7013 7014 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7015 &nphcis) != 0) 7016 return (MDI_SUCCESS); 7017 7018 ASSERT(nphcis > 0); 7019 7020 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7021 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7022 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7023 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7024 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7025 enqueue_vhcache_phci(vhcache, cphci); 7026 cphci_list[i] = cphci; 7027 } 7028 7029 ASSERT(vhcache->vhcache_phci_head != NULL); 7030 7031 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7032 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7033 7034 kmem_free(cphci_list, cphci_list_size); 7035 return (MDI_SUCCESS); 7036 } 7037 7038 /* 7039 * Build paddrnvl for the specified client using the information in the 7040 * vhci cache and add it to the caddrmapnnvl. 7041 * Returns 0 on success, errno on failure. 
7042 */ 7043 static int 7044 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7045 nvlist_t *caddrmapnvl) 7046 { 7047 mdi_vhcache_pathinfo_t *cpi; 7048 nvlist_t *nvl; 7049 int err; 7050 uint32_t val[2]; 7051 7052 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7053 7054 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7055 return (err); 7056 7057 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7058 val[0] = cpi->cpi_cphci->cphci_id; 7059 val[1] = cpi->cpi_flags; 7060 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7061 != 0) 7062 goto out; 7063 } 7064 7065 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7066 out: 7067 nvlist_free(nvl); 7068 return (err); 7069 } 7070 7071 /* 7072 * Build caddrmapnvl using the information in the vhci cache 7073 * and add it to the mainnvl. 7074 * Returns 0 on success, errno on failure. 7075 */ 7076 static int 7077 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7078 { 7079 mdi_vhcache_client_t *cct; 7080 nvlist_t *nvl; 7081 int err; 7082 7083 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7084 7085 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7086 return (err); 7087 7088 for (cct = vhcache->vhcache_client_head; cct != NULL; 7089 cct = cct->cct_next) { 7090 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7091 goto out; 7092 } 7093 7094 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7095 out: 7096 nvlist_free(nvl); 7097 return (err); 7098 } 7099 7100 /* 7101 * Build nvlist using the information in the vhci cache. 7102 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7103 * Returns nvl on success, NULL on failure. 
7104 */ 7105 static nvlist_t * 7106 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7107 { 7108 mdi_vhcache_phci_t *cphci; 7109 uint_t phci_count; 7110 char **phcis; 7111 nvlist_t *nvl; 7112 int err, i; 7113 7114 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7115 nvl = NULL; 7116 goto out; 7117 } 7118 7119 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7120 MDI_VHCI_CACHE_VERSION)) != 0) 7121 goto out; 7122 7123 rw_enter(&vhcache->vhcache_lock, RW_READER); 7124 if (vhcache->vhcache_phci_head == NULL) { 7125 rw_exit(&vhcache->vhcache_lock); 7126 return (nvl); 7127 } 7128 7129 phci_count = 0; 7130 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7131 cphci = cphci->cphci_next) 7132 cphci->cphci_id = phci_count++; 7133 7134 /* build phci pathname list */ 7135 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7136 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7137 cphci = cphci->cphci_next, i++) 7138 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7139 7140 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7141 phci_count); 7142 free_string_array(phcis, phci_count); 7143 7144 if (err == 0 && 7145 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7146 rw_exit(&vhcache->vhcache_lock); 7147 return (nvl); 7148 } 7149 7150 rw_exit(&vhcache->vhcache_lock); 7151 out: 7152 if (nvl) 7153 nvlist_free(nvl); 7154 return (NULL); 7155 } 7156 7157 /* 7158 * Lookup vhcache phci structure for the specified phci path. 7159 */ 7160 static mdi_vhcache_phci_t * 7161 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7162 { 7163 mdi_vhcache_phci_t *cphci; 7164 7165 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7166 7167 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7168 cphci = cphci->cphci_next) { 7169 if (strcmp(cphci->cphci_path, phci_path) == 0) 7170 return (cphci); 7171 } 7172 7173 return (NULL); 7174 } 7175 7176 /* 7177 * Lookup vhcache phci structure for the specified phci. 
7178 */ 7179 static mdi_vhcache_phci_t * 7180 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7181 { 7182 mdi_vhcache_phci_t *cphci; 7183 7184 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7185 7186 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7187 cphci = cphci->cphci_next) { 7188 if (cphci->cphci_phci == ph) 7189 return (cphci); 7190 } 7191 7192 return (NULL); 7193 } 7194 7195 /* 7196 * Add the specified phci to the vhci cache if not already present. 7197 */ 7198 static void 7199 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7200 { 7201 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7202 mdi_vhcache_phci_t *cphci; 7203 char *pathname; 7204 int cache_updated; 7205 7206 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7207 7208 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7209 (void) ddi_pathname(ph->ph_dip, pathname); 7210 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7211 != NULL) { 7212 cphci->cphci_phci = ph; 7213 cache_updated = 0; 7214 } else { 7215 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7216 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7217 cphci->cphci_phci = ph; 7218 enqueue_vhcache_phci(vhcache, cphci); 7219 cache_updated = 1; 7220 } 7221 7222 rw_exit(&vhcache->vhcache_lock); 7223 7224 /* 7225 * Since a new phci has been added, reset 7226 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7227 * during next vhcache_discover_paths(). 7228 */ 7229 mutex_enter(&vhc->vhc_lock); 7230 vhc->vhc_path_discovery_cutoff_time = 0; 7231 mutex_exit(&vhc->vhc_lock); 7232 7233 kmem_free(pathname, MAXPATHLEN); 7234 if (cache_updated) 7235 vhcache_dirty(vhc); 7236 } 7237 7238 /* 7239 * Remove the reference to the specified phci from the vhci cache. 
7240 */ 7241 static void 7242 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7243 { 7244 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7245 mdi_vhcache_phci_t *cphci; 7246 7247 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7248 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7249 /* do not remove the actual mdi_vhcache_phci structure */ 7250 cphci->cphci_phci = NULL; 7251 } 7252 rw_exit(&vhcache->vhcache_lock); 7253 } 7254 7255 static void 7256 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7257 mdi_vhcache_lookup_token_t *src) 7258 { 7259 if (src == NULL) { 7260 dst->lt_cct = NULL; 7261 dst->lt_cct_lookup_time = 0; 7262 } else { 7263 dst->lt_cct = src->lt_cct; 7264 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7265 } 7266 } 7267 7268 /* 7269 * Look up vhcache client for the specified client. 7270 */ 7271 static mdi_vhcache_client_t * 7272 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7273 mdi_vhcache_lookup_token_t *token) 7274 { 7275 mod_hash_val_t hv; 7276 char *name_addr; 7277 int len; 7278 7279 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7280 7281 /* 7282 * If no vhcache clean occurred since the last lookup, we can 7283 * simply return the cct from the last lookup operation. 7284 * It works because ccts are never freed except during the vhcache 7285 * cleanup operation. 
7286 */ 7287 if (token != NULL && 7288 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7289 return (token->lt_cct); 7290 7291 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7292 if (mod_hash_find(vhcache->vhcache_client_hash, 7293 (mod_hash_key_t)name_addr, &hv) == 0) { 7294 if (token) { 7295 token->lt_cct = (mdi_vhcache_client_t *)hv; 7296 token->lt_cct_lookup_time = lbolt64; 7297 } 7298 } else { 7299 if (token) { 7300 token->lt_cct = NULL; 7301 token->lt_cct_lookup_time = 0; 7302 } 7303 hv = NULL; 7304 } 7305 kmem_free(name_addr, len); 7306 return ((mdi_vhcache_client_t *)hv); 7307 } 7308 7309 /* 7310 * Add the specified path to the vhci cache if not already present. 7311 * Also add the vhcache client for the client corresponding to this path 7312 * if it doesn't already exist. 7313 */ 7314 static void 7315 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7316 { 7317 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7318 mdi_vhcache_client_t *cct; 7319 mdi_vhcache_pathinfo_t *cpi; 7320 mdi_phci_t *ph = pip->pi_phci; 7321 mdi_client_t *ct = pip->pi_client; 7322 int cache_updated = 0; 7323 7324 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7325 7326 /* if vhcache client for this pip doesn't already exist, add it */ 7327 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7328 NULL)) == NULL) { 7329 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7330 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7331 ct->ct_guid, NULL); 7332 enqueue_vhcache_client(vhcache, cct); 7333 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7334 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7335 cache_updated = 1; 7336 } 7337 7338 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7339 if (cpi->cpi_cphci->cphci_phci == ph && 7340 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7341 cpi->cpi_pip = pip; 7342 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7343 cpi->cpi_flags &= 7344 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7345 sort_vhcache_paths(cct); 7346 cache_updated = 1; 7347 } 7348 break; 7349 } 7350 } 7351 7352 if (cpi == NULL) { 7353 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7354 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7355 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7356 ASSERT(cpi->cpi_cphci != NULL); 7357 cpi->cpi_pip = pip; 7358 enqueue_vhcache_pathinfo(cct, cpi); 7359 cache_updated = 1; 7360 } 7361 7362 rw_exit(&vhcache->vhcache_lock); 7363 7364 if (cache_updated) 7365 vhcache_dirty(vhc); 7366 } 7367 7368 /* 7369 * Remove the reference to the specified path from the vhci cache. 7370 */ 7371 static void 7372 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7373 { 7374 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7375 mdi_client_t *ct = pip->pi_client; 7376 mdi_vhcache_client_t *cct; 7377 mdi_vhcache_pathinfo_t *cpi; 7378 7379 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7380 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7381 NULL)) != NULL) { 7382 for (cpi = cct->cct_cpi_head; cpi != NULL; 7383 cpi = cpi->cpi_next) { 7384 if (cpi->cpi_pip == pip) { 7385 cpi->cpi_pip = NULL; 7386 break; 7387 } 7388 } 7389 } 7390 rw_exit(&vhcache->vhcache_lock); 7391 } 7392 7393 /* 7394 * Flush the vhci cache to disk. 7395 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7396 */ 7397 static int 7398 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7399 { 7400 nvlist_t *nvl; 7401 int err; 7402 int rv; 7403 7404 /* 7405 * It is possible that the system may shutdown before 7406 * i_ddi_io_initialized (during stmsboot for example). To allow for 7407 * flushing the cache in this case do not check for 7408 * i_ddi_io_initialized when force flag is set. 
7409 */ 7410 if (force_flag == 0 && !i_ddi_io_initialized()) 7411 return (MDI_FAILURE); 7412 7413 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7414 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7415 nvlist_free(nvl); 7416 } else 7417 err = EFAULT; 7418 7419 rv = MDI_SUCCESS; 7420 mutex_enter(&vhc->vhc_lock); 7421 if (err != 0) { 7422 if (err == EROFS) { 7423 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7424 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7425 MDI_VHC_VHCACHE_DIRTY); 7426 } else { 7427 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7428 cmn_err(CE_CONT, "%s: update failed\n", 7429 vhc->vhc_vhcache_filename); 7430 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7431 } 7432 rv = MDI_FAILURE; 7433 } 7434 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7435 cmn_err(CE_CONT, 7436 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7437 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7438 } 7439 mutex_exit(&vhc->vhc_lock); 7440 7441 return (rv); 7442 } 7443 7444 /* 7445 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7446 * Exits itself if left idle for the idle timeout period. 
7447 */ 7448 static void 7449 vhcache_flush_thread(void *arg) 7450 { 7451 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7452 clock_t idle_time, quit_at_ticks; 7453 callb_cpr_t cprinfo; 7454 7455 /* number of seconds to sleep idle before exiting */ 7456 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7457 7458 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7459 "mdi_vhcache_flush"); 7460 mutex_enter(&vhc->vhc_lock); 7461 for (; ; ) { 7462 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7463 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7464 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7465 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7466 (void) cv_timedwait(&vhc->vhc_cv, 7467 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7468 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7469 } else { 7470 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7471 mutex_exit(&vhc->vhc_lock); 7472 7473 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7474 vhcache_dirty(vhc); 7475 7476 mutex_enter(&vhc->vhc_lock); 7477 } 7478 } 7479 7480 quit_at_ticks = ddi_get_lbolt() + idle_time; 7481 7482 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7483 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7484 ddi_get_lbolt() < quit_at_ticks) { 7485 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7486 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7487 quit_at_ticks); 7488 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7489 } 7490 7491 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7492 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7493 goto out; 7494 } 7495 7496 out: 7497 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7498 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7499 CALLB_CPR_EXIT(&cprinfo); 7500 } 7501 7502 /* 7503 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7504 */ 7505 static void 7506 vhcache_dirty(mdi_vhci_config_t *vhc) 7507 { 7508 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7509 int create_thread; 7510 7511 rw_enter(&vhcache->vhcache_lock, RW_READER); 7512 /* do not flush cache until the cache is fully built */ 7513 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7514 rw_exit(&vhcache->vhcache_lock); 7515 return; 7516 } 7517 rw_exit(&vhcache->vhcache_lock); 7518 7519 mutex_enter(&vhc->vhc_lock); 7520 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7521 mutex_exit(&vhc->vhc_lock); 7522 return; 7523 } 7524 7525 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7526 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7527 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7528 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7529 cv_broadcast(&vhc->vhc_cv); 7530 create_thread = 0; 7531 } else { 7532 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7533 create_thread = 1; 7534 } 7535 mutex_exit(&vhc->vhc_lock); 7536 7537 if (create_thread) 7538 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7539 0, &p0, TS_RUN, minclsyspri); 7540 } 7541 7542 /* 7543 * phci bus config structure - one for for each phci bus config operation that 7544 * we initiate on behalf of a vhci. 
7545 */ 7546 typedef struct mdi_phci_bus_config_s { 7547 char *phbc_phci_path; 7548 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7549 struct mdi_phci_bus_config_s *phbc_next; 7550 } mdi_phci_bus_config_t; 7551 7552 /* vhci bus config structure - one for each vhci bus config operation */ 7553 typedef struct mdi_vhci_bus_config_s { 7554 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7555 major_t vhbc_op_major; /* bus config op major */ 7556 uint_t vhbc_op_flags; /* bus config op flags */ 7557 kmutex_t vhbc_lock; 7558 kcondvar_t vhbc_cv; 7559 int vhbc_thr_count; 7560 } mdi_vhci_bus_config_t; 7561 7562 /* 7563 * bus config the specified phci 7564 */ 7565 static void 7566 bus_config_phci(void *arg) 7567 { 7568 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7569 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7570 dev_info_t *ph_dip; 7571 7572 /* 7573 * first configure all path components upto phci and then configure 7574 * the phci children. 7575 */ 7576 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7577 != NULL) { 7578 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7579 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7580 (void) ndi_devi_config_driver(ph_dip, 7581 vhbc->vhbc_op_flags, 7582 vhbc->vhbc_op_major); 7583 } else 7584 (void) ndi_devi_config(ph_dip, 7585 vhbc->vhbc_op_flags); 7586 7587 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7588 ndi_rele_devi(ph_dip); 7589 } 7590 7591 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7592 kmem_free(phbc, sizeof (*phbc)); 7593 7594 mutex_enter(&vhbc->vhbc_lock); 7595 vhbc->vhbc_thr_count--; 7596 if (vhbc->vhbc_thr_count == 0) 7597 cv_broadcast(&vhbc->vhbc_cv); 7598 mutex_exit(&vhbc->vhbc_lock); 7599 } 7600 7601 /* 7602 * Bus config all phcis associated with the vhci in parallel. 7603 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7604 */ 7605 static void 7606 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7607 ddi_bus_config_op_t op, major_t maj) 7608 { 7609 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7610 mdi_vhci_bus_config_t *vhbc; 7611 mdi_vhcache_phci_t *cphci; 7612 7613 rw_enter(&vhcache->vhcache_lock, RW_READER); 7614 if (vhcache->vhcache_phci_head == NULL) { 7615 rw_exit(&vhcache->vhcache_lock); 7616 return; 7617 } 7618 7619 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7620 7621 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7622 cphci = cphci->cphci_next) { 7623 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7624 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7625 KM_SLEEP); 7626 phbc->phbc_vhbusconfig = vhbc; 7627 phbc->phbc_next = phbc_head; 7628 phbc_head = phbc; 7629 vhbc->vhbc_thr_count++; 7630 } 7631 rw_exit(&vhcache->vhcache_lock); 7632 7633 vhbc->vhbc_op = op; 7634 vhbc->vhbc_op_major = maj; 7635 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7636 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7637 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7638 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7639 7640 /* now create threads to initiate bus config on all phcis in parallel */ 7641 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7642 phbc_next = phbc->phbc_next; 7643 if (mdi_mtc_off) 7644 bus_config_phci((void *)phbc); 7645 else 7646 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7647 0, &p0, TS_RUN, minclsyspri); 7648 } 7649 7650 mutex_enter(&vhbc->vhbc_lock); 7651 /* wait until all threads exit */ 7652 while (vhbc->vhbc_thr_count > 0) 7653 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7654 mutex_exit(&vhbc->vhbc_lock); 7655 7656 mutex_destroy(&vhbc->vhbc_lock); 7657 cv_destroy(&vhbc->vhbc_cv); 7658 kmem_free(vhbc, sizeof (*vhbc)); 7659 } 7660 7661 /* 7662 * Single threaded version of bus_config_all_phcis() 7663 */ 7664 static void 7665 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7666 
ddi_bus_config_op_t op, major_t maj) 7667 { 7668 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7669 7670 single_threaded_vhconfig_enter(vhc); 7671 bus_config_all_phcis(vhcache, flags, op, maj); 7672 single_threaded_vhconfig_exit(vhc); 7673 } 7674 7675 /* 7676 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7677 * The path includes the child component in addition to the phci path. 7678 */ 7679 static int 7680 bus_config_one_phci_child(char *path) 7681 { 7682 dev_info_t *ph_dip, *child; 7683 char *devnm; 7684 int rv = MDI_FAILURE; 7685 7686 /* extract the child component of the phci */ 7687 devnm = strrchr(path, '/'); 7688 *devnm++ = '\0'; 7689 7690 /* 7691 * first configure all path components upto phci and then 7692 * configure the phci child. 7693 */ 7694 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7695 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7696 NDI_SUCCESS) { 7697 /* 7698 * release the hold that ndi_devi_config_one() placed 7699 */ 7700 ndi_rele_devi(child); 7701 rv = MDI_SUCCESS; 7702 } 7703 7704 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7705 ndi_rele_devi(ph_dip); 7706 } 7707 7708 devnm--; 7709 *devnm = '/'; 7710 return (rv); 7711 } 7712 7713 /* 7714 * Build a list of phci client paths for the specified vhci client. 7715 * The list includes only those phci client paths which aren't configured yet. 7716 */ 7717 static mdi_phys_path_t * 7718 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7719 { 7720 mdi_vhcache_pathinfo_t *cpi; 7721 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7722 int config_path, len; 7723 7724 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7725 /* 7726 * include only those paths that aren't configured. 
7727 */ 7728 config_path = 0; 7729 if (cpi->cpi_pip == NULL) 7730 config_path = 1; 7731 else { 7732 MDI_PI_LOCK(cpi->cpi_pip); 7733 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7734 config_path = 1; 7735 MDI_PI_UNLOCK(cpi->cpi_pip); 7736 } 7737 7738 if (config_path) { 7739 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7740 len = strlen(cpi->cpi_cphci->cphci_path) + 7741 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7742 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7743 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7744 cpi->cpi_cphci->cphci_path, ct_name, 7745 cpi->cpi_addr); 7746 pp->phys_path_next = NULL; 7747 7748 if (pp_head == NULL) 7749 pp_head = pp; 7750 else 7751 pp_tail->phys_path_next = pp; 7752 pp_tail = pp; 7753 } 7754 } 7755 7756 return (pp_head); 7757 } 7758 7759 /* 7760 * Free the memory allocated for phci client path list. 7761 */ 7762 static void 7763 free_phclient_path_list(mdi_phys_path_t *pp_head) 7764 { 7765 mdi_phys_path_t *pp, *pp_next; 7766 7767 for (pp = pp_head; pp != NULL; pp = pp_next) { 7768 pp_next = pp->phys_path_next; 7769 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7770 kmem_free(pp, sizeof (*pp)); 7771 } 7772 } 7773 7774 /* 7775 * Allocated async client structure and initialize with the specified values. 7776 */ 7777 static mdi_async_client_config_t * 7778 alloc_async_client_config(char *ct_name, char *ct_addr, 7779 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7780 { 7781 mdi_async_client_config_t *acc; 7782 7783 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7784 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7785 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7786 acc->acc_phclient_path_list_head = pp_head; 7787 init_vhcache_lookup_token(&acc->acc_token, tok); 7788 acc->acc_next = NULL; 7789 return (acc); 7790 } 7791 7792 /* 7793 * Free the memory allocated for the async client structure and their members. 
7794 */ 7795 static void 7796 free_async_client_config(mdi_async_client_config_t *acc) 7797 { 7798 if (acc->acc_phclient_path_list_head) 7799 free_phclient_path_list(acc->acc_phclient_path_list_head); 7800 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7801 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7802 kmem_free(acc, sizeof (*acc)); 7803 } 7804 7805 /* 7806 * Sort vhcache pathinfos (cpis) of the specified client. 7807 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7808 * flag set come at the beginning of the list. All cpis which have this 7809 * flag set come at the end of the list. 7810 */ 7811 static void 7812 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7813 { 7814 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7815 7816 cpi_head = cct->cct_cpi_head; 7817 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7818 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7819 cpi_next = cpi->cpi_next; 7820 enqueue_vhcache_pathinfo(cct, cpi); 7821 } 7822 } 7823 7824 /* 7825 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7826 * every vhcache pathinfo of the specified client. If not adjust the flag 7827 * setting appropriately. 7828 * 7829 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7830 * on-disk vhci cache. So every time this flag is updated the cache must be 7831 * flushed. 7832 */ 7833 static void 7834 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7835 mdi_vhcache_lookup_token_t *tok) 7836 { 7837 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7838 mdi_vhcache_client_t *cct; 7839 mdi_vhcache_pathinfo_t *cpi; 7840 7841 rw_enter(&vhcache->vhcache_lock, RW_READER); 7842 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7843 == NULL) { 7844 rw_exit(&vhcache->vhcache_lock); 7845 return; 7846 } 7847 7848 /* 7849 * to avoid unnecessary on-disk cache updates, first check if an 7850 * update is really needed. 
If no update is needed simply return. 7851 */ 7852 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7853 if ((cpi->cpi_pip != NULL && 7854 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7855 (cpi->cpi_pip == NULL && 7856 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7857 break; 7858 } 7859 } 7860 if (cpi == NULL) { 7861 rw_exit(&vhcache->vhcache_lock); 7862 return; 7863 } 7864 7865 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7866 rw_exit(&vhcache->vhcache_lock); 7867 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7868 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7869 tok)) == NULL) { 7870 rw_exit(&vhcache->vhcache_lock); 7871 return; 7872 } 7873 } 7874 7875 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7876 if (cpi->cpi_pip != NULL) 7877 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7878 else 7879 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7880 } 7881 sort_vhcache_paths(cct); 7882 7883 rw_exit(&vhcache->vhcache_lock); 7884 vhcache_dirty(vhc); 7885 } 7886 7887 /* 7888 * Configure all specified paths of the client. 7889 */ 7890 static void 7891 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7892 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7893 { 7894 mdi_phys_path_t *pp; 7895 7896 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 7897 (void) bus_config_one_phci_child(pp->phys_path); 7898 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 7899 } 7900 7901 /* 7902 * Dequeue elements from vhci async client config list and bus configure 7903 * their corresponding phci clients. 
7904 */ 7905 static void 7906 config_client_paths_thread(void *arg) 7907 { 7908 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7909 mdi_async_client_config_t *acc; 7910 clock_t quit_at_ticks; 7911 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 7912 callb_cpr_t cprinfo; 7913 7914 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7915 "mdi_config_client_paths"); 7916 7917 for (; ; ) { 7918 quit_at_ticks = ddi_get_lbolt() + idle_time; 7919 7920 mutex_enter(&vhc->vhc_lock); 7921 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7922 vhc->vhc_acc_list_head == NULL && 7923 ddi_get_lbolt() < quit_at_ticks) { 7924 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7925 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7926 quit_at_ticks); 7927 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7928 } 7929 7930 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7931 vhc->vhc_acc_list_head == NULL) 7932 goto out; 7933 7934 acc = vhc->vhc_acc_list_head; 7935 vhc->vhc_acc_list_head = acc->acc_next; 7936 if (vhc->vhc_acc_list_head == NULL) 7937 vhc->vhc_acc_list_tail = NULL; 7938 vhc->vhc_acc_count--; 7939 mutex_exit(&vhc->vhc_lock); 7940 7941 config_client_paths_sync(vhc, acc->acc_ct_name, 7942 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 7943 &acc->acc_token); 7944 7945 free_async_client_config(acc); 7946 } 7947 7948 out: 7949 vhc->vhc_acc_thrcount--; 7950 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7951 CALLB_CPR_EXIT(&cprinfo); 7952 } 7953 7954 /* 7955 * Arrange for all the phci client paths (pp_head) for the specified client 7956 * to be bus configured asynchronously by a thread. 
7957 */ 7958 static void 7959 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7960 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7961 { 7962 mdi_async_client_config_t *acc, *newacc; 7963 int create_thread; 7964 7965 if (pp_head == NULL) 7966 return; 7967 7968 if (mdi_mtc_off) { 7969 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7970 free_phclient_path_list(pp_head); 7971 return; 7972 } 7973 7974 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7975 ASSERT(newacc); 7976 7977 mutex_enter(&vhc->vhc_lock); 7978 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7979 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 7980 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 7981 free_async_client_config(newacc); 7982 mutex_exit(&vhc->vhc_lock); 7983 return; 7984 } 7985 } 7986 7987 if (vhc->vhc_acc_list_head == NULL) 7988 vhc->vhc_acc_list_head = newacc; 7989 else 7990 vhc->vhc_acc_list_tail->acc_next = newacc; 7991 vhc->vhc_acc_list_tail = newacc; 7992 vhc->vhc_acc_count++; 7993 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 7994 cv_broadcast(&vhc->vhc_cv); 7995 create_thread = 0; 7996 } else { 7997 vhc->vhc_acc_thrcount++; 7998 create_thread = 1; 7999 } 8000 mutex_exit(&vhc->vhc_lock); 8001 8002 if (create_thread) 8003 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8004 0, &p0, TS_RUN, minclsyspri); 8005 } 8006 8007 /* 8008 * Return number of online paths for the specified client. 
8009 */ 8010 static int 8011 nonline_paths(mdi_vhcache_client_t *cct) 8012 { 8013 mdi_vhcache_pathinfo_t *cpi; 8014 int online_count = 0; 8015 8016 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8017 if (cpi->cpi_pip != NULL) { 8018 MDI_PI_LOCK(cpi->cpi_pip); 8019 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8020 online_count++; 8021 MDI_PI_UNLOCK(cpi->cpi_pip); 8022 } 8023 } 8024 8025 return (online_count); 8026 } 8027 8028 /* 8029 * Bus configure all paths for the specified vhci client. 8030 * If at least one path for the client is already online, the remaining paths 8031 * will be configured asynchronously. Otherwise, it synchronously configures 8032 * the paths until at least one path is online and then rest of the paths 8033 * will be configured asynchronously. 8034 */ 8035 static void 8036 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8037 { 8038 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8039 mdi_phys_path_t *pp_head, *pp; 8040 mdi_vhcache_client_t *cct; 8041 mdi_vhcache_lookup_token_t tok; 8042 8043 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8044 8045 init_vhcache_lookup_token(&tok, NULL); 8046 8047 if (ct_name == NULL || ct_addr == NULL || 8048 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8049 == NULL || 8050 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8051 rw_exit(&vhcache->vhcache_lock); 8052 return; 8053 } 8054 8055 /* if at least one path is online, configure the rest asynchronously */ 8056 if (nonline_paths(cct) > 0) { 8057 rw_exit(&vhcache->vhcache_lock); 8058 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8059 return; 8060 } 8061 8062 rw_exit(&vhcache->vhcache_lock); 8063 8064 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8065 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8066 rw_enter(&vhcache->vhcache_lock, RW_READER); 8067 8068 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8069 ct_addr, &tok)) 
== NULL) { 8070 rw_exit(&vhcache->vhcache_lock); 8071 goto out; 8072 } 8073 8074 if (nonline_paths(cct) > 0 && 8075 pp->phys_path_next != NULL) { 8076 rw_exit(&vhcache->vhcache_lock); 8077 config_client_paths_async(vhc, ct_name, ct_addr, 8078 pp->phys_path_next, &tok); 8079 pp->phys_path_next = NULL; 8080 goto out; 8081 } 8082 8083 rw_exit(&vhcache->vhcache_lock); 8084 } 8085 } 8086 8087 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8088 out: 8089 free_phclient_path_list(pp_head); 8090 } 8091 8092 static void 8093 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8094 { 8095 mutex_enter(&vhc->vhc_lock); 8096 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8097 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8098 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8099 mutex_exit(&vhc->vhc_lock); 8100 } 8101 8102 static void 8103 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8104 { 8105 mutex_enter(&vhc->vhc_lock); 8106 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8107 cv_broadcast(&vhc->vhc_cv); 8108 mutex_exit(&vhc->vhc_lock); 8109 } 8110 8111 /* 8112 * Attach the phci driver instances associated with the vhci: 8113 * If root is mounted attach all phci driver instances. 8114 * If root is not mounted, attach the instances of only those phci 8115 * drivers that have the root support. 8116 */ 8117 static void 8118 attach_phci_drivers(mdi_vhci_config_t *vhc) 8119 { 8120 int i; 8121 major_t m; 8122 8123 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 8124 if (modrootloaded == 0 && 8125 vhc->vhc_phci_driver_list[i].phdriver_root_support == 0) 8126 continue; 8127 8128 m = ddi_name_to_major( 8129 vhc->vhc_phci_driver_list[i].phdriver_name); 8130 if (m != (major_t)-1) { 8131 if (ddi_hold_installed_driver(m) != NULL) 8132 ddi_rele_driver(m); 8133 } 8134 } 8135 } 8136 8137 /* 8138 * Build vhci cache: 8139 * 8140 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8141 * the phci driver instances. During this process the cache gets built. 
8142 * 8143 * Cache is built fully if the root is mounted. 8144 * If the root is not mounted, phci drivers that do not have root support 8145 * are not attached. As a result the cache is built partially. The entries 8146 * in the cache reflect only those phci drivers that have root support. 8147 */ 8148 static int 8149 build_vhci_cache(mdi_vhci_config_t *vhc) 8150 { 8151 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8152 8153 single_threaded_vhconfig_enter(vhc); 8154 8155 rw_enter(&vhcache->vhcache_lock, RW_READER); 8156 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8157 rw_exit(&vhcache->vhcache_lock); 8158 single_threaded_vhconfig_exit(vhc); 8159 return (0); 8160 } 8161 rw_exit(&vhcache->vhcache_lock); 8162 8163 attach_phci_drivers(vhc); 8164 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8165 BUS_CONFIG_ALL, (major_t)-1); 8166 8167 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8168 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8169 rw_exit(&vhcache->vhcache_lock); 8170 8171 single_threaded_vhconfig_exit(vhc); 8172 vhcache_dirty(vhc); 8173 return (1); 8174 } 8175 8176 /* 8177 * Determine if discovery of paths is needed. 8178 */ 8179 static int 8180 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8181 { 8182 int rv = 1; 8183 8184 mutex_enter(&vhc->vhc_lock); 8185 if (i_ddi_io_initialized() == 0) { 8186 if (vhc->vhc_path_discovery_boot > 0) { 8187 vhc->vhc_path_discovery_boot--; 8188 goto out; 8189 } 8190 } else { 8191 if (vhc->vhc_path_discovery_postboot > 0) { 8192 vhc->vhc_path_discovery_postboot--; 8193 goto out; 8194 } 8195 } 8196 8197 /* 8198 * Do full path discovery at most once per mdi_path_discovery_interval. 8199 * This is to avoid a series of full path discoveries when opening 8200 * stale /dev/[r]dsk links. 
8201 */ 8202 if (mdi_path_discovery_interval != -1 && 8203 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8204 goto out; 8205 8206 rv = 0; 8207 out: 8208 mutex_exit(&vhc->vhc_lock); 8209 return (rv); 8210 } 8211 8212 /* 8213 * Discover all paths: 8214 * 8215 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8216 * driver instances. During this process all paths will be discovered. 8217 */ 8218 static int 8219 vhcache_discover_paths(mdi_vhci_config_t *vhc) 8220 { 8221 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8222 int rv = 0; 8223 8224 single_threaded_vhconfig_enter(vhc); 8225 8226 if (vhcache_do_discovery(vhc)) { 8227 attach_phci_drivers(vhc); 8228 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8229 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8230 8231 mutex_enter(&vhc->vhc_lock); 8232 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8233 mdi_path_discovery_interval * TICKS_PER_SECOND; 8234 mutex_exit(&vhc->vhc_lock); 8235 rv = 1; 8236 } 8237 8238 single_threaded_vhconfig_exit(vhc); 8239 return (rv); 8240 } 8241 8242 /* 8243 * Generic vhci bus config implementation: 8244 * 8245 * Parameters 8246 * vdip vhci dip 8247 * flags bus config flags 8248 * op bus config operation 8249 * The remaining parameters are bus config operation specific 8250 * 8251 * for BUS_CONFIG_ONE 8252 * arg pointer to name@addr 8253 * child upon successful return from this function, *child will be 8254 * set to the configured and held devinfo child node of vdip. 8255 * ct_addr pointer to client address (i.e. GUID) 8256 * 8257 * for BUS_CONFIG_DRIVER 8258 * arg major number of the driver 8259 * child and ct_addr parameters are ignored 8260 * 8261 * for BUS_CONFIG_ALL 8262 * arg, child, and ct_addr parameters are ignored 8263 * 8264 * Note that for the rest of the bus config operations, this function simply 8265 * calls the framework provided default bus config routine. 
8266 */ 8267 int 8268 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8269 void *arg, dev_info_t **child, char *ct_addr) 8270 { 8271 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8272 mdi_vhci_config_t *vhc = vh->vh_config; 8273 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8274 int rv = 0; 8275 int params_valid = 0; 8276 char *cp; 8277 8278 /* 8279 * While bus configuring phcis, the phci driver interactions with MDI 8280 * cause child nodes to be enumerated under the vhci node for which 8281 * they need to ndi_devi_enter the vhci node. 8282 * 8283 * Unfortunately, to avoid the deadlock, we ourself can not wait for 8284 * for the bus config operations on phcis to finish while holding the 8285 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on 8286 * phcis and call the default framework provided bus config function 8287 * if we are called with ndi_devi_enter lock held. 8288 */ 8289 if (DEVI_BUSY_OWNED(vdip)) { 8290 MDI_DEBUG(2, (CE_NOTE, vdip, 8291 "!MDI: vhci bus config: vhci dip is busy owned\n")); 8292 goto default_bus_config; 8293 } 8294 8295 rw_enter(&vhcache->vhcache_lock, RW_READER); 8296 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8297 rw_exit(&vhcache->vhcache_lock); 8298 rv = build_vhci_cache(vhc); 8299 rw_enter(&vhcache->vhcache_lock, RW_READER); 8300 } 8301 8302 switch (op) { 8303 case BUS_CONFIG_ONE: 8304 if (arg != NULL && ct_addr != NULL) { 8305 /* extract node name */ 8306 cp = (char *)arg; 8307 while (*cp != '\0' && *cp != '@') 8308 cp++; 8309 if (*cp == '@') { 8310 params_valid = 1; 8311 *cp = '\0'; 8312 config_client_paths(vhc, (char *)arg, ct_addr); 8313 /* config_client_paths() releases cache_lock */ 8314 *cp = '@'; 8315 break; 8316 } 8317 } 8318 8319 rw_exit(&vhcache->vhcache_lock); 8320 break; 8321 8322 case BUS_CONFIG_DRIVER: 8323 rw_exit(&vhcache->vhcache_lock); 8324 if (rv == 0) 8325 st_bus_config_all_phcis(vhc, flags, op, 8326 (major_t)(uintptr_t)arg); 8327 break; 8328 8329 case 
BUS_CONFIG_ALL: 8330 rw_exit(&vhcache->vhcache_lock); 8331 if (rv == 0) 8332 st_bus_config_all_phcis(vhc, flags, op, -1); 8333 break; 8334 8335 default: 8336 rw_exit(&vhcache->vhcache_lock); 8337 break; 8338 } 8339 8340 8341 default_bus_config: 8342 /* 8343 * All requested child nodes are enumerated under the vhci. 8344 * Now configure them. 8345 */ 8346 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8347 NDI_SUCCESS) { 8348 return (MDI_SUCCESS); 8349 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8350 /* discover all paths and try configuring again */ 8351 if (vhcache_discover_paths(vhc) && 8352 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8353 NDI_SUCCESS) 8354 return (MDI_SUCCESS); 8355 } 8356 8357 return (MDI_FAILURE); 8358 } 8359 8360 /* 8361 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8362 */ 8363 static nvlist_t * 8364 read_on_disk_vhci_cache(char *vhci_class) 8365 { 8366 nvlist_t *nvl; 8367 int err; 8368 char *filename; 8369 8370 filename = vhclass2vhcache_filename(vhci_class); 8371 8372 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8373 kmem_free(filename, strlen(filename) + 1); 8374 return (nvl); 8375 } else if (err == EIO) 8376 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8377 else if (err == EINVAL) 8378 cmn_err(CE_WARN, 8379 "%s: data file corrupted, will recreate\n", filename); 8380 8381 kmem_free(filename, strlen(filename) + 1); 8382 return (NULL); 8383 } 8384 8385 /* 8386 * Read on-disk vhci cache into nvlists for all vhci classes. 8387 * Called during booting by i_ddi_read_devices_files(). 8388 */ 8389 void 8390 mdi_read_devices_files(void) 8391 { 8392 int i; 8393 8394 for (i = 0; i < N_VHCI_CLASSES; i++) 8395 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8396 } 8397 8398 /* 8399 * Remove all stale entries from vhci cache. 
8400 */ 8401 static void 8402 clean_vhcache(mdi_vhci_config_t *vhc) 8403 { 8404 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8405 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8406 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8407 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8408 8409 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8410 8411 cct_head = vhcache->vhcache_client_head; 8412 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8413 for (cct = cct_head; cct != NULL; cct = cct_next) { 8414 cct_next = cct->cct_next; 8415 8416 cpi_head = cct->cct_cpi_head; 8417 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8418 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8419 cpi_next = cpi->cpi_next; 8420 if (cpi->cpi_pip != NULL) { 8421 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8422 enqueue_tail_vhcache_pathinfo(cct, cpi); 8423 } else 8424 free_vhcache_pathinfo(cpi); 8425 } 8426 8427 if (cct->cct_cpi_head != NULL) 8428 enqueue_vhcache_client(vhcache, cct); 8429 else { 8430 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8431 (mod_hash_key_t)cct->cct_name_addr); 8432 free_vhcache_client(cct); 8433 } 8434 } 8435 8436 cphci_head = vhcache->vhcache_phci_head; 8437 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8438 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8439 cphci_next = cphci->cphci_next; 8440 if (cphci->cphci_phci != NULL) 8441 enqueue_vhcache_phci(vhcache, cphci); 8442 else 8443 free_vhcache_phci(cphci); 8444 } 8445 8446 vhcache->vhcache_clean_time = lbolt64; 8447 rw_exit(&vhcache->vhcache_lock); 8448 vhcache_dirty(vhc); 8449 } 8450 8451 /* 8452 * Remove all stale entries from vhci cache. 
8453 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8454 */ 8455 void 8456 mdi_clean_vhcache(void) 8457 { 8458 mdi_vhci_t *vh; 8459 8460 mutex_enter(&mdi_mutex); 8461 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8462 vh->vh_refcnt++; 8463 mutex_exit(&mdi_mutex); 8464 clean_vhcache(vh->vh_config); 8465 mutex_enter(&mdi_mutex); 8466 vh->vh_refcnt--; 8467 } 8468 mutex_exit(&mdi_mutex); 8469 } 8470 8471 /* 8472 * mdi_vhci_walk_clients(): 8473 * Walker routine to traverse client dev_info nodes 8474 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8475 * below the client, including nexus devices, which we dont want. 8476 * So we just traverse the immediate siblings, starting from 1st client. 8477 */ 8478 void 8479 mdi_vhci_walk_clients(dev_info_t *vdip, 8480 int (*f)(dev_info_t *, void *), void *arg) 8481 { 8482 dev_info_t *cdip; 8483 mdi_client_t *ct; 8484 8485 mutex_enter(&mdi_mutex); 8486 8487 cdip = ddi_get_child(vdip); 8488 8489 while (cdip) { 8490 ct = i_devi_get_client(cdip); 8491 MDI_CLIENT_LOCK(ct); 8492 8493 switch ((*f)(cdip, arg)) { 8494 case DDI_WALK_CONTINUE: 8495 cdip = ddi_get_next_sibling(cdip); 8496 MDI_CLIENT_UNLOCK(ct); 8497 break; 8498 8499 default: 8500 MDI_CLIENT_UNLOCK(ct); 8501 mutex_exit(&mdi_mutex); 8502 return; 8503 } 8504 } 8505 8506 mutex_exit(&mdi_mutex); 8507 } 8508 8509 /* 8510 * mdi_vhci_walk_phcis(): 8511 * Walker routine to traverse phci dev_info nodes 8512 */ 8513 void 8514 mdi_vhci_walk_phcis(dev_info_t *vdip, 8515 int (*f)(dev_info_t *, void *), void *arg) 8516 { 8517 mdi_vhci_t *vh = NULL; 8518 mdi_phci_t *ph = NULL; 8519 8520 mutex_enter(&mdi_mutex); 8521 8522 vh = i_devi_get_vhci(vdip); 8523 ph = vh->vh_phci_head; 8524 8525 while (ph) { 8526 MDI_PHCI_LOCK(ph); 8527 8528 switch ((*f)(ph->ph_dip, arg)) { 8529 case DDI_WALK_CONTINUE: 8530 MDI_PHCI_UNLOCK(ph); 8531 ph = ph->ph_next; 8532 break; 8533 8534 default: 8535 MDI_PHCI_UNLOCK(ph); 8536 mutex_exit(&mdi_mutex); 8537 
return; 8538 } 8539 } 8540 8541 mutex_exit(&mdi_mutex); 8542 } 8543 8544 8545 /* 8546 * mdi_walk_vhcis(): 8547 * Walker routine to traverse vhci dev_info nodes 8548 */ 8549 void 8550 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8551 { 8552 mdi_vhci_t *vh = NULL; 8553 8554 mutex_enter(&mdi_mutex); 8555 /* 8556 * Scan for already registered vhci 8557 */ 8558 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8559 vh->vh_refcnt++; 8560 mutex_exit(&mdi_mutex); 8561 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8562 mutex_enter(&mdi_mutex); 8563 vh->vh_refcnt--; 8564 break; 8565 } else { 8566 mutex_enter(&mdi_mutex); 8567 vh->vh_refcnt--; 8568 } 8569 } 8570 8571 mutex_exit(&mdi_mutex); 8572 } 8573 8574 /* 8575 * i_mdi_log_sysevent(): 8576 * Logs events for pickup by syseventd 8577 */ 8578 static void 8579 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8580 { 8581 char *path_name; 8582 nvlist_t *attr_list; 8583 8584 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8585 KM_SLEEP) != DDI_SUCCESS) { 8586 goto alloc_failed; 8587 } 8588 8589 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8590 (void) ddi_pathname(dip, path_name); 8591 8592 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8593 ddi_driver_name(dip)) != DDI_SUCCESS) { 8594 goto error; 8595 } 8596 8597 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8598 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8599 goto error; 8600 } 8601 8602 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8603 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8604 goto error; 8605 } 8606 8607 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8608 path_name) != DDI_SUCCESS) { 8609 goto error; 8610 } 8611 8612 if (nvlist_add_string(attr_list, DDI_CLASS, 8613 ph_vh_class) != DDI_SUCCESS) { 8614 goto error; 8615 } 8616 8617 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8618 attr_list, NULL, DDI_SLEEP); 8619 8620 error: 8621 kmem_free(path_name, MAXPATHLEN); 8622 
nvlist_free(attr_list); 8623 return; 8624 8625 alloc_failed: 8626 MDI_DEBUG(1, (CE_WARN, dip, 8627 "!i_mdi_log_sysevent: Unable to send sysevent")); 8628 } 8629