1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 30 * detailed discussion of the overall mpxio architecture. 
31 * 32 * Default locking order: 33 * 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) 35 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 71 #ifdef DEBUG 72 #include <sys/debug.h> 73 int mdi_debug = 1; 74 #define MDI_DEBUG(level, stmnt) \ 75 if (mdi_debug >= (level)) i_mdi_log stmnt 76 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 77 #else /* !DEBUG */ 78 #define MDI_DEBUG(level, stmnt) 79 #endif /* DEBUG */ 80 81 extern pri_t minclsyspri; 82 extern int modrootloaded; 83 84 /* 85 * Global mutex: 86 * Protects vHCI list and structure members, pHCI and Client lists. 
87 */ 88 kmutex_t mdi_mutex; 89 90 /* 91 * Registered vHCI class driver lists 92 */ 93 int mdi_vhci_count; 94 mdi_vhci_t *mdi_vhci_head; 95 mdi_vhci_t *mdi_vhci_tail; 96 97 /* 98 * Client Hash Table size 99 */ 100 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 101 102 /* 103 * taskq interface definitions 104 */ 105 #define MDI_TASKQ_N_THREADS 8 106 #define MDI_TASKQ_PRI minclsyspri 107 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 108 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 109 110 taskq_t *mdi_taskq; 111 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 112 113 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 114 115 /* 116 * The data should be "quiet" for this interval (in seconds) before the 117 * vhci cached data is flushed to the disk. 118 */ 119 static int mdi_vhcache_flush_delay = 10; 120 121 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 122 static int mdi_vhcache_flush_daemon_idle_time = 60; 123 124 /* 125 * MDI falls back to discovery of all paths when a bus_config_one fails. 126 * The following parameters can be used to tune this operation. 127 * 128 * mdi_path_discovery_boot 129 * Number of times path discovery will be attempted during early boot. 130 * Probably there is no reason to ever set this value to greater than one. 131 * 132 * mdi_path_discovery_postboot 133 * Number of times path discovery will be attempted after early boot. 134 * Set it to a minimum of two to allow for discovery of iscsi paths which 135 * may happen very late during booting. 136 * 137 * mdi_path_discovery_interval 138 * Minimum number of seconds MDI will wait between successive discovery 139 * of all paths. Set it to -1 to disable discovery of all paths. 
140 */ 141 static int mdi_path_discovery_boot = 1; 142 static int mdi_path_discovery_postboot = 2; 143 static int mdi_path_discovery_interval = 10; 144 145 /* 146 * number of seconds the asynchronous configuration thread will sleep idle 147 * before exiting. 148 */ 149 static int mdi_async_config_idle_time = 600; 150 151 static int mdi_bus_config_cache_hash_size = 256; 152 153 /* turns off multithreaded configuration for certain operations */ 154 static int mdi_mtc_off = 0; 155 156 /* 157 * MDI component property name/value string definitions 158 */ 159 const char *mdi_component_prop = "mpxio-component"; 160 const char *mdi_component_prop_vhci = "vhci"; 161 const char *mdi_component_prop_phci = "phci"; 162 const char *mdi_component_prop_client = "client"; 163 164 /* 165 * MDI client global unique identifier property name 166 */ 167 const char *mdi_client_guid_prop = "client-guid"; 168 169 /* 170 * MDI client load balancing property name/value string definitions 171 */ 172 const char *mdi_load_balance = "load-balance"; 173 const char *mdi_load_balance_none = "none"; 174 const char *mdi_load_balance_rr = "round-robin"; 175 const char *mdi_load_balance_lba = "logical-block"; 176 177 /* 178 * Obsolete vHCI class definition; to be removed after Leadville update 179 */ 180 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 181 182 static char vhci_greeting[] = 183 "\tThere already exists one vHCI driver for class %s\n" 184 "\tOnly one vHCI driver for each class is allowed\n"; 185 186 /* 187 * Static function prototypes 188 */ 189 static int i_mdi_phci_offline(dev_info_t *, uint_t); 190 static int i_mdi_client_offline(dev_info_t *, uint_t); 191 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 192 static void i_mdi_phci_post_detach(dev_info_t *, 193 ddi_detach_cmd_t, int); 194 static int i_mdi_client_pre_detach(dev_info_t *, 195 ddi_detach_cmd_t); 196 static void i_mdi_client_post_detach(dev_info_t *, 197 ddi_detach_cmd_t, int); 198 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 199 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 200 static int i_mdi_lba_lb(mdi_client_t *ct, 201 mdi_pathinfo_t **ret_pip, struct buf *buf); 202 static void i_mdi_pm_hold_client(mdi_client_t *, int); 203 static void i_mdi_pm_rele_client(mdi_client_t *, int); 204 static void i_mdi_pm_reset_client(mdi_client_t *); 205 static void i_mdi_pm_hold_all_phci(mdi_client_t *); 206 static int i_mdi_power_all_phci(mdi_client_t *); 207 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 208 209 210 /* 211 * Internal mdi_pathinfo node functions 212 */ 213 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 214 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 215 216 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 217 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 218 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 219 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 220 static void i_mdi_phci_get_client_lock(mdi_phci_t *, 221 mdi_client_t *); 222 static void i_mdi_phci_unlock(mdi_phci_t *); 223 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 224 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 225 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 226 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 227 mdi_client_t *); 228 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 229 static void i_mdi_client_remove_path(mdi_client_t *, 230 mdi_pathinfo_t *); 231 232 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 233 mdi_pathinfo_state_t, int); 234 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 235 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 236 char **, int); 237 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 238 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 239 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 240 static 
mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 241 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 242 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 243 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 244 static void i_mdi_client_update_state(mdi_client_t *); 245 static int i_mdi_client_compute_state(mdi_client_t *, 246 mdi_phci_t *); 247 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 248 static void i_mdi_client_unlock(mdi_client_t *); 249 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 250 static mdi_client_t *i_devi_get_client(dev_info_t *); 251 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int, 252 int); 253 /* 254 * Failover related function prototypes 255 */ 256 static int i_mdi_failover(void *); 257 258 /* 259 * misc internal functions 260 */ 261 static int i_mdi_get_hash_key(char *); 262 static int i_map_nvlist_error_to_mdi(int); 263 static void i_mdi_report_path_state(mdi_client_t *, 264 mdi_pathinfo_t *); 265 266 static void setup_vhci_cache(mdi_vhci_t *); 267 static int destroy_vhci_cache(mdi_vhci_t *); 268 static void setup_phci_driver_list(mdi_vhci_t *); 269 static void free_phci_driver_list(mdi_vhci_config_t *); 270 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 271 static boolean_t stop_vhcache_flush_thread(void *, int); 272 static void free_string_array(char **, int); 273 static void free_vhcache_phci(mdi_vhcache_phci_t *); 274 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 275 static void free_vhcache_client(mdi_vhcache_client_t *); 276 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 277 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 278 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 279 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 280 static void vhcache_pi_add(mdi_vhci_config_t *, 281 struct mdi_pathinfo *); 282 static void 
vhcache_pi_remove(mdi_vhci_config_t *, 283 struct mdi_pathinfo *); 284 static void free_phclient_path_list(mdi_phys_path_t *); 285 static void sort_vhcache_paths(mdi_vhcache_client_t *); 286 static int flush_vhcache(mdi_vhci_config_t *, int); 287 static void vhcache_dirty(mdi_vhci_config_t *); 288 static void free_async_client_config(mdi_async_client_config_t *); 289 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 290 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 291 static nvlist_t *read_on_disk_vhci_cache(char *); 292 extern int fread_nvlist(char *, nvlist_t **); 293 extern int fwrite_nvlist(char *, nvlist_t *); 294 295 /* called once when first vhci registers with mdi */ 296 static void 297 i_mdi_init() 298 { 299 static int initialized = 0; 300 301 if (initialized) 302 return; 303 initialized = 1; 304 305 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 306 /* 307 * Create our taskq resources 308 */ 309 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 310 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 311 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 312 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 313 } 314 315 /* 316 * mdi_get_component_type(): 317 * Return mpxio component type 318 * Return Values: 319 * MDI_COMPONENT_NONE 320 * MDI_COMPONENT_VHCI 321 * MDI_COMPONENT_PHCI 322 * MDI_COMPONENT_CLIENT 323 * XXX This doesn't work under multi-level MPxIO and should be 324 * removed when clients migrate mdi_is_*() interfaces. 325 */ 326 int 327 mdi_get_component_type(dev_info_t *dip) 328 { 329 return (DEVI(dip)->devi_mdi_component); 330 } 331 332 /* 333 * mdi_vhci_register(): 334 * Register a vHCI module with the mpxio framework 335 * mdi_vhci_register() is called by vHCI drivers to register the 336 * 'class_driver' vHCI driver and its MDI entrypoints with the 337 * mpxio framework. The vHCI driver must call this interface as 338 * part of its attach(9e) handler. 
339 * Competing threads may try to attach mdi_vhci_register() as 340 * the vHCI drivers are loaded and attached as a result of pHCI 341 * driver instance registration (mdi_phci_register()) with the 342 * framework. 343 * Return Values: 344 * MDI_SUCCESS 345 * MDI_FAILURE 346 */ 347 348 /*ARGSUSED*/ 349 int 350 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 351 int flags) 352 { 353 mdi_vhci_t *vh = NULL; 354 355 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 356 357 i_mdi_init(); 358 359 mutex_enter(&mdi_mutex); 360 /* 361 * Scan for already registered vhci 362 */ 363 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 364 if (strcmp(vh->vh_class, class) == 0) { 365 /* 366 * vHCI has already been created. Check for valid 367 * vHCI ops registration. We only support one vHCI 368 * module per class 369 */ 370 if (vh->vh_ops != NULL) { 371 mutex_exit(&mdi_mutex); 372 cmn_err(CE_NOTE, vhci_greeting, class); 373 return (MDI_FAILURE); 374 } 375 break; 376 } 377 } 378 379 /* 380 * if not yet created, create the vHCI component 381 */ 382 if (vh == NULL) { 383 struct client_hash *hash = NULL; 384 char *load_balance; 385 386 /* 387 * Allocate and initialize the mdi extensions 388 */ 389 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 390 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 391 KM_SLEEP); 392 vh->vh_client_table = hash; 393 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 394 (void) strcpy(vh->vh_class, class); 395 vh->vh_lb = LOAD_BALANCE_RR; 396 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 397 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 398 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 399 vh->vh_lb = LOAD_BALANCE_NONE; 400 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 401 == 0) { 402 vh->vh_lb = LOAD_BALANCE_LBA; 403 } 404 ddi_prop_free(load_balance); 405 } 406 407 /* 408 * Store the vHCI ops vectors 409 */ 410 vh->vh_dip = vdip; 411 vh->vh_ops = vops; 412 413 
setup_vhci_cache(vh); 414 415 if (mdi_vhci_head == NULL) { 416 mdi_vhci_head = vh; 417 } 418 if (mdi_vhci_tail) { 419 mdi_vhci_tail->vh_next = vh; 420 } 421 mdi_vhci_tail = vh; 422 mdi_vhci_count++; 423 } 424 425 /* 426 * Claim the devfs node as a vhci component 427 */ 428 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 429 430 /* 431 * Initialize our back reference from dev_info node 432 */ 433 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 434 mutex_exit(&mdi_mutex); 435 return (MDI_SUCCESS); 436 } 437 438 /* 439 * mdi_vhci_unregister(): 440 * Unregister a vHCI module from mpxio framework 441 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 442 * of a vhci to unregister it from the framework. 443 * Return Values: 444 * MDI_SUCCESS 445 * MDI_FAILURE 446 */ 447 448 /*ARGSUSED*/ 449 int 450 mdi_vhci_unregister(dev_info_t *vdip, int flags) 451 { 452 mdi_vhci_t *found, *vh, *prev = NULL; 453 454 /* 455 * Check for invalid VHCI 456 */ 457 if ((vh = i_devi_get_vhci(vdip)) == NULL) 458 return (MDI_FAILURE); 459 460 mutex_enter(&mdi_mutex); 461 462 /* 463 * Scan the list of registered vHCIs for a match 464 */ 465 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 466 if (found == vh) 467 break; 468 prev = found; 469 } 470 471 if (found == NULL) { 472 mutex_exit(&mdi_mutex); 473 return (MDI_FAILURE); 474 } 475 476 /* 477 * Check the vHCI, pHCI and client count. All the pHCIs and clients 478 * should have been unregistered, before a vHCI can be 479 * unregistered. 
480 */ 481 if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) { 482 mutex_exit(&mdi_mutex); 483 return (MDI_FAILURE); 484 } 485 486 /* 487 * Remove the vHCI from the global list 488 */ 489 if (vh == mdi_vhci_head) { 490 mdi_vhci_head = vh->vh_next; 491 } else { 492 prev->vh_next = vh->vh_next; 493 } 494 if (vh == mdi_vhci_tail) { 495 mdi_vhci_tail = prev; 496 } 497 498 mdi_vhci_count--; 499 mutex_exit(&mdi_mutex); 500 501 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 502 /* add vhci to the global list */ 503 mutex_enter(&mdi_mutex); 504 if (mdi_vhci_head == NULL) 505 mdi_vhci_head = vh; 506 else 507 mdi_vhci_tail->vh_next = vh; 508 mdi_vhci_tail = vh; 509 mdi_vhci_count++; 510 mutex_exit(&mdi_mutex); 511 return (MDI_FAILURE); 512 } 513 514 vh->vh_ops = NULL; 515 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 516 DEVI(vdip)->devi_mdi_xhci = NULL; 517 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 518 kmem_free(vh->vh_client_table, 519 mdi_client_table_size * sizeof (struct client_hash)); 520 521 kmem_free(vh, sizeof (mdi_vhci_t)); 522 return (MDI_SUCCESS); 523 } 524 525 /* 526 * i_mdi_vhci_class2vhci(): 527 * Look for a matching vHCI module given a vHCI class name 528 * Return Values: 529 * Handle to a vHCI component 530 * NULL 531 */ 532 static mdi_vhci_t * 533 i_mdi_vhci_class2vhci(char *class) 534 { 535 mdi_vhci_t *vh = NULL; 536 537 ASSERT(!MUTEX_HELD(&mdi_mutex)); 538 539 mutex_enter(&mdi_mutex); 540 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 541 if (strcmp(vh->vh_class, class) == 0) { 542 break; 543 } 544 } 545 mutex_exit(&mdi_mutex); 546 return (vh); 547 } 548 549 /* 550 * i_devi_get_vhci(): 551 * Utility function to get the handle to a vHCI component 552 * Return Values: 553 * Handle to a vHCI component 554 * NULL 555 */ 556 mdi_vhci_t * 557 i_devi_get_vhci(dev_info_t *vdip) 558 { 559 mdi_vhci_t *vh = NULL; 560 if (MDI_VHCI(vdip)) { 561 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 562 } 563 return (vh); 564 } 565 566 /* 
 * mdi_phci_register():
 *		Register a pHCI module with mpxio framework
 *		mdi_phci_register() is called by pHCI drivers to register with
 *		the mpxio framework and a specific 'class_driver' vHCI.  The
 *		pHCI driver must call this interface as part of its attach(9e)
 *		handler.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;
	char			*pathname;

	/* pathname is only used for the diagnostic message below */
	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(pdip, pathname);

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (CE_CONT, pdip,
			    "?%s (%s%d) multipath capabilities "
			    "disabled via %s.conf.\n", pathname,
			    ddi_driver_name(pdip), ddi_get_instance(pdip),
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			kmem_free(pathname, MAXPATHLEN);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	kmem_free(pathname, MAXPATHLEN);

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	/* Allocate and initialize the pHCI extension structure */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);

	/* Mark the devinfo node as a pHCI component and set the back ref */
	MDI_PHCI_SET_POWER_UP(ph);
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* Append the new pHCI to the vHCI's pHCI list under the global lock */
	mutex_enter(&mdi_mutex);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	mutex_exit(&mdi_mutex);
	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *		Unregister a pHCI module from mpxio framework
 *		mdi_phci_unregister() is called by the pHCI drivers from their
 *		detach(9E) handler to unregister their instances from the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/*
	 * Unlink ph from the vHCI's singly-linked pHCI list; prev is left
	 * NULL when ph is the list head.
	 */
	mutex_enter(&mdi_mutex);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;

	mutex_exit(&mdi_mutex);

	/* Notify listeners, drop the cache entry, then tear down ph */
	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	cv_destroy(&ph->ph_powerchange_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *		Utility function to return the phci extensions.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;
	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * mdi_phci_path2devinfo():
 *		Utility function to search for a valid phci device given
 *		the devfs pathname.
 */

dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	/*
	 * Walk the vHCI's pHCI list, rendering each pHCI's devfs path into
	 * temp_pathname and comparing it against the caller's pathname.
	 */
	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	mutex_enter(&mdi_mutex);
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	if (ph == NULL) {
		pdip = NULL;
	}
	mutex_exit(&mdi_mutex);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *		get number of path information nodes associated with a given
 *		pHCI device.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			/*
			 * tryenter failed. Try to grab again
			 * after a small delay
			 */
			/* hold pip so it can't go away while pi_mutex drops */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_get_client_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
{
	if (ct) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			/*
			 * tryenter failed. Try to grab again
			 * after a small delay
			 */
			MDI_CLIENT_UNLOCK(ct);
			delay(1);
			MDI_CLIENT_LOCK(ct);
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *		create client device's devinfo node
 * Return Values:
 *		dev_info
 *		NULL
 * Notes:
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
	char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MUTEX_HELD(&mdi_mutex));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client dip %p already exists",
		    (void *)cdip);
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* undo any partial node creation before returning NULL */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *		Find a matching devinfo node for given client node name
 *		and its guid.
 * Return Values:
 *		Handle to a dev_info node or NULL
 */

static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char		*data;
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	int		circular;

	/*
	 * Walk the vHCI's children under ndi_devi_enter protection; a match
	 * requires both the node name and the client-guid property to agree.
	 */
	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *		Remove a client device node
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;
	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
		if (rv != NDI_SUCCESS) {
			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
			    " failed. cdip = %p\n", cdip));
		}
		/*
		 * Convert to MDI error code
		 */
		switch (rv) {
		case NDI_SUCCESS:
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	}
	return (rv);
}

/*
 * i_devi_get_client()
 *		Utility function to get mpxio component extensions
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
	mdi_client_t	*ct = NULL;
	if (MDI_CLIENT(cdip)) {
		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
	}
	return (ct);
}

/*
 * i_mdi_is_child_present():
 *		Search for the presence of client device dev_info node
 */

static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
	int		rv = MDI_FAILURE;
	struct dev_info	*dip;
	int		circular;

	ndi_devi_enter(vdip, &circular);
	dip = DEVI(vdip)->devi_child;
	while (dip) {
		if (dip == DEVI(cdip)) {
			rv = MDI_SUCCESS;
			break;
		}
		dip = dip->devi_sibling;
	}
	ndi_devi_exit(vdip, circular);
	return (rv);
}


/*
 * i_mdi_client_lock():
 *		Grab client component lock
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */

static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			/*
			 * tryenter failed. Try to grab again
			 * after a small delay
			 */
			/* hold pip so it can't go away while pi_mutex drops */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_CLIENT_LOCK(ct);
	}
}

/*
 * i_mdi_client_unlock():
 *		Unlock a client component
 */

static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_client_alloc():
 * 		Allocate and initialize a client structure.  Caller should
 *		hold the global mdi_mutex.
 * Return Values:
 *		Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MUTEX_HELD(&mdi_mutex));

	/*
	 * Allocate and initialize a component structure.
	 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_dip = NULL;
	ct->ct_vhci = vh;
	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;
	ct->ct_flags = 0;
	/* new clients start failed/offline until paths come online */
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	ct->ct_unstable = 0;
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
	/* load-balance policy is inherited from the owning vHCI */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);
	return (ct);
}

/*
 * i_mdi_client_enlist_table():
 *		Attach the client device to the client hash table. Caller
 *		should hold the mdi_mutex
 */

static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int 			index;
	struct client_hash	*head;

	ASSERT(MUTEX_HELD(&mdi_mutex));
	/* bucket is chosen by hashing the client's guid */
	index = i_mdi_get_hash_key(ct->ct_guid);
	head = &vh->vh_client_table[index];
	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
	head->ct_hash_head = ct;
	head->ct_hash_count++;
	vh->vh_client_count++;
}

/*
 * i_mdi_client_delist_table():
 *		Attach the client device to the client hash table.
 *		Caller should hold the mdi_mutex
 */

static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	char			*guid;
	struct client_hash 	*head;
	mdi_client_t		*next;
	mdi_client_t		*last;

	ASSERT(MUTEX_HELD(&mdi_mutex));
	guid = ct->ct_guid;
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* locate ct in its bucket chain; last trails one node behind */
	last = NULL;
	next = (mdi_client_t *)head->ct_hash_head;
	while (next != NULL) {
		if (next == ct) {
			break;
		}
		last = next;
		next = next->ct_hnext;
	}

	if (next) {
		head->ct_hash_count--;
		if (last == NULL) {
			head->ct_hash_head = ct->ct_hnext;
		} else {
			last->ct_hnext = ct->ct_hnext;
		}
		ct->ct_hnext = NULL;
		vh->vh_client_count--;
	}
}


/*
 * i_mdi_client_free():
 *		Free a client component
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip;
1220 1221 ASSERT(MUTEX_HELD(&mdi_mutex)); 1222 vdip = vh->vh_dip; 1223 cdip = ct->ct_dip; 1224 1225 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1226 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1227 DEVI(cdip)->devi_mdi_client = NULL; 1228 1229 /* 1230 * Clear out back ref. to dev_info_t node 1231 */ 1232 ct->ct_dip = NULL; 1233 1234 /* 1235 * Remove this client from our hash queue 1236 */ 1237 i_mdi_client_delist_table(vh, ct); 1238 1239 /* 1240 * Uninitialize and free the component 1241 */ 1242 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1243 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1244 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1245 cv_destroy(&ct->ct_failover_cv); 1246 cv_destroy(&ct->ct_unstable_cv); 1247 cv_destroy(&ct->ct_powerchange_cv); 1248 mutex_destroy(&ct->ct_mutex); 1249 kmem_free(ct, sizeof (*ct)); 1250 1251 if (cdip != NULL) { 1252 mutex_exit(&mdi_mutex); 1253 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1254 mutex_enter(&mdi_mutex); 1255 } 1256 return (rv); 1257 } 1258 1259 /* 1260 * i_mdi_client_find(): 1261 * Find the client structure corresponding to a given guid 1262 * Caller should hold the mdi_mutex 1263 */ 1264 static mdi_client_t * 1265 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1266 { 1267 int index; 1268 struct client_hash *head; 1269 mdi_client_t *ct; 1270 1271 ASSERT(MUTEX_HELD(&mdi_mutex)); 1272 index = i_mdi_get_hash_key(guid); 1273 head = &vh->vh_client_table[index]; 1274 1275 ct = head->ct_hash_head; 1276 while (ct != NULL) { 1277 if (strcmp(ct->ct_guid, guid) == 0 && 1278 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1279 break; 1280 } 1281 ct = ct->ct_hnext; 1282 } 1283 return (ct); 1284 } 1285 1286 1287 1288 /* 1289 * i_mdi_client_update_state(): 1290 * Compute and update client device state 1291 * Notes: 1292 * A client device can be in any of three possible states: 1293 * 1294 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state 
with more
 *		than one online/standby paths. Can tolerate failures.
 *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *		no alternate paths available as standby. A failure on the online
 *		would result in loss of access to device data.
 *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
 *		no paths available to access the device.
 */
static void
i_mdi_client_update_state(mdi_client_t *ct)
{
	int state;
	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	state = i_mdi_client_compute_state(ct, NULL);
	MDI_CLIENT_SET_STATE(ct, state);
}

/*
 * i_mdi_client_compute_state():
 *		Compute client device state
 *
 *		mdi_phci_t *	Pointer to pHCI structure whose paths should
 *				be skipped while computing the new value.
 *				Used by i_mdi_phci_offline() to find the new
 *				client state after DR of a pHCI.
 */
static int
i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
{
	int		state;
	int		online_count = 0;
	int		standby_count = 0;
	mdi_pathinfo_t	*pip, *next;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	/*
	 * Walk the client's path list, counting ONLINE and STANDBY paths.
	 * Paths belonging to the pHCI being excluded (ph) are skipped.
	 */
	pip = ct->ct_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		if (MDI_PI(pip)->pi_phci == ph) {
			MDI_PI_UNLOCK(pip);
			pip = next;
			continue;
		}
		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_ONLINE)
			online_count++;
		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_STANDBY)
			standby_count++;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	/*
	 * Map the (online, standby) path counts to a client state, per the
	 * state definitions in the header comment above.
	 */
	if (online_count == 0) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_FAILED;
			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
			    " ct = %p\n", ct));
		} else if (standby_count == 1) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else if (online_count == 1) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else {
		state = MDI_CLIENT_STATE_OPTIMAL;
	}
	return (state);
}

/*
 * i_mdi_client2devinfo():
 *		Utility function: map a client component to its devinfo node.
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	return (ct->ct_dip);
}

/*
 * mdi_client_path2devinfo():
 *		Given the parent devinfo and child devfs pathname, search for
 *		a valid devfs node handle.  Returns NULL if no child of vdip
 *		has a matching device path.
 */
dev_info_t *
mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
{
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	char		*temp_pathname;
	int		circular;

	/*
	 * Allocate temp buffer
	 */
	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	/*
	 * Lock parent against changes
	 */
	ndi_devi_enter(vdip, &circular);
	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
	while ((cdip = ndip) != NULL) {
		/* advance before comparing so the loop exits with cdip set */
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		*temp_pathname = '\0';
		(void) ddi_pathname(cdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
	}
	/*
	 * Release devinfo lock
	 */
	ndi_devi_exit(vdip, circular);

	/*
	 * Free the temp buffer
	 */
	kmem_free(temp_pathname, MAXPATHLEN);
	return (cdip);
}


/*
 * mdi_client_get_path_count():
 *		Utility function to get number of path information nodes
 *		associated with a given client device.
 */
int
mdi_client_get_path_count(dev_info_t *cdip)
{
	mdi_client_t	*ct;
	int		count = 0;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		count = ct->ct_path_count;
	}
	return (count);
}


/*
 * i_mdi_get_hash_key():
 *		Create a hash using strings as keys.  Returns a bucket index
 *		for the vHCI client hash table.
 *
 */
static int
i_mdi_get_hash_key(char *str)
{
	uint32_t	g, hash = 0;
	char		*p;

	/* Simple additive hash over the bytes of the string */
	for (p = str; *p != '\0'; p++) {
		g = *p;
		hash += g;
	}
	/*
	 * NOTE(review): the modulus is (CLIENT_HASH_TABLE_SIZE - 1), so the
	 * last bucket of vh_client_table is never used -- confirm whether
	 * this is intentional or should be % CLIENT_HASH_TABLE_SIZE.
	 */
	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
}

/*
 * mdi_get_lb_policy():
 *		Get current load balancing policy for a given client device.
 *		Returns LOAD_BALANCE_NONE if cdip is not an MDI client.
 */
client_lb_t
mdi_get_lb_policy(dev_info_t *cdip)
{
	client_lb_t	lb = LOAD_BALANCE_NONE;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		lb = ct->ct_lb;
	}
	return (lb);
}

/*
 * mdi_set_lb_region_size():
 *		Set current region size for the load-balance.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	- cdip is not an MDI client or has no lb args
 */
int
mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
{
	mdi_client_t	*ct;
	int		rv = MDI_FAILURE;

	ct = i_devi_get_client(cdip);
	if (ct != NULL && ct->ct_lb_args != NULL) {
		ct->ct_lb_args->region_size = region_size;
		rv = MDI_SUCCESS;
	}
	return (rv);
}

/*
 * mdi_set_lb_policy():
 *		Set current load balancing policy for a given client device.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	- cdip is not an MDI client
 */
int
mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
{
	mdi_client_t	*ct;
	int		rv = MDI_FAILURE;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		ct->ct_lb = lb;
		rv = MDI_SUCCESS;
	}
	return (rv);
}

/*
 * mdi_failover():
 *		failover function called by the vHCI drivers to initiate
 *		a failover operation.  This is typically due to non-availability
 *		of online paths to route I/O requests.
Failover can be
 *		triggered through user application also.
 *
 *		The vHCI driver calls mdi_failover() to initiate a failover
 *		operation. mdi_failover() calls back into the vHCI driver's
 *		vo_failover() entry point to perform the actual failover
 *		operation. The reason for requiring the vHCI driver to
 *		initiate failover by calling mdi_failover(), instead of directly
 *		executing vo_failover() itself, is to ensure that the mdi
 *		framework can keep track of the client state properly.
 *		Additionally, mdi_failover() provides as a convenience the
 *		option of performing the failover operation synchronously or
 *		asynchronously
 *
 *		Upon successful completion of the failover operation, the
 *		paths that were previously ONLINE will be in the STANDBY state,
 *		and the newly activated paths will be in the ONLINE state.
 *
 *		The flags modifier determines whether the activation is done
 *		synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int			rv;
	mdi_client_t		*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress.  For ASYNC requests we cannot block,
	 * so return busy; for SYNC requests wait for stability.
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again (state may have changed while we waited above).
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(ct->ct_dip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode.  Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *		internal failover function. Invokes vHCI drivers failover
 *		callback function and process the failover status
 * Return Values:
 *		None
 *
 * Note: A client device in failover state can not be detached or freed.
1643 */ 1644 static int 1645 i_mdi_failover(void *arg) 1646 { 1647 int rv = MDI_SUCCESS; 1648 mdi_client_t *ct = (mdi_client_t *)arg; 1649 mdi_vhci_t *vh = ct->ct_vhci; 1650 1651 ASSERT(!MUTEX_HELD(&ct->ct_mutex)); 1652 1653 if (vh->vh_ops->vo_failover != NULL) { 1654 /* 1655 * Call vHCI drivers callback routine 1656 */ 1657 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1658 ct->ct_failover_flags); 1659 } 1660 1661 MDI_CLIENT_LOCK(ct); 1662 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1663 1664 /* 1665 * Save the failover return status 1666 */ 1667 ct->ct_failover_status = rv; 1668 1669 /* 1670 * As a result of failover, client status would have been changed. 1671 * Update the client state and wake up anyone waiting on this client 1672 * device. 1673 */ 1674 i_mdi_client_update_state(ct); 1675 1676 cv_broadcast(&ct->ct_failover_cv); 1677 MDI_CLIENT_UNLOCK(ct); 1678 return (rv); 1679 } 1680 1681 /* 1682 * Load balancing is logical block. 1683 * IOs within the range described by region_size 1684 * would go on the same path. This would improve the 1685 * performance by cache-hit on some of the RAID devices. 1686 * Search only for online paths(At some point we 1687 * may want to balance across target ports). 1688 * If no paths are found then default to round-robin. 
1689 */ 1690 static int 1691 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1692 { 1693 int path_index = -1; 1694 int online_path_count = 0; 1695 int online_nonpref_path_count = 0; 1696 int region_size = ct->ct_lb_args->region_size; 1697 mdi_pathinfo_t *pip; 1698 mdi_pathinfo_t *next; 1699 int preferred, path_cnt; 1700 1701 pip = ct->ct_path_head; 1702 while (pip) { 1703 MDI_PI_LOCK(pip); 1704 if (MDI_PI(pip)->pi_state == 1705 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1706 online_path_count++; 1707 } else if (MDI_PI(pip)->pi_state == 1708 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1709 online_nonpref_path_count++; 1710 } 1711 next = (mdi_pathinfo_t *) 1712 MDI_PI(pip)->pi_client_link; 1713 MDI_PI_UNLOCK(pip); 1714 pip = next; 1715 } 1716 /* if found any online/preferred then use this type */ 1717 if (online_path_count > 0) { 1718 path_cnt = online_path_count; 1719 preferred = 1; 1720 } else if (online_nonpref_path_count > 0) { 1721 path_cnt = online_nonpref_path_count; 1722 preferred = 0; 1723 } else { 1724 path_cnt = 0; 1725 } 1726 if (path_cnt) { 1727 path_index = (bp->b_blkno >> region_size) % path_cnt; 1728 pip = ct->ct_path_head; 1729 while (pip && path_index != -1) { 1730 MDI_PI_LOCK(pip); 1731 if (path_index == 0 && 1732 (MDI_PI(pip)->pi_state == 1733 MDI_PATHINFO_STATE_ONLINE) && 1734 MDI_PI(pip)->pi_preferred == preferred) { 1735 MDI_PI_HOLD(pip); 1736 MDI_PI_UNLOCK(pip); 1737 *ret_pip = pip; 1738 return (MDI_SUCCESS); 1739 } 1740 path_index --; 1741 next = (mdi_pathinfo_t *) 1742 MDI_PI(pip)->pi_client_link; 1743 MDI_PI_UNLOCK(pip); 1744 pip = next; 1745 } 1746 if (pip == NULL) { 1747 MDI_DEBUG(4, (CE_NOTE, NULL, 1748 "!lba %p, no pip !!\n", 1749 bp->b_blkno)); 1750 } else { 1751 MDI_DEBUG(4, (CE_NOTE, NULL, 1752 "!lba %p, no pip for path_index, " 1753 "pip %p\n", pip)); 1754 } 1755 } 1756 return (MDI_FAILURE); 1757 } 1758 1759 /* 1760 * mdi_select_path(): 1761 * select a path to access a client 
device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LB(Online paths will be selected
 *		based on the logical block).  The load balancing
 *		through vHCI drivers configuration file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  If start_pip is specified (non NULL) is
 *		used as start point to walk and find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path in returned in a held state (ref_cnt).
 *		Caller should release the hold by calling mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY	- Client device is busy failing over
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	/*
	 * The client sanity checks below apply only to standard behavior;
	 * override callers (e.g. scsi_vhci selecting a STANDBY path during
	 * attach/failover) bypass them intentionally.
	 */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * Completing a full circle: first wrap enables the
			 * non-preferred pass, second wrap terminates the loop.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
		    (ct->ct_lb_args->region_size) && bp &&
		    (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
			    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
				    MDI_PI(pip)->pi_preferred ==
				    preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	/*
	 * No path matched; MDI_BUSY if any path was transient (caller may
	 * retry), MDI_NOPATH otherwise.
	 */
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
 */
mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
{
	mdi_client_t *ct;

	if (!MDI_CLIENT(ct_dip))
		return (NULL);

	/*
	 * Walk through client link
	 */
	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
	ASSERT(ct != NULL);

	/* pip == NULL starts the walk at the head of the client's list */
	if (pip == NULL)
		return ((mdi_pathinfo_t *)ct->ct_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
}

/*
 * For a phci, return the next available path to any client
 * Note: ditto mdi_get_next_phci_path()
 */
mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
{
	mdi_phci_t *ph;

	if (!MDI_PHCI(ph_dip))
		return (NULL);

	/*
	 * Walk through pHCI link
	 */
	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
	ASSERT(ph != NULL);

	/* pip == NULL starts the walk at the head of the pHCI's list */
	if (pip == NULL)
		return ((mdi_pathinfo_t *)ph->ph_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
}

/*
 * mdi_get_next_path():
 *		mdi_pathinfo node walker function.  Get the next node from the
 *		client or pHCI device list.
 *
 * XXX This is wrapper function for compatibility purposes only.
 *
 *	It doesn't work under Multi-level MPxIO, where a dip
 *	is both client and phci (which link should next_path follow?).
 *	Once Leadville is modified to call mdi_get_next_phci/client_path,
 *	this interface should be removed.
 */
void
mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip,
    mdi_pathinfo_t **ret_pip)
{
	if (MDI_CLIENT(dip)) {
		*ret_pip = mdi_get_next_phci_path(dip, pip);
	} else if (MDI_PHCI(dip)) {
		*ret_pip = mdi_get_next_client_path(dip, pip);
	} else {
		*ret_pip = NULL;
	}
}

/*
 * mdi_hold_path():
 *		Hold the mdi_pathinfo node against unwanted unexpected free.
 *		A NULL pip is tolerated and ignored.
 * Return Values:
 *		None
 */
void
mdi_hold_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
	}
}


/*
 * mdi_rele_path():
 *		Release the mdi_pathinfo node which was selected
 *		through mdi_select_path() mechanism or manually held by
 *		calling mdi_hold_path().  Wakes up any thread waiting for
 *		the reference count to drain to zero (e.g. path free).
 * Return Values:
 *		None
 */
void
mdi_rele_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_RELE(pip);
		if (MDI_PI(pip)->pi_ref_cnt == 0) {
			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
		}
		MDI_PI_UNLOCK(pip);
	}
}


/*
 * mdi_pi_lock():
 *		Lock the mdi_pathinfo node.
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.
 * Note:
 *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *		Returns a pointer to the mdi_pathinfo node if a matching node is
 *		found.
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL
 * Notes:
 *		Caller need not hold any locks to call this function.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;

	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, NULL,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 * NOTE(review): the debug message below says "invalid phci"
		 * but this branch detects an invalid vHCI -- the message text
		 * looks copy-pasted; confirm and correct separately.
		 */
		MDI_DEBUG(2, (CE_WARN, NULL,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	/*
	 * Look for client device identified by caddr (guid)
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		mutex_enter(&ph->ph_mutex);
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		mutex_exit(&ph->ph_mutex);
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	mutex_enter(&mdi_mutex);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		mutex_exit(&mdi_mutex);
		return (pip);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	mutex_exit(&mdi_mutex);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (pip);
}

/*
 * mdi_pi_alloc():
 *		Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path is capable of having properties attached
 *		and passed to mdi_pi_online() to fully attach and online the
 *		path and client device node.
 *		The mdi_pathinfo node returned by this function must be
 *		destroyed using mdi_pi_free() if the path is no longer
 *		operational or if the caller fails to attach a client device
 *		node when calling mdi_pi_online(). The framework will not free
 *		the resources allocated.
 *		This function can be called from both interrupt and kernel
 *		contexts.  DDI_NOSLEEP flag should be used while calling
 *		from interrupt contexts.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;
	int		path_allocated = 0;

	/* All of pdip, cname, caddr, paddr and ret_pip are mandatory. */
	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;
	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/*
	 * Mark the pHCI unstable while we mutate its path list; it is
	 * marked stable again on every exit path below (at "fail:").
	 */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	mutex_enter(&mdi_mutex);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		/*
		 * NOTE(review): i_mdi_client_alloc() is presumably a
		 * KM_SLEEP-style allocation that cannot fail (only an
		 * ASSERT guards the result) -- confirm against its
		 * definition elsewhere in this file.
		 */
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* Roll back the client we may have just created. */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/*
	 * Walk the client's existing path list; if a pathinfo node for
	 * this pHCI/paddr pair already exists it is simply returned.
	 */
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	mutex_exit(&mdi_mutex);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	/* On the failure path pip is still NULL here. */
	*ret_pip = pip;

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *		Convenience wrapper around mdi_pi_alloc_compatible() with no
 *		"compatible" property list.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *		mdi_pathinfo
 */

/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	int		se_flag;
	int		kmem_flag;

	/* KM_SLEEP: this path must not be reached from interrupt context. */
	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	/* New paths start in INIT and are TRANSIENT until fully set up. */
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit the pHCI's disable flags onto the new path. */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	/* Private copy of the unit address; freed in i_mdi_pi_free(). */
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	/* The devinfo snapshot cache is now stale; invalidate it. */
	i_ddi_di_cache_invalidate(kmem_flag);

	return (pip);
}

/*
 * i_mdi_phci_add_path():
 *		Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *		Caller should hold per-pHCI mutex
 */

static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Append to the tail of the singly-linked pHCI path list. */
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 */

static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Append to the tail of the singly-linked client path list. */
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */

/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, pip));
		/*
		 * Wait up to 60s for the reference count to drain; -1 means
		 * the timeout expired without pi_ref_cv being signaled.
		 */
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till mdi_mutex is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop and re-take the client lock around mdi_mutex to respect
	 * the mdi_mutex -> ct_mutex lock ordering.
	 */
	MDI_CLIENT_UNLOCK(ct);
	mutex_enter(&mdi_mutex);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	if (!MDI_PI_IS_INITING(pip)) {
		/* Give the vHCI driver a chance to undo vo_pi_init(). */
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			mutex_exit(&mdi_mutex);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	mutex_exit(&mdi_mutex);

	/*
	 * vo_pi_uninit() failed: the path stays alive, so put it back in
	 * the vhci cache (it was removed above).  pip was not freed on
	 * this branch, so the use here is safe.
	 */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;
	int	se_flag;
	int	kmem_flag;

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* Busy both devinfo nodes while unlinking from the two lists. */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	/* Tear down synchronization primitives and owned allocations. */
	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *		Caller should hold per-pHCI mutex
 */

static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Singly-linked list: walk to find pip, tracking its predecessor. */
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
}

/*
 * i_mdi_client_remove_path():
 *		Remove a mdi_pathinfo node from client path list.
 */

static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Singly-linked list: walk to find pip, tracking its predecessor. */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/*
		 * ct_path_last is the round-robin cursor; reset it if it
		 * pointed at the node being removed.
		 */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		online a mdi_pathinfo node
 *
 *		Drives a path through a state transition (ONLINE, STANDBY,
 *		FAULT or OFFLINE): marks it transient, calls the vHCI's
 *		vo_pi_state_change() entry point, then updates the client
 *		device state to match the new path state.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* vo_pi_init() is called without the pathinfo lock held. */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* pHCI marked stable again at state_change_exit. */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/* Client lock is NOT held on this path. */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	/* Client marked stable again after the vHCI callback below. */
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* Notify the vHCI driver of the state change (no locks held). */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* Transition failed; roll back to the old state. */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3310 * Return Values: 3311 * MDI_SUCCESS 3312 * MDI_FAILURE 3313 */ 3314 int 3315 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3316 { 3317 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3318 dev_info_t *cdip; 3319 int client_held = 0; 3320 int rv; 3321 3322 ASSERT(ct != NULL); 3323 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3324 if (rv != MDI_SUCCESS) 3325 return (rv); 3326 3327 MDI_PI_LOCK(pip); 3328 if (MDI_PI(pip)->pi_pm_held == 0) { 3329 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3330 "i_mdi_pm_hold_pip\n")); 3331 i_mdi_pm_hold_pip(pip); 3332 client_held = 1; 3333 } 3334 MDI_PI_UNLOCK(pip); 3335 3336 if (client_held) { 3337 MDI_CLIENT_LOCK(ct); 3338 if (ct->ct_power_cnt == 0) { 3339 rv = i_mdi_power_all_phci(ct); 3340 } 3341 3342 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3343 "i_mdi_pm_hold_client\n")); 3344 i_mdi_pm_hold_client(ct, 1); 3345 MDI_CLIENT_UNLOCK(ct); 3346 } 3347 3348 /* 3349 * Create the per-path (pathinfo) IO and error kstats which 3350 * are reported via iostat(1m). 3351 * 3352 * Defer creating the per-path kstats if device is not yet 3353 * attached; the names of the kstats are constructed in part 3354 * using the devices instance number which is assigned during 3355 * process of attaching the client device. 3356 * 3357 * The framework post_attach handler, mdi_post_attach(), is 3358 * is responsible for initializing the client's pathinfo list 3359 * once successfully attached. 
3360 */ 3361 cdip = ct->ct_dip; 3362 ASSERT(cdip); 3363 if (cdip == NULL || !i_ddi_devi_attached(cdip)) 3364 return (rv); 3365 3366 MDI_CLIENT_LOCK(ct); 3367 rv = i_mdi_pi_kstat_create(pip); 3368 MDI_CLIENT_UNLOCK(ct); 3369 return (rv); 3370 } 3371 3372 /* 3373 * mdi_pi_standby(): 3374 * Place the mdi_pathinfo node in standby state 3375 * 3376 * Return Values: 3377 * MDI_SUCCESS 3378 * MDI_FAILURE 3379 */ 3380 int 3381 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3382 { 3383 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3384 } 3385 3386 /* 3387 * mdi_pi_fault(): 3388 * Place the mdi_pathinfo node in fault'ed state 3389 * Return Values: 3390 * MDI_SUCCESS 3391 * MDI_FAILURE 3392 */ 3393 int 3394 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3395 { 3396 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3397 } 3398 3399 /* 3400 * mdi_pi_offline(): 3401 * Offline a mdi_pathinfo node. 3402 * Return Values: 3403 * MDI_SUCCESS 3404 * MDI_FAILURE 3405 */ 3406 int 3407 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3408 { 3409 int ret, client_held = 0; 3410 mdi_client_t *ct; 3411 3412 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3413 3414 if (ret == MDI_SUCCESS) { 3415 MDI_PI_LOCK(pip); 3416 if (MDI_PI(pip)->pi_pm_held) { 3417 client_held = 1; 3418 } 3419 MDI_PI_UNLOCK(pip); 3420 3421 if (client_held) { 3422 ct = MDI_PI(pip)->pi_client; 3423 MDI_CLIENT_LOCK(ct); 3424 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3425 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3426 i_mdi_pm_rele_client(ct, 1); 3427 MDI_CLIENT_UNLOCK(ct); 3428 } 3429 } 3430 3431 return (ret); 3432 } 3433 3434 /* 3435 * i_mdi_pi_offline(): 3436 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3437 */ 3438 static int 3439 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3440 { 3441 dev_info_t *vdip = NULL; 3442 mdi_vhci_t *vh = NULL; 3443 mdi_client_t *ct = NULL; 3444 int (*f)(); 3445 int rv; 3446 3447 MDI_PI_LOCK(pip); 3448 ct = 
MDI_PI(pip)->pi_client; 3449 ASSERT(ct != NULL); 3450 3451 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3452 /* 3453 * Give a chance for pending I/Os to complete. 3454 */ 3455 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3456 "%d cmds still pending on path: %p\n", 3457 MDI_PI(pip)->pi_ref_cnt, pip)); 3458 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3459 &MDI_PI(pip)->pi_mutex, 3460 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3461 /* 3462 * The timeout time reached without ref_cnt being zero 3463 * being signaled. 3464 */ 3465 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3466 "Timeout reached on path %p without the cond\n", 3467 pip)); 3468 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3469 "%d cmds still pending on path: %p\n", 3470 MDI_PI(pip)->pi_ref_cnt, pip)); 3471 } 3472 } 3473 vh = ct->ct_vhci; 3474 vdip = vh->vh_dip; 3475 3476 /* 3477 * Notify vHCI that has registered this event 3478 */ 3479 ASSERT(vh->vh_ops); 3480 f = vh->vh_ops->vo_pi_state_change; 3481 3482 if (f != NULL) { 3483 MDI_PI_UNLOCK(pip); 3484 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3485 flags)) != MDI_SUCCESS) { 3486 MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed " 3487 "vdip 0x%x, pip 0x%x", vdip, pip)); 3488 } 3489 MDI_PI_LOCK(pip); 3490 } 3491 3492 /* 3493 * Set the mdi_pathinfo node state and clear the transient condition 3494 */ 3495 MDI_PI_SET_OFFLINE(pip); 3496 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3497 MDI_PI_UNLOCK(pip); 3498 3499 MDI_CLIENT_LOCK(ct); 3500 if (rv == MDI_SUCCESS) { 3501 if (ct->ct_unstable == 0) { 3502 dev_info_t *cdip = ct->ct_dip; 3503 3504 /* 3505 * Onlining the mdi_pathinfo node will impact the 3506 * client state Update the client and dev_info node 3507 * state accordingly 3508 */ 3509 i_mdi_client_update_state(ct); 3510 rv = NDI_SUCCESS; 3511 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3512 if (cdip && 3513 (i_ddi_node_state(cdip) >= 3514 DS_INITIALIZED)) { 3515 MDI_CLIENT_UNLOCK(ct); 3516 rv = 
ndi_devi_offline(cdip, 0); 3517 MDI_CLIENT_LOCK(ct); 3518 if (rv != NDI_SUCCESS) { 3519 /* 3520 * ndi_devi_offline failed. 3521 * Reset client flags to 3522 * online. 3523 */ 3524 MDI_DEBUG(4, (CE_WARN, cdip, 3525 "!ndi_devi_offline: failed " 3526 " Error: %x", rv)); 3527 MDI_CLIENT_SET_ONLINE(ct); 3528 } 3529 } 3530 } 3531 /* 3532 * Convert to MDI error code 3533 */ 3534 switch (rv) { 3535 case NDI_SUCCESS: 3536 rv = MDI_SUCCESS; 3537 break; 3538 case NDI_BUSY: 3539 rv = MDI_BUSY; 3540 break; 3541 default: 3542 rv = MDI_FAILURE; 3543 break; 3544 } 3545 } 3546 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3547 i_mdi_report_path_state(ct, pip); 3548 } 3549 3550 MDI_CLIENT_UNLOCK(ct); 3551 3552 /* 3553 * Change in the mdi_pathinfo node state will impact the client state 3554 */ 3555 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3556 ct, pip)); 3557 return (rv); 3558 } 3559 3560 3561 /* 3562 * mdi_pi_get_addr(): 3563 * Get the unit address associated with a mdi_pathinfo node 3564 * 3565 * Return Values: 3566 * char * 3567 */ 3568 char * 3569 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3570 { 3571 if (pip == NULL) 3572 return (NULL); 3573 3574 return (MDI_PI(pip)->pi_addr); 3575 } 3576 3577 /* 3578 * mdi_pi_get_client(): 3579 * Get the client devinfo associated with a mdi_pathinfo node 3580 * 3581 * Return Values: 3582 * Handle to client device dev_info node 3583 */ 3584 dev_info_t * 3585 mdi_pi_get_client(mdi_pathinfo_t *pip) 3586 { 3587 dev_info_t *dip = NULL; 3588 if (pip) { 3589 dip = MDI_PI(pip)->pi_client->ct_dip; 3590 } 3591 return (dip); 3592 } 3593 3594 /* 3595 * mdi_pi_get_phci(): 3596 * Get the pHCI devinfo associated with the mdi_pathinfo node 3597 * Return Values: 3598 * Handle to dev_info node 3599 */ 3600 dev_info_t * 3601 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3602 { 3603 dev_info_t *dip = NULL; 3604 if (pip) { 3605 dip = MDI_PI(pip)->pi_phci->ph_dip; 3606 } 3607 return (dip); 3608 } 3609 3610 /* 3611 * mdi_pi_get_client_private(): 3612 * Get the 
client private information associated with the 3613 * mdi_pathinfo node 3614 */ 3615 void * 3616 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3617 { 3618 void *cprivate = NULL; 3619 if (pip) { 3620 cprivate = MDI_PI(pip)->pi_cprivate; 3621 } 3622 return (cprivate); 3623 } 3624 3625 /* 3626 * mdi_pi_set_client_private(): 3627 * Set the client private information in the mdi_pathinfo node 3628 */ 3629 void 3630 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3631 { 3632 if (pip) { 3633 MDI_PI(pip)->pi_cprivate = priv; 3634 } 3635 } 3636 3637 /* 3638 * mdi_pi_get_phci_private(): 3639 * Get the pHCI private information associated with the 3640 * mdi_pathinfo node 3641 */ 3642 caddr_t 3643 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3644 { 3645 caddr_t pprivate = NULL; 3646 if (pip) { 3647 pprivate = MDI_PI(pip)->pi_pprivate; 3648 } 3649 return (pprivate); 3650 } 3651 3652 /* 3653 * mdi_pi_set_phci_private(): 3654 * Set the pHCI private information in the mdi_pathinfo node 3655 */ 3656 void 3657 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3658 { 3659 if (pip) { 3660 MDI_PI(pip)->pi_pprivate = priv; 3661 } 3662 } 3663 3664 /* 3665 * mdi_pi_get_state(): 3666 * Get the mdi_pathinfo node state. Transient states are internal 3667 * and not provided to the users 3668 */ 3669 mdi_pathinfo_state_t 3670 mdi_pi_get_state(mdi_pathinfo_t *pip) 3671 { 3672 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3673 3674 if (pip) { 3675 if (MDI_PI_IS_TRANSIENT(pip)) { 3676 /* 3677 * mdi_pathinfo is in state transition. Return the 3678 * last good state. 3679 */ 3680 state = MDI_PI_OLD_STATE(pip); 3681 } else { 3682 state = MDI_PI_STATE(pip); 3683 } 3684 } 3685 return (state); 3686 } 3687 3688 /* 3689 * Note that the following function needs to be the new interface for 3690 * mdi_pi_get_state when mpxio gets integrated to ON. 
3691 */ 3692 int 3693 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3694 uint32_t *ext_state) 3695 { 3696 *state = MDI_PATHINFO_STATE_INIT; 3697 3698 if (pip) { 3699 if (MDI_PI_IS_TRANSIENT(pip)) { 3700 /* 3701 * mdi_pathinfo is in state transition. Return the 3702 * last good state. 3703 */ 3704 *state = MDI_PI_OLD_STATE(pip); 3705 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3706 } else { 3707 *state = MDI_PI_STATE(pip); 3708 *ext_state = MDI_PI_EXT_STATE(pip); 3709 } 3710 } 3711 return (MDI_SUCCESS); 3712 } 3713 3714 /* 3715 * mdi_pi_get_preferred: 3716 * Get the preferred path flag 3717 */ 3718 int 3719 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3720 { 3721 if (pip) { 3722 return (MDI_PI(pip)->pi_preferred); 3723 } 3724 return (0); 3725 } 3726 3727 /* 3728 * mdi_pi_set_preferred: 3729 * Set the preferred path flag 3730 */ 3731 void 3732 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3733 { 3734 if (pip) { 3735 MDI_PI(pip)->pi_preferred = preferred; 3736 } 3737 } 3738 3739 3740 /* 3741 * mdi_pi_set_state(): 3742 * Set the mdi_pathinfo node state 3743 */ 3744 void 3745 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3746 { 3747 uint32_t ext_state; 3748 3749 if (pip) { 3750 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3751 MDI_PI(pip)->pi_state = state; 3752 MDI_PI(pip)->pi_state |= ext_state; 3753 } 3754 } 3755 3756 /* 3757 * Property functions: 3758 */ 3759 3760 int 3761 i_map_nvlist_error_to_mdi(int val) 3762 { 3763 int rv; 3764 3765 switch (val) { 3766 case 0: 3767 rv = DDI_PROP_SUCCESS; 3768 break; 3769 case EINVAL: 3770 case ENOTSUP: 3771 rv = DDI_PROP_INVAL_ARG; 3772 break; 3773 case ENOMEM: 3774 rv = DDI_PROP_NO_MEMORY; 3775 break; 3776 default: 3777 rv = DDI_PROP_NOT_FOUND; 3778 break; 3779 } 3780 return (rv); 3781 } 3782 3783 /* 3784 * mdi_pi_get_next_prop(): 3785 * Property walk function. 
The caller should hold mdi_pi_lock()
 *		and release by calling mdi_pi_unlock() at the end of walk to
 *		get a consistent value.
 */

nvpair_t *
mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
{
	/* No properties to walk without a node or a property list */
	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (NULL);
	}
	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
}

/*
 * mdi_prop_remove():
 *		Remove the named property from the named list.
 *		A NULL name removes every property on the node.
 */

int
mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
{
	if (pip == NULL) {
		return (DDI_PROP_NOT_FOUND);
	}
	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	if (name) {
		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
	} else {
		char		nvp_name[MAXNAMELEN];
		nvpair_t	*nvp;
		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
		while (nvp) {
			nvpair_t	*next;
			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
			/*
			 * Snapshot the name into a local buffer before
			 * removing: nvlist_remove_all() frees the nvpair,
			 * which would invalidate nvpair_name(nvp)'s storage.
			 * NOTE(review): names longer than MAXNAMELEN-1 would
			 * be truncated here and may not be removed — assumed
			 * not to occur in practice; confirm against callers.
			 */
			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
			    nvpair_name(nvp));
			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
			    nvp_name);
			nvp = next;
		}
	}
	MDI_PI_UNLOCK(pip);
	return (DDI_PROP_SUCCESS);
}

/*
 * mdi_prop_size():
 *		Get buffer size needed to pack the property data.
 *		Caller should hold the mdi_pathinfo_t lock to get a consistent
 *		buffer size.
3842 */ 3843 3844 int 3845 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3846 { 3847 int rv; 3848 size_t bufsize; 3849 3850 *buflenp = 0; 3851 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3852 return (DDI_PROP_NOT_FOUND); 3853 } 3854 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3855 rv = nvlist_size(MDI_PI(pip)->pi_prop, 3856 &bufsize, NV_ENCODE_NATIVE); 3857 *buflenp = bufsize; 3858 return (i_map_nvlist_error_to_mdi(rv)); 3859 } 3860 3861 /* 3862 * mdi_prop_pack(): 3863 * pack the property list. The caller should hold the 3864 * mdi_pathinfo_t node to get a consistent data 3865 */ 3866 3867 int 3868 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 3869 { 3870 int rv; 3871 size_t bufsize; 3872 3873 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 3874 return (DDI_PROP_NOT_FOUND); 3875 } 3876 3877 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3878 3879 bufsize = buflen; 3880 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 3881 NV_ENCODE_NATIVE, KM_SLEEP); 3882 3883 return (i_map_nvlist_error_to_mdi(rv)); 3884 } 3885 3886 /* 3887 * mdi_prop_update_byte(): 3888 * Create/Update a byte property 3889 */ 3890 int 3891 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 3892 { 3893 int rv; 3894 3895 if (pip == NULL) { 3896 return (DDI_PROP_INVAL_ARG); 3897 } 3898 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3899 MDI_PI_LOCK(pip); 3900 if (MDI_PI(pip)->pi_prop == NULL) { 3901 MDI_PI_UNLOCK(pip); 3902 return (DDI_PROP_NOT_FOUND); 3903 } 3904 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 3905 MDI_PI_UNLOCK(pip); 3906 return (i_map_nvlist_error_to_mdi(rv)); 3907 } 3908 3909 /* 3910 * mdi_prop_update_byte_array(): 3911 * Create/Update a byte array property 3912 */ 3913 int 3914 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 3915 uint_t nelements) 3916 { 3917 int rv; 3918 3919 if (pip == NULL) { 3920 return (DDI_PROP_INVAL_ARG); 3921 } 3922 
ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3923 MDI_PI_LOCK(pip); 3924 if (MDI_PI(pip)->pi_prop == NULL) { 3925 MDI_PI_UNLOCK(pip); 3926 return (DDI_PROP_NOT_FOUND); 3927 } 3928 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 3929 MDI_PI_UNLOCK(pip); 3930 return (i_map_nvlist_error_to_mdi(rv)); 3931 } 3932 3933 /* 3934 * mdi_prop_update_int(): 3935 * Create/Update a 32 bit integer property 3936 */ 3937 int 3938 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 3939 { 3940 int rv; 3941 3942 if (pip == NULL) { 3943 return (DDI_PROP_INVAL_ARG); 3944 } 3945 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3946 MDI_PI_LOCK(pip); 3947 if (MDI_PI(pip)->pi_prop == NULL) { 3948 MDI_PI_UNLOCK(pip); 3949 return (DDI_PROP_NOT_FOUND); 3950 } 3951 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 3952 MDI_PI_UNLOCK(pip); 3953 return (i_map_nvlist_error_to_mdi(rv)); 3954 } 3955 3956 /* 3957 * mdi_prop_update_int64(): 3958 * Create/Update a 64 bit integer property 3959 */ 3960 int 3961 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 3962 { 3963 int rv; 3964 3965 if (pip == NULL) { 3966 return (DDI_PROP_INVAL_ARG); 3967 } 3968 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3969 MDI_PI_LOCK(pip); 3970 if (MDI_PI(pip)->pi_prop == NULL) { 3971 MDI_PI_UNLOCK(pip); 3972 return (DDI_PROP_NOT_FOUND); 3973 } 3974 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 3975 MDI_PI_UNLOCK(pip); 3976 return (i_map_nvlist_error_to_mdi(rv)); 3977 } 3978 3979 /* 3980 * mdi_prop_update_int_array(): 3981 * Create/Update a int array property 3982 */ 3983 int 3984 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 3985 uint_t nelements) 3986 { 3987 int rv; 3988 3989 if (pip == NULL) { 3990 return (DDI_PROP_INVAL_ARG); 3991 } 3992 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3993 MDI_PI_LOCK(pip); 3994 if (MDI_PI(pip)->pi_prop == NULL) { 3995 MDI_PI_UNLOCK(pip); 3996 return (DDI_PROP_NOT_FOUND); 3997 } 3998 rv 
= nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 3999 nelements); 4000 MDI_PI_UNLOCK(pip); 4001 return (i_map_nvlist_error_to_mdi(rv)); 4002 } 4003 4004 /* 4005 * mdi_prop_update_string(): 4006 * Create/Update a string property 4007 */ 4008 int 4009 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4010 { 4011 int rv; 4012 4013 if (pip == NULL) { 4014 return (DDI_PROP_INVAL_ARG); 4015 } 4016 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4017 MDI_PI_LOCK(pip); 4018 if (MDI_PI(pip)->pi_prop == NULL) { 4019 MDI_PI_UNLOCK(pip); 4020 return (DDI_PROP_NOT_FOUND); 4021 } 4022 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4023 MDI_PI_UNLOCK(pip); 4024 return (i_map_nvlist_error_to_mdi(rv)); 4025 } 4026 4027 /* 4028 * mdi_prop_update_string_array(): 4029 * Create/Update a string array property 4030 */ 4031 int 4032 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4033 uint_t nelements) 4034 { 4035 int rv; 4036 4037 if (pip == NULL) { 4038 return (DDI_PROP_INVAL_ARG); 4039 } 4040 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4041 MDI_PI_LOCK(pip); 4042 if (MDI_PI(pip)->pi_prop == NULL) { 4043 MDI_PI_UNLOCK(pip); 4044 return (DDI_PROP_NOT_FOUND); 4045 } 4046 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4047 nelements); 4048 MDI_PI_UNLOCK(pip); 4049 return (i_map_nvlist_error_to_mdi(rv)); 4050 } 4051 4052 /* 4053 * mdi_prop_lookup_byte(): 4054 * Look for byte property identified by name. The data returned 4055 * is the actual property and valid as long as mdi_pathinfo_t node 4056 * is alive. 
4057 */ 4058 int 4059 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4060 { 4061 int rv; 4062 4063 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4064 return (DDI_PROP_NOT_FOUND); 4065 } 4066 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4067 return (i_map_nvlist_error_to_mdi(rv)); 4068 } 4069 4070 4071 /* 4072 * mdi_prop_lookup_byte_array(): 4073 * Look for byte array property identified by name. The data 4074 * returned is the actual property and valid as long as 4075 * mdi_pathinfo_t node is alive. 4076 */ 4077 int 4078 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4079 uint_t *nelements) 4080 { 4081 int rv; 4082 4083 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4084 return (DDI_PROP_NOT_FOUND); 4085 } 4086 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4087 nelements); 4088 return (i_map_nvlist_error_to_mdi(rv)); 4089 } 4090 4091 /* 4092 * mdi_prop_lookup_int(): 4093 * Look for int property identified by name. The data returned 4094 * is the actual property and valid as long as mdi_pathinfo_t 4095 * node is alive. 4096 */ 4097 int 4098 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4099 { 4100 int rv; 4101 4102 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4103 return (DDI_PROP_NOT_FOUND); 4104 } 4105 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4106 return (i_map_nvlist_error_to_mdi(rv)); 4107 } 4108 4109 /* 4110 * mdi_prop_lookup_int64(): 4111 * Look for int64 property identified by name. The data returned 4112 * is the actual property and valid as long as mdi_pathinfo_t node 4113 * is alive. 
4114 */ 4115 int 4116 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4117 { 4118 int rv; 4119 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4120 return (DDI_PROP_NOT_FOUND); 4121 } 4122 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4123 return (i_map_nvlist_error_to_mdi(rv)); 4124 } 4125 4126 /* 4127 * mdi_prop_lookup_int_array(): 4128 * Look for int array property identified by name. The data 4129 * returned is the actual property and valid as long as 4130 * mdi_pathinfo_t node is alive. 4131 */ 4132 int 4133 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4134 uint_t *nelements) 4135 { 4136 int rv; 4137 4138 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4139 return (DDI_PROP_NOT_FOUND); 4140 } 4141 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4142 (int32_t **)data, nelements); 4143 return (i_map_nvlist_error_to_mdi(rv)); 4144 } 4145 4146 /* 4147 * mdi_prop_lookup_string(): 4148 * Look for string property identified by name. The data 4149 * returned is the actual property and valid as long as 4150 * mdi_pathinfo_t node is alive. 4151 */ 4152 int 4153 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4154 { 4155 int rv; 4156 4157 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4158 return (DDI_PROP_NOT_FOUND); 4159 } 4160 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4161 return (i_map_nvlist_error_to_mdi(rv)); 4162 } 4163 4164 /* 4165 * mdi_prop_lookup_string_array(): 4166 * Look for string array property identified by name. The data 4167 * returned is the actual property and valid as long as 4168 * mdi_pathinfo_t node is alive. 
4169 */ 4170 4171 int 4172 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4173 uint_t *nelements) 4174 { 4175 int rv; 4176 4177 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4178 return (DDI_PROP_NOT_FOUND); 4179 } 4180 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4181 nelements); 4182 return (i_map_nvlist_error_to_mdi(rv)); 4183 } 4184 4185 /* 4186 * mdi_prop_free(): 4187 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4188 * functions return the pointer to actual property data and not a 4189 * copy of it. So the data returned is valid as long as 4190 * mdi_pathinfo_t node is valid. 4191 */ 4192 4193 /*ARGSUSED*/ 4194 int 4195 mdi_prop_free(void *data) 4196 { 4197 return (DDI_PROP_SUCCESS); 4198 } 4199 4200 /*ARGSUSED*/ 4201 static void 4202 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4203 { 4204 char *phci_path, *ct_path; 4205 char *ct_status; 4206 char *status; 4207 dev_info_t *dip = ct->ct_dip; 4208 char lb_buf[64]; 4209 4210 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 4211 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4212 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4213 return; 4214 } 4215 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4216 ct_status = "optimal"; 4217 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4218 ct_status = "degraded"; 4219 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4220 ct_status = "failed"; 4221 } else { 4222 ct_status = "unknown"; 4223 } 4224 4225 if (MDI_PI_IS_OFFLINE(pip)) { 4226 status = "offline"; 4227 } else if (MDI_PI_IS_ONLINE(pip)) { 4228 status = "online"; 4229 } else if (MDI_PI_IS_STANDBY(pip)) { 4230 status = "standby"; 4231 } else if (MDI_PI_IS_FAULT(pip)) { 4232 status = "faulted"; 4233 } else { 4234 status = "unknown"; 4235 } 4236 4237 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4238 (void) snprintf(lb_buf, sizeof (lb_buf), 4239 "%s, region-size: %d", mdi_load_balance_lba, 4240 
		    ct->ct_lb_args->region_size);
	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s", mdi_load_balance_none);
	} else {
		/* default: round-robin load balancing */
		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
		    mdi_load_balance_rr);
	}

	if (dip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
		    "path %s (%s%d) to target address: %s is %s"
		    " Load balancing: %s\n",
		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
		    ddi_get_instance(dip), ct_status,
		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
		    MDI_PI(pip)->pi_addr, status, lb_buf);
		kmem_free(phci_path, MAXPATHLEN);
		kmem_free(ct_path, MAXPATHLEN);
		/* report delivered; clear the pending-report flag */
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef DEBUG
/*
 * i_mdi_log():
 *		Utility function for error message management
 *
 */

/*VARARGS3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		buf[MAXNAMELEN];
	char		name[MAXNAMELEN];
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	if (dip) {
		/* higher-severity levels get a newline after the prefix */
		if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) {
			(void) snprintf(name, MAXNAMELEN, "%s%d:\n",
			    ddi_node_name(dip), ddi_get_instance(dip));
		} else {
			(void) snprintf(name, MAXNAMELEN, "%s%d:",
			    ddi_node_name(dip), ddi_get_instance(dip));
		}
	} else {
		name[0] = '\0';
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
	va_end(ap);

	/*
	 * The first character of the format selects the cmn_err(9F)
	 * destination: '!' log only, '?' log/boot only, '^' console only.
	 */
	switch (buf[0]) {
	case '!':
		log_only = 1;
		break;
	case '?':
		boot_only = 1;
		break;
	case '^':
		console_only = 1;
		break;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
	case CE_WARN:
	case CE_PANIC:
		/* re-emit the destination prefix, skipping it in buf */
		if (boot_only) {
			cmn_err(level, "?%s\t%s", name, &buf[1]);
		} else if (console_only) {
			cmn_err(level, "^%s\t%s", name, &buf[1]);
		} else if (log_only) {
			cmn_err(level, "!%s\t%s", name, &buf[1]);
		} else {
			cmn_err(level, "%s\t%s", name, buf);
		}
		break;
	default:
		cmn_err(level, "%s\t%s", name, buf);
		break;
	}
}
#endif	/* DEBUG */

void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification.
Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	/* power up all pHCIs if no power holds are outstanding yet */
	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
	    "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *		Online notification from NDI framework on pHCI/client
 *		device online.
 * Return Values:
 *		NDI_SUCCESS
 *		MDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	/* a node may be both a pHCI and a client; handle each role */
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *		Offline notification from NDI framework on pHCI/Client device
 *		offline.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_FAILURE
 */

/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);
		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
	    dip));

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. 
Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/* Pass 1: verify every path's client can tolerate the offline */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			/* locks dropped across ndi_devi_offline() call */
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Rollback: restore clients offlined before the failure
		 * point to a state matching their computed client state.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	/* Pass 2: offline each path; any survivor means the pHCI is busy */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
	    dip));
	ct = i_devi_get_client(dip);
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!One or more paths to this device is "
			    "in transient state. This device can not "
			    "be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!Client device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. 
Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}

/*
 * mdi_pre_attach():
 *		Pre attach() notification handler
 */

/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* don't support old DDI_PM_RESUME */
	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
	    (cmd == DDI_PM_RESUME))
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * mdi_post_attach():
 *		Post attach() notification handler
 */

/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_attach: called %p\n", ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				/* attach failed: revert to detached state */
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_attach: failed error=%d\n",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_resume: called %p\n", ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_resume: failed error=%d\n",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", ct));
			if (error != DDI_SUCCESS) {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_attach: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (CE_WARN, dip,
				    "mdi_post_attach i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached.
			 * Create kstats for any pathinfo structures
			 * initially associated with this client.
			 */
			for (pip = ct->ct_path_head; pip != NULL;
			    pip = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link) {
				(void) i_mdi_pi_kstat_create(pip);
				i_mdi_report_path_state(ct, pip);
			}
			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_resume: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}

/*
 * mdi_pre_detach():
 *		Pre detach notification handler
 */

/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	/* client pre-detach result is advisory; pHCI result is returned */
	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, 
dip,
		    "!pHCI pre_detach: called %p\n", ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			/* suspend only clients not already detached/suspended */
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_detach: called %p\n", ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_suspend: called %p\n", ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *		Post detach notification handler
 */

/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed. Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed. 
/*
 * i_mdi_client_post_detach():
 *	Post-detach notification handler for an mdi client node.
 *	Drops or resets the client's power-management holds and, if the
 *	detach/suspend failed (error != DDI_SUCCESS), restores the client
 *	to the converse (attached/resumed) state.
 */
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_detach: called %p\n", ct));
		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
			/*
			 * Another thread is mid-attach: only release the
			 * per-path holds rather than zeroing PM state.
			 */
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_suspend: called %p\n", ct));
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}
/*
 * create and install per-path (client - pHCI) statistics
 * I/O stats supported: nread, nwritten, reads, and writes
 * Error stats - hard errors, soft errors, & transport errors
 *
 * kstats are shared between all paths of the same client-pHCI pair;
 * if a sibling path to the same pHCI already has kstats, this path
 * takes a reference on them instead of creating new ones.
 * Caller must hold the client's ct_mutex.
 */
static int
i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
{

	dev_info_t	*client = MDI_PI(pip)->pi_client->ct_dip;
	dev_info_t	*ppath = MDI_PI(pip)->pi_phci->ph_dip;
	char		ksname[KSTAT_STRLEN];
	mdi_pathinfo_t	*cpip;
	const char	*err_postfix = ",err";
	kstat_t		*kiosp, *kerrsp;
	struct pi_errs	*nsp;
	struct mdi_pi_kstats *mdi_statp;

	ASSERT(client != NULL && ppath != NULL);

	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));

	/* Already created for this path: nothing to do. */
	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
		if (cpip == pip)
			continue;
		/*
		 * We have found a different path with same parent
		 * kstats for a given client-pHCI are common
		 */
		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
		    (MDI_PI(cpip)->pi_kstats != NULL)) {
			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
			return (MDI_SUCCESS);
		}
	}

	/*
	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
	 * clamp length of name against max length of error kstat name
	 * (the ",err" postfix is appended below, so room must remain).
	 */
	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
	    ddi_driver_name(client), ddi_get_instance(client),
	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
	    (KSTAT_STRLEN - strlen(err_postfix))) {
		return (MDI_FAILURE);
	}
	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
		return (MDI_FAILURE);
	}

	(void) strcat(ksname, err_postfix);
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);

	if (kerrsp == NULL) {
		/* Error kstat failed: undo the I/O kstat too. */
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}
/*
 * destroy per-path statistics (the I/O and error kstats created by
 * i_mdi_pi_kstat_create); the underlying kstats are reference-counted
 * and only deleted when the last sharing pathinfo node drops them.
 */
static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
{

	struct mdi_pi_kstats *mdi_statp;

	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
		return;

	MDI_PI(pip)->pi_kstats = NULL;

	/*
	 * the kstat may be shared between multiple pathinfo nodes
	 * decrement this pathinfo's usage, removing the kstats
	 * themselves when the last pathinfo reference is removed.
	 */
	ASSERT(mdi_statp->pi_kstat_ref > 0);
	if (--mdi_statp->pi_kstat_ref != 0)
		return;

	kstat_delete(mdi_statp->pi_kstat_iostats);
	kstat_delete(mdi_statp->pi_kstat_errstats);
	kmem_free(mdi_statp, sizeof (*mdi_statp));
}
5237 */ 5238 int 5239 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5240 { 5241 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5242 } 5243 5244 /* 5245 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5246 * argument) for a particular client (specified in the client_path argument). 5247 * Enabling a path means that MPxIO may select the enabled path for routing 5248 * future I/O requests, subject to other path state constraints. 5249 */ 5250 5251 int 5252 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5253 { 5254 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5255 } 5256 5257 5258 /* 5259 * Common routine for doing enable/disable. 5260 */ 5261 int 5262 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5263 { 5264 5265 mdi_phci_t *ph; 5266 mdi_vhci_t *vh = NULL; 5267 mdi_client_t *ct; 5268 mdi_pathinfo_t *next, *pip; 5269 int found_it; 5270 int (*f)() = NULL; 5271 int rv; 5272 int sync_flag = 0; 5273 5274 ph = i_devi_get_phci(pdip); 5275 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5276 " Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip)); 5277 if (ph == NULL) { 5278 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5279 " failed. ph = NULL operation = %d\n", op)); 5280 return (MDI_FAILURE); 5281 } 5282 5283 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5284 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5285 " Invalid operation = %d\n", op)); 5286 return (MDI_FAILURE); 5287 } 5288 5289 sync_flag = (flags << 8) & 0xf00; 5290 5291 vh = ph->ph_vhci; 5292 f = vh->vh_ops->vo_pi_state_change; 5293 5294 if (cdip == NULL) { 5295 /* 5296 * Need to mark the Phci as enabled/disabled. 
5297 */ 5298 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5299 "Operation %d for the phci\n", op)); 5300 MDI_PHCI_LOCK(ph); 5301 switch (flags) { 5302 case USER_DISABLE: 5303 if (op == MDI_DISABLE_OP) 5304 MDI_PHCI_SET_USER_DISABLE(ph); 5305 else 5306 MDI_PHCI_SET_USER_ENABLE(ph); 5307 break; 5308 case DRIVER_DISABLE: 5309 if (op == MDI_DISABLE_OP) 5310 MDI_PHCI_SET_DRV_DISABLE(ph); 5311 else 5312 MDI_PHCI_SET_DRV_ENABLE(ph); 5313 break; 5314 case DRIVER_DISABLE_TRANSIENT: 5315 if (op == MDI_DISABLE_OP) 5316 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5317 else 5318 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5319 break; 5320 default: 5321 MDI_PHCI_UNLOCK(ph); 5322 MDI_DEBUG(1, (CE_NOTE, NULL, 5323 "!i_mdi_pi_enable_disable:" 5324 " Invalid flag argument= %d\n", flags)); 5325 } 5326 5327 /* 5328 * Phci has been disabled. Now try to enable/disable 5329 * path info's to each client. 5330 */ 5331 pip = ph->ph_path_head; 5332 while (pip != NULL) { 5333 /* 5334 * Do a callback into the mdi consumer to let it 5335 * know that path is about to be enabled/disabled. 
5336 */ 5337 if (f != NULL) { 5338 rv = (*f)(vh->vh_dip, pip, 0, 5339 MDI_PI_EXT_STATE(pip), 5340 MDI_EXT_STATE_CHANGE | sync_flag | 5341 op | MDI_BEFORE_STATE_CHANGE); 5342 if (rv != MDI_SUCCESS) { 5343 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5344 "!vo_pi_state_change: failed rv = %x", rv)); 5345 } 5346 } 5347 5348 MDI_PI_LOCK(pip); 5349 next = 5350 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5351 switch (flags) { 5352 case USER_DISABLE: 5353 if (op == MDI_DISABLE_OP) 5354 MDI_PI_SET_USER_DISABLE(pip); 5355 else 5356 MDI_PI_SET_USER_ENABLE(pip); 5357 break; 5358 case DRIVER_DISABLE: 5359 if (op == MDI_DISABLE_OP) 5360 MDI_PI_SET_DRV_DISABLE(pip); 5361 else 5362 MDI_PI_SET_DRV_ENABLE(pip); 5363 break; 5364 case DRIVER_DISABLE_TRANSIENT: 5365 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5366 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5367 else 5368 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5369 break; 5370 } 5371 MDI_PI_UNLOCK(pip); 5372 /* 5373 * Do a callback into the mdi consumer to let it 5374 * know that path is now enabled/disabled. 5375 */ 5376 if (f != NULL) { 5377 rv = (*f)(vh->vh_dip, pip, 0, 5378 MDI_PI_EXT_STATE(pip), 5379 MDI_EXT_STATE_CHANGE | sync_flag | 5380 op | MDI_AFTER_STATE_CHANGE); 5381 if (rv != MDI_SUCCESS) { 5382 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5383 "!vo_pi_state_change: failed rv = %x", rv)); 5384 } 5385 } 5386 pip = next; 5387 } 5388 MDI_PHCI_UNLOCK(ph); 5389 } else { 5390 5391 /* 5392 * Disable a specific client. 5393 */ 5394 ct = i_devi_get_client(cdip); 5395 if (ct == NULL) { 5396 MDI_DEBUG(1, (CE_NOTE, NULL, 5397 "!i_mdi_pi_enable_disable:" 5398 " failed. 
ct = NULL operation = %d\n", op)); 5399 return (MDI_FAILURE); 5400 } 5401 5402 MDI_CLIENT_LOCK(ct); 5403 pip = ct->ct_path_head; 5404 found_it = 0; 5405 while (pip != NULL) { 5406 MDI_PI_LOCK(pip); 5407 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5408 if (MDI_PI(pip)->pi_phci == ph) { 5409 MDI_PI_UNLOCK(pip); 5410 found_it = 1; 5411 break; 5412 } 5413 MDI_PI_UNLOCK(pip); 5414 pip = next; 5415 } 5416 5417 MDI_CLIENT_UNLOCK(ct); 5418 if (found_it == 0) { 5419 MDI_DEBUG(1, (CE_NOTE, NULL, 5420 "!i_mdi_pi_enable_disable:" 5421 " failed. Could not find corresponding pip\n")); 5422 return (MDI_FAILURE); 5423 } 5424 /* 5425 * Do a callback into the mdi consumer to let it 5426 * know that path is about to get enabled/disabled. 5427 */ 5428 if (f != NULL) { 5429 rv = (*f)(vh->vh_dip, pip, 0, 5430 MDI_PI_EXT_STATE(pip), 5431 MDI_EXT_STATE_CHANGE | sync_flag | 5432 op | MDI_BEFORE_STATE_CHANGE); 5433 if (rv != MDI_SUCCESS) { 5434 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5435 "!vo_pi_state_change: failed rv = %x", rv)); 5436 } 5437 } 5438 MDI_PI_LOCK(pip); 5439 switch (flags) { 5440 case USER_DISABLE: 5441 if (op == MDI_DISABLE_OP) 5442 MDI_PI_SET_USER_DISABLE(pip); 5443 else 5444 MDI_PI_SET_USER_ENABLE(pip); 5445 break; 5446 case DRIVER_DISABLE: 5447 if (op == MDI_DISABLE_OP) 5448 MDI_PI_SET_DRV_DISABLE(pip); 5449 else 5450 MDI_PI_SET_DRV_ENABLE(pip); 5451 break; 5452 case DRIVER_DISABLE_TRANSIENT: 5453 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5454 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5455 else 5456 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5457 break; 5458 } 5459 MDI_PI_UNLOCK(pip); 5460 /* 5461 * Do a callback into the mdi consumer to let it 5462 * know that path is now enabled/disabled. 
/*
 * mdi_devi_config_one():
 *	Configure the client device for the path named by devnm under
 *	pHCI pdip. Polls mdi_pi_find() with exponential backoff (starting
 *	at 0.1s, doubling each retry, clamped to the remaining timeout)
 *	until the pathinfo node appears or timeout expires, then onlines
 *	the client node and returns it held through *cdipp.
 *	Returns MDI_SUCCESS or MDI_FAILURE.
 */
/*ARGSUSED3*/
int
mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
    int flags, clock_t timeout)
{
	mdi_pathinfo_t *pip;
	dev_info_t *dip;
	clock_t interval = drv_usectohz(100000);    /* 0.1 sec */
	char *paddr;

	MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm));

	if (!MDI_PHCI(pdip))
		return (MDI_FAILURE);

	/* devnm must be of the form name@address; find the address part */
	paddr = strchr(devnm, '@');
	if (paddr == NULL)
		return (MDI_FAILURE);

	paddr++;	/* skip '@' */
	pip = mdi_pi_find(pdip, NULL, paddr);
	while (pip == NULL && timeout > 0) {
		if (interval > timeout)
			interval = timeout;
		if (flags & NDI_DEVI_DEBUG) {
			cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n",
			    ddi_driver_name(pdip), ddi_get_instance(pdip),
			    paddr, interval, timeout);
		}
		delay(interval);
		timeout -= interval;
		interval += interval;	/* exponential backoff */
		pip = mdi_pi_find(pdip, NULL, paddr);
	}

	if (pip == NULL)
		return (MDI_FAILURE);
	dip = mdi_pi_get_client(pip);
	if (ndi_devi_online(dip, flags) != NDI_SUCCESS)
		return (MDI_FAILURE);
	*cdipp = dip;

	/* TODO: holding should happen inside search functions */
	ndi_hold_devi(dip);
	return (MDI_SUCCESS);
}
(MDI_PI(pip)->pi_pm_held) { 5540 return; 5541 } 5542 5543 ph_dip = mdi_pi_get_phci(pip); 5544 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n", 5545 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5546 if (ph_dip == NULL) { 5547 return; 5548 } 5549 5550 MDI_PI_UNLOCK(pip); 5551 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5552 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5553 pm_hold_power(ph_dip); 5554 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5555 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5556 MDI_PI_LOCK(pip); 5557 5558 MDI_PI(pip)->pi_pm_held = 1; 5559 } 5560 5561 /* 5562 * Allow phci powered down 5563 */ 5564 static void 5565 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5566 { 5567 dev_info_t *ph_dip = NULL; 5568 5569 ASSERT(pip != NULL); 5570 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5571 5572 if (MDI_PI(pip)->pi_pm_held == 0) { 5573 return; 5574 } 5575 5576 ph_dip = mdi_pi_get_phci(pip); 5577 ASSERT(ph_dip != NULL); 5578 5579 MDI_PI_UNLOCK(pip); 5580 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n", 5581 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5582 5583 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5584 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5585 pm_rele_power(ph_dip); 5586 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5587 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5588 5589 MDI_PI_LOCK(pip); 5590 MDI_PI(pip)->pi_pm_held = 0; 5591 } 5592 5593 static void 5594 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5595 { 5596 ASSERT(ct); 5597 5598 ct->ct_power_cnt += incr; 5599 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client " 5600 "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr)); 5601 ASSERT(ct->ct_power_cnt >= 0); 5602 } 5603 5604 static void 5605 i_mdi_rele_all_phci(mdi_client_t *ct) 5606 { 5607 mdi_pathinfo_t *pip; 5608 5609 ASSERT(mutex_owned(&ct->ct_mutex)); 5610 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5611 while (pip != NULL) { 5612 mdi_hold_path(pip); 5613 MDI_PI_LOCK(pip); 5614 
i_mdi_pm_rele_pip(pip); 5615 MDI_PI_UNLOCK(pip); 5616 mdi_rele_path(pip); 5617 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5618 } 5619 } 5620 5621 static void 5622 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5623 { 5624 ASSERT(ct); 5625 5626 if (i_ddi_devi_attached(ct->ct_dip)) { 5627 ct->ct_power_cnt -= decr; 5628 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client " 5629 "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr)); 5630 } 5631 5632 ASSERT(ct->ct_power_cnt >= 0); 5633 if (ct->ct_power_cnt == 0) { 5634 i_mdi_rele_all_phci(ct); 5635 return; 5636 } 5637 } 5638 5639 static void 5640 i_mdi_pm_reset_client(mdi_client_t *ct) 5641 { 5642 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client " 5643 "ct_power_cnt = %d\n", ct->ct_power_cnt)); 5644 ct->ct_power_cnt = 0; 5645 i_mdi_rele_all_phci(ct); 5646 ct->ct_powercnt_config = 0; 5647 ct->ct_powercnt_unconfig = 0; 5648 ct->ct_powercnt_reset = 1; 5649 } 5650 5651 static void 5652 i_mdi_pm_hold_all_phci(mdi_client_t *ct) 5653 { 5654 mdi_pathinfo_t *pip; 5655 ASSERT(mutex_owned(&ct->ct_mutex)); 5656 5657 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5658 while (pip != NULL) { 5659 mdi_hold_path(pip); 5660 MDI_PI_LOCK(pip); 5661 i_mdi_pm_hold_pip(pip); 5662 MDI_PI_UNLOCK(pip); 5663 mdi_rele_path(pip); 5664 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5665 } 5666 } 5667 5668 static int 5669 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5670 { 5671 int ret; 5672 dev_info_t *ph_dip; 5673 5674 MDI_PI_LOCK(pip); 5675 i_mdi_pm_hold_pip(pip); 5676 5677 ph_dip = mdi_pi_get_phci(pip); 5678 MDI_PI_UNLOCK(pip); 5679 5680 /* bring all components of phci to full power */ 5681 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5682 "pm_powerup for %s%d\n", ddi_get_name(ph_dip), 5683 ddi_get_instance(ph_dip))); 5684 5685 ret = pm_powerup(ph_dip); 5686 5687 if (ret == DDI_FAILURE) { 5688 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5689 "pm_powerup FAILED for %s%d\n", 5690 
/*
 * i_mdi_power_all_phci():
 *	Power up the pHCI of every path of the client. The client lock is
 *	dropped around each i_mdi_power_one_phci() call (it can block);
 *	the path is held across the drop so it cannot disappear.
 *	Returns MDI_SUCCESS if at least one pHCI powered up, else
 *	MDI_FAILURE. Caller holds ct_mutex.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		succeeded = 0;

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_CLIENT_UNLOCK(ct);
		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
			succeeded = 1;

		ASSERT(ct == MDI_PI(pip)->pi_client);
		MDI_CLIENT_LOCK(ct);
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}
/*
 * mdi_bus_power():
 *	Bus power entry point for the vHCI. Tracks client power
 *	transitions for PRE/POST notification and HAS_CHANGED operations,
 *	holding/releasing pHCI power as the client's level changes.
 *	All other operations are passed through to pm_busop_bus_power().
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		/* bpc is only valid for the two notification ops */
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		/* bphc is only valid for HAS_CHANGED */
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state changes are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the client's new up/down state on success */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might have started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care of in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* transition complete: wake any waiting threads */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level raised: hold and power up the pHCIs */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* dropped to level 0 from a known level: release holds */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
/*
 * i_mdi_pm_pre_config_one():
 *	Pre-configuration PM hold for a single client child: power up
 *	all pHCIs (if no holds exist yet) and take a per-path hold,
 *	recording the hold in ct_powercnt_config so post-config can
 *	balance it. Idempotent if the hold is already in place.
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* wait for any in-flight power transition to settle */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one ALREADY held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
(MDI_FAILURE); 5959 5960 MDI_CLIENT_LOCK(ct); 5961 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5962 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5963 5964 if (!i_ddi_devi_attached(ct->ct_dip)) { 5965 MDI_DEBUG(4, (CE_NOTE, child, 5966 "i_mdi_pm_pre_unconfig node detached already\n")); 5967 MDI_CLIENT_UNLOCK(ct); 5968 return (MDI_SUCCESS); 5969 } 5970 5971 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 5972 (flags & NDI_AUTODETACH)) { 5973 MDI_DEBUG(4, (CE_NOTE, child, 5974 "i_mdi_pm_pre_unconfig auto-modunload\n")); 5975 MDI_CLIENT_UNLOCK(ct); 5976 return (MDI_FAILURE); 5977 } 5978 5979 if (ct->ct_powercnt_unconfig) { 5980 MDI_DEBUG(4, (CE_NOTE, child, 5981 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 5982 MDI_CLIENT_UNLOCK(ct); 5983 *held = 1; 5984 return (MDI_SUCCESS); 5985 } 5986 5987 if (ct->ct_power_cnt == 0) { 5988 ret = i_mdi_power_all_phci(ct); 5989 } 5990 MDI_DEBUG(4, (CE_NOTE, child, 5991 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 5992 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5993 ct->ct_powercnt_unconfig = 1; 5994 ct->ct_powercnt_reset = 0; 5995 MDI_CLIENT_UNLOCK(ct); 5996 if (ret == MDI_SUCCESS) 5997 *held = 1; 5998 return (ret); 5999 } 6000 6001 static int 6002 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held, 6003 int flags) 6004 { 6005 int ret = MDI_SUCCESS; 6006 dev_info_t *cdip; 6007 int circ; 6008 6009 ASSERT(MDI_VHCI(parent)); 6010 *held = 0; 6011 6012 /* ndi_devi_unconfig_one */ 6013 if (child) { 6014 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6015 } 6016 6017 /* devi_unconfig_common */ 6018 ndi_devi_enter(parent, &circ); 6019 cdip = ddi_get_child(parent); 6020 while (cdip) { 6021 dev_info_t *next = ddi_get_next_sibling(cdip); 6022 6023 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6024 cdip = next; 6025 } 6026 ndi_devi_exit(parent, circ); 6027 6028 if (*held) 6029 ret = MDI_SUCCESS; 6030 6031 return (ret); 6032 } 6033 6034 static void 6035 i_mdi_pm_post_config_one(dev_info_t *child) 6036 { 6037 
/*
 * i_mdi_pm_post_config_one():
 *	Post-configuration PM balancing for a single client child:
 *	releases the hold taken by i_mdi_pm_pre_config_one(), counting
 *	only paths still ONLINE/STANDBY, or resets PM state entirely if
 *	the client was powered down or never attached.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait for any in-flight power transition to settle */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre-config hold to balance */
	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		/* count only paths that are still usable */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}
/*
 * i_mdi_pm_post_unconfig_one():
 *	Post-unconfig PM balancing for a single client child: releases
 *	the hold taken by i_mdi_pm_pre_unconfig_one() (counting only
 *	ONLINE/STANDBY paths), or resets PM state entirely if the detach
 *	failed or the node is gone and no attach is in progress.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait for any in-flight power transition to settle */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre-unconfig hold to balance */
	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
		/* count only paths that are still usable */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}
ndi_devi_enter(parent, &circ); 6186 cdip = ddi_get_child(parent); 6187 while (cdip) { 6188 dev_info_t *next = ddi_get_next_sibling(cdip); 6189 6190 i_mdi_pm_post_unconfig_one(cdip); 6191 cdip = next; 6192 } 6193 ndi_devi_exit(parent, circ); 6194 } 6195 6196 int 6197 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6198 { 6199 int circ, ret = MDI_SUCCESS; 6200 dev_info_t *client_dip = NULL; 6201 mdi_client_t *ct; 6202 6203 /* 6204 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6205 * Power up pHCI for the named client device. 6206 * Note: Before the client is enumerated under vhci by phci, 6207 * client_dip can be NULL. Then proceed to power up all the 6208 * pHCIs. 6209 */ 6210 if (devnm != NULL) { 6211 ndi_devi_enter(vdip, &circ); 6212 client_dip = ndi_devi_findchild(vdip, devnm); 6213 ndi_devi_exit(vdip, circ); 6214 } 6215 6216 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op)); 6217 6218 switch (op) { 6219 case MDI_PM_PRE_CONFIG: 6220 ret = i_mdi_pm_pre_config(vdip, client_dip); 6221 6222 break; 6223 case MDI_PM_PRE_UNCONFIG: 6224 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6225 flags); 6226 6227 break; 6228 case MDI_PM_POST_CONFIG: 6229 i_mdi_pm_post_config(vdip, client_dip); 6230 6231 break; 6232 case MDI_PM_POST_UNCONFIG: 6233 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6234 6235 break; 6236 case MDI_PM_HOLD_POWER: 6237 case MDI_PM_RELE_POWER: 6238 ASSERT(args); 6239 6240 client_dip = (dev_info_t *)args; 6241 ASSERT(MDI_CLIENT(client_dip)); 6242 6243 ct = i_devi_get_client(client_dip); 6244 MDI_CLIENT_LOCK(ct); 6245 6246 if (op == MDI_PM_HOLD_POWER) { 6247 if (ct->ct_power_cnt == 0) { 6248 (void) i_mdi_power_all_phci(ct); 6249 MDI_DEBUG(4, (CE_NOTE, client_dip, 6250 "mdi_power i_mdi_pm_hold_client\n")); 6251 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6252 } 6253 } else { 6254 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6255 MDI_DEBUG(4, (CE_NOTE, client_dip, 6256 "mdi_power 
i_mdi_pm_rele_client\n")); 6257 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6258 } else { 6259 MDI_DEBUG(4, (CE_NOTE, client_dip, 6260 "mdi_power i_mdi_pm_reset_client\n")); 6261 i_mdi_pm_reset_client(ct); 6262 } 6263 } 6264 6265 MDI_CLIENT_UNLOCK(ct); 6266 break; 6267 default: 6268 break; 6269 } 6270 6271 return (ret); 6272 } 6273 6274 int 6275 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6276 { 6277 mdi_vhci_t *vhci; 6278 6279 if (!MDI_VHCI(dip)) 6280 return (MDI_FAILURE); 6281 6282 if (mdi_class) { 6283 vhci = DEVI(dip)->devi_mdi_xhci; 6284 ASSERT(vhci); 6285 *mdi_class = vhci->vh_class; 6286 } 6287 6288 return (MDI_SUCCESS); 6289 } 6290 6291 int 6292 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6293 { 6294 mdi_phci_t *phci; 6295 6296 if (!MDI_PHCI(dip)) 6297 return (MDI_FAILURE); 6298 6299 if (mdi_class) { 6300 phci = DEVI(dip)->devi_mdi_xhci; 6301 ASSERT(phci); 6302 *mdi_class = phci->ph_vhci->vh_class; 6303 } 6304 6305 return (MDI_SUCCESS); 6306 } 6307 6308 int 6309 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6310 { 6311 mdi_client_t *client; 6312 6313 if (!MDI_CLIENT(dip)) 6314 return (MDI_FAILURE); 6315 6316 if (mdi_class) { 6317 client = DEVI(dip)->devi_mdi_client; 6318 ASSERT(client); 6319 *mdi_class = client->ct_vhci->vh_class; 6320 } 6321 6322 return (MDI_SUCCESS); 6323 } 6324 6325 void * 6326 mdi_client_get_vhci_private(dev_info_t *dip) 6327 { 6328 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6329 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6330 mdi_client_t *ct; 6331 ct = i_devi_get_client(dip); 6332 return (ct->ct_vprivate); 6333 } 6334 return (NULL); 6335 } 6336 6337 void 6338 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6339 { 6340 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6341 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6342 mdi_client_t *ct; 6343 ct = i_devi_get_client(dip); 6344 ct->ct_vprivate = data; 6345 } 6346 } 6347 
/*
 * mdi_pi_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_pathinfo node; returns NULL for a NULL pip.
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
    caddr_t vprivate = NULL;
    if (pip) {
        vprivate = MDI_PI(pip)->pi_vprivate;
    }
    return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *		Set the vhci private information in the mdi_pathinfo node;
 *		a no-op for a NULL pip.
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
    if (pip) {
        MDI_PI(pip)->pi_vprivate = priv;
    }
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node; returns NULL if dip is not a pHCI.
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
    ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
    if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
        mdi_phci_t  *ph;
        ph = i_devi_get_phci(dip);
        return (ph->ph_vprivate);
    }
    return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node;
 *		a no-op (after the DEBUG assertion) if dip is not a pHCI.
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
    ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
    if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
        mdi_phci_t  *ph;
        ph = i_devi_get_phci(dip);
        ph->ph_vprivate = priv;
    }
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * Built-in list of phci drivers for every vhci class.
 * All phci drivers except iscsi have root device support.
6418 */ 6419 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 6420 { "fp", 1 }, 6421 { "iscsi", 0 }, 6422 { "ibsrp", 1 } 6423 }; 6424 6425 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 6426 6427 /* 6428 * During boot time, the on-disk vhci cache for every vhci class is read 6429 * in the form of an nvlist and stored here. 6430 */ 6431 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6432 6433 /* nvpair names in vhci cache nvlist */ 6434 #define MDI_VHCI_CACHE_VERSION 1 6435 #define MDI_NVPNAME_VERSION "version" 6436 #define MDI_NVPNAME_PHCIS "phcis" 6437 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6438 6439 /* 6440 * Given vhci class name, return its on-disk vhci cache filename. 6441 * Memory for the returned filename which includes the full path is allocated 6442 * by this function. 6443 */ 6444 static char * 6445 vhclass2vhcache_filename(char *vhclass) 6446 { 6447 char *filename; 6448 int len; 6449 static char *fmt = "/etc/devices/mdi_%s_cache"; 6450 6451 /* 6452 * fmt contains the on-disk vhci cache file name format; 6453 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 6454 */ 6455 6456 /* the -1 below is to account for "%s" in the format string */ 6457 len = strlen(fmt) + strlen(vhclass) - 1; 6458 filename = kmem_alloc(len, KM_SLEEP); 6459 (void) snprintf(filename, len, fmt, vhclass); 6460 ASSERT(len == (strlen(filename) + 1)); 6461 return (filename); 6462 } 6463 6464 /* 6465 * initialize the vhci cache related data structures and read the on-disk 6466 * vhci cached data into memory. 
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
    mdi_vhci_config_t *vhc;
    mdi_vhci_cache_t *vhcache;
    int i;
    nvlist_t *nvl = NULL;

    vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
    vh->vh_config = vhc;
    vhcache = &vhc->vhc_vhcache;

    vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

    mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
    cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

    rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

    /*
     * Create string hash; same as mod_hash_create_strhash() except that
     * we use NULL key destructor (keys are owned by the cct entries).
     */
    vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
        mdi_bus_config_cache_hash_size,
        mod_hash_null_keydtor, mod_hash_null_valdtor,
        mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

    setup_phci_driver_list(vh);

    /*
     * The on-disk vhci cache is read during booting prior to the
     * lights-out period by mdi_read_devices_files(); pick up (and
     * consume) the pre-read nvlist for this class, if any.
     */
    for (i = 0; i < N_VHCI_CLASSES; i++) {
        if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
            nvl = vhcache_nvl[i];
            vhcache_nvl[i] = NULL;
            break;
        }
    }

    /*
     * this is to cover the case of some one manually causing unloading
     * (or detaching) and reloading (or attaching) of a vhci driver.
     */
    if (nvl == NULL && modrootloaded)
        nvl = read_on_disk_vhci_cache(vh->vh_class);

    if (nvl != NULL) {
        rw_enter(&vhcache->vhcache_lock, RW_WRITER);
        if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
            vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
        else {
            cmn_err(CE_WARN,
                "%s: data file corrupted, will recreate\n",
                vhc->vhc_vhcache_filename);
        }
        rw_exit(&vhcache->vhcache_lock);
        nvlist_free(nvl);
    }

    /* flush a dirty cache at shutdown via stop_vhcache_flush_thread() */
    vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
        CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

    vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
    vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
    mdi_vhci_config_t *vhc = vh->vh_config;
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_vhcache_phci_t *cphci, *cphci_next;
    mdi_vhcache_client_t *cct, *cct_next;
    mdi_vhcache_pathinfo_t *cpi, *cpi_next;

    /* threads must be gone before the structures they use are freed */
    if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
        return (MDI_FAILURE);

    kmem_free(vhc->vhc_vhcache_filename,
        strlen(vhc->vhc_vhcache_filename) + 1);

    if (vhc->vhc_phci_driver_list)
        free_phci_driver_list(vhc);

    mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

    for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
        cphci = cphci_next) {
        cphci_next = cphci->cphci_next;
        free_vhcache_phci(cphci);
    }

    for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
        cct_next = cct->cct_next;
        for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
            cpi_next = cpi->cpi_next;
            free_vhcache_pathinfo(cpi);
        }
        free_vhcache_client(cct);
    }

    rw_destroy(&vhcache->vhcache_lock);

    mutex_destroy(&vhc->vhc_lock);
    cv_destroy(&vhc->vhc_cv);
    kmem_free(vhc, sizeof (mdi_vhci_config_t));
    return (MDI_SUCCESS);
}

/*
 * Setup the list of phci drivers associated with the specified vhci class.
 * MDI uses this information to rebuild bus config cache if in case the
 * cache is not available or corrupted.
 */
static void
setup_phci_driver_list(mdi_vhci_t *vh)
{
    mdi_vhci_config_t *vhc = vh->vh_config;
    mdi_phci_driver_info_t  *driver_list;
    char    **driver_list1;
    uint_t  ndrivers, ndrivers1;
    int i, j;

    if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
        driver_list = scsi_phci_driver_list;
        ndrivers = sizeof (scsi_phci_driver_list) /
            sizeof (mdi_phci_driver_info_t);
    } else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) {
        driver_list = ib_phci_driver_list;
        ndrivers = sizeof (ib_phci_driver_list) /
            sizeof (mdi_phci_driver_info_t);
    } else {
        driver_list = NULL;
        ndrivers = 0;
    }

    /*
     * The driver.conf file of a vhci driver can specify additional
     * phci drivers using a project private "phci-drivers" property.
     */
    if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip,
        DDI_PROP_DONTPASS, "phci-drivers", &driver_list1,
        &ndrivers1) != DDI_PROP_SUCCESS)
        ndrivers1 = 0;

    vhc->vhc_nphci_drivers = ndrivers + ndrivers1;
    if (vhc->vhc_nphci_drivers == 0)
        return;

    vhc->vhc_phci_driver_list = kmem_alloc(
        sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP);

    /* built-in drivers first, then driver.conf supplied ones */
    for (i = 0; i < ndrivers; i++) {
        vhc->vhc_phci_driver_list[i].phdriver_name =
            i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP);
        vhc->vhc_phci_driver_list[i].phdriver_root_support =
            driver_list[i].phdriver_root_support;
    }

    /* drivers from driver.conf are assumed to have root support */
    for (j = 0; j < ndrivers1; j++, i++) {
        vhc->vhc_phci_driver_list[i].phdriver_name =
            i_ddi_strdup(driver_list1[j], KM_SLEEP);
        vhc->vhc_phci_driver_list[i].phdriver_root_support = 1;
    }

    if (ndrivers1)
        ddi_prop_free(driver_list1);
}

/*
 * Free the memory allocated for the phci driver list
 */
static void
free_phci_driver_list(mdi_vhci_config_t *vhc)
{
    int i;

    if (vhc->vhc_phci_driver_list == NULL)
        return;

    for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
        kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name,
            strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1);
    }

    kmem_free(vhc->vhc_phci_driver_list,
        sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
    mdi_async_client_config_t *acc, *acc_next;

    mutex_enter(&vhc->vhc_lock);
    vhc->vhc_flags |= MDI_VHC_EXIT;
    ASSERT(vhc->vhc_acc_thrcount >= 0);
    cv_broadcast(&vhc->vhc_cv);

    /* poll (dropping the lock each tick) until all threads have exited */
    while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
        vhc->vhc_acc_thrcount != 0) {
        mutex_exit(&vhc->vhc_lock);
        delay(1);
        mutex_enter(&vhc->vhc_lock);
    }

    vhc->vhc_flags &= ~MDI_VHC_EXIT;

    for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
        acc_next = acc->acc_next;
        free_async_client_config(acc);
    }
    vhc->vhc_acc_list_head = NULL;
    vhc->vhc_acc_list_tail = NULL;
    vhc->vhc_acc_count = 0;

    /* the flush thread is gone; do its final flush synchronously */
    if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
        vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
        mutex_exit(&vhc->vhc_lock);
        if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
            vhcache_dirty(vhc);
            return (MDI_FAILURE);
        }
    } else
        mutex_exit(&vhc->vhc_lock);

    if (callb_delete(vhc->vhc_cbid) != 0)
        return (MDI_FAILURE);

    return (MDI_SUCCESS);
}

/*
 * Stop vhci cache flush thread
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
    mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

    mutex_enter(&vhc->vhc_lock);
    vhc->vhc_flags |= MDI_VHC_EXIT;
    cv_broadcast(&vhc->vhc_cv);

    /* poll until the flush thread notices MDI_VHC_EXIT and exits */
    while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
        mutex_exit(&vhc->vhc_lock);
        delay(1);
        mutex_enter(&vhc->vhc_lock);
    }

    if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
        vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
        mutex_exit(&vhc->vhc_lock);
        /* forced flush: we may be past i_ddi_io_initialized() */
        (void) flush_vhcache(vhc, 1);
    } else
        mutex_exit(&vhc->vhc_lock);

    return (B_TRUE);
}

/*
 * Enqueue the vhcache phci (cphci) at the tail of the list
 */
static void
enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
{
    cphci->cphci_next = NULL;
    if (vhcache->vhcache_phci_head == NULL)
        vhcache->vhcache_phci_head = cphci;
    else
        vhcache->vhcache_phci_tail->cphci_next = cphci;
    vhcache->vhcache_phci_tail = cphci;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
 */
static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *cpi)
{
    cpi->cpi_next = NULL;
    if (cct->cct_cpi_head == NULL)
        cct->cct_cpi_head = cpi;
    else
        cct->cct_cpi_tail->cpi_next = cpi;
    cct->cct_cpi_tail = cpi;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *newcpi)
{
    mdi_vhcache_pathinfo_t *cpi, *prev_cpi;

    if (cct->cct_cpi_head == NULL ||
        (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
        enqueue_tail_vhcache_pathinfo(cct, newcpi);
    else {
        /* advance to the first entry with the hint flag set (or end) */
        for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
            !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
            prev_cpi = cpi, cpi = cpi->cpi_next)
            ;

        if (prev_cpi == NULL)
            cct->cct_cpi_head = newcpi;
        else
            prev_cpi->cpi_next = newcpi;

        newcpi->cpi_next = cpi;

        if (cpi == NULL)
            cct->cct_cpi_tail = newcpi;
    }
}

/*
 * Enqueue the vhcache client (cct) at the tail of the list
 */
static void
enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
    mdi_vhcache_client_t *cct)
{
    cct->cct_next = NULL;
    if (vhcache->vhcache_client_head == NULL)
        vhcache->vhcache_client_head = cct;
    else
        vhcache->vhcache_client_tail->cct_next = cct;
    vhcache->vhcache_client_tail = cct;
}

/*
 * Free a kmem-allocated array of nelem kmem-allocated NUL-terminated
 * strings (NULL entries and a NULL array are tolerated).
 */
static void
free_string_array(char **str, int nelem)
{
    int i;

    if (str) {
        for (i = 0; i < nelem; i++) {
            if (str[i])
                kmem_free(str[i], strlen(str[i]) + 1);
        }
        kmem_free(str, sizeof (char *) * nelem);
    }
}

/* Free a vhcache phci entry and its pathname string. */
static void
free_vhcache_phci(mdi_vhcache_phci_t *cphci)
{
    kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
    kmem_free(cphci, sizeof (*cphci));
}

/* Free a vhcache pathinfo entry and its address string. */
static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
{
    kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
    kmem_free(cpi, sizeof (*cpi));
}

/* Free a vhcache client entry and its name@address string. */
static void
free_vhcache_client(mdi_vhcache_client_t *cct)
{
    kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
    kmem_free(cct, sizeof (*cct));
}

/*
 * Build the "<name>@<addr>" string for a client. The returned string is
 * kmem-allocated; its allocated length (including the NUL) is returned
 * through ret_len when non-NULL so the caller can kmem_free() it.
 */
static char *
vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
{
    char *name_addr;
    int len;

    /* + 2: one for '@', one for the terminating NUL */
    len = strlen(ct_name) + strlen(ct_addr) + 2;
    name_addr = kmem_alloc(len, KM_SLEEP);
    (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);

    if (ret_len)
        *ret_len = len;
    return (name_addr);
}

/*
 * Copy the contents of paddrnvl to vhci cache.
 * paddrnvl nvlist contains path information for a vhci client.
 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
 */
static void
paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
    mdi_vhcache_client_t *cct)
{
    nvpair_t *nvp = NULL;
    mdi_vhcache_pathinfo_t *cpi;
    uint_t nelem;
    uint32_t *val;

    while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
        ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
        cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
        cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
        (void) nvpair_value_uint32_array(nvp, &val, &nelem);
        ASSERT(nelem == 2);
        /* val[0] indexes cphci_list (the phci-id), val[1] is cpi_flags */
        cpi->cpi_cphci = cphci_list[val[0]];
        cpi->cpi_flags = val[1];
        enqueue_tail_vhcache_pathinfo(cct, cpi);
    }
}

/*
 * Copy the contents of caddrmapnvl to vhci cache.
 * caddrmapnvl nvlist contains vhci client address to phci client address
 * mappings. See the comment in mainnvl_to_vhcache() for the format of
 * this nvlist.
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
    nvpair_t *nvp = NULL;
    nvlist_t *paddrnvl;
    mdi_vhcache_client_t *cct;

    while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
        ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
        cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
        cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
        (void) nvpair_value_nvlist(nvp, &paddrnvl);
        paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
        /* the client must contain at least one path */
        ASSERT(cct->cct_cpi_head != NULL);

        enqueue_vhcache_client(vhcache, cct);
        (void) mod_hash_insert(vhcache->vhcache_client_hash,
            (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
    }
}

/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 * NAME		TYPE		DATA
 * version	int32		version number
 * phcis	string array	array of phci paths
 * clientaddrmap nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 * NAME		TYPE		DATA
 * caddr1	nvlist_t	paddrs_nvl1
 * caddr2	nvlist_t	paddrs_nvl2
 * ...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 * NAME		TYPE		DATA
 * pi_addr1	uint32_array	(phci-id, cpi_flags)
 * pi_addr2	uint32_array	(phci-id, cpi_flags)
 * ...
 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify PHCIs to which the
 * bus specific address belongs to. These integers are used as an index
 * into the phcis string array in the main nvlist to get the PHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
    char **phcis, **phci_namep;
    uint_t nphcis;
    mdi_vhcache_phci_t *cphci, **cphci_list;
    nvlist_t *caddrmapnvl;
    int32_t ver;
    int i;
    size_t cphci_list_size;

    ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

    if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
        ver != MDI_VHCI_CACHE_VERSION)
        return (MDI_FAILURE);

    /* no phcis recorded means an empty (but valid) cache */
    if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
        &nphcis) != 0)
        return (MDI_SUCCESS);

    ASSERT(nphcis > 0);

    cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
    cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
    for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
        cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
        cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
        enqueue_vhcache_phci(vhcache, cphci);
        cphci_list[i] = cphci;
    }

    ASSERT(vhcache->vhcache_phci_head != NULL);

    if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
        caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

    kmem_free(cphci_list, cphci_list_size);
    return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnvl.
 * Returns 0 on success, errno on failure.
7001 */ 7002 static int 7003 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7004 nvlist_t *caddrmapnvl) 7005 { 7006 mdi_vhcache_pathinfo_t *cpi; 7007 nvlist_t *nvl; 7008 int err; 7009 uint32_t val[2]; 7010 7011 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7012 7013 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7014 return (err); 7015 7016 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7017 val[0] = cpi->cpi_cphci->cphci_id; 7018 val[1] = cpi->cpi_flags; 7019 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7020 != 0) 7021 goto out; 7022 } 7023 7024 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7025 out: 7026 nvlist_free(nvl); 7027 return (err); 7028 } 7029 7030 /* 7031 * Build caddrmapnvl using the information in the vhci cache 7032 * and add it to the mainnvl. 7033 * Returns 0 on success, errno on failure. 7034 */ 7035 static int 7036 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7037 { 7038 mdi_vhcache_client_t *cct; 7039 nvlist_t *nvl; 7040 int err; 7041 7042 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7043 7044 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7045 return (err); 7046 7047 for (cct = vhcache->vhcache_client_head; cct != NULL; 7048 cct = cct->cct_next) { 7049 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7050 goto out; 7051 } 7052 7053 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7054 out: 7055 nvlist_free(nvl); 7056 return (err); 7057 } 7058 7059 /* 7060 * Build nvlist using the information in the vhci cache. 7061 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7062 * Returns nvl on success, NULL on failure. 
 */
static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
{
    mdi_vhcache_phci_t *cphci;
    uint_t phci_count;
    char **phcis;
    nvlist_t *nvl;
    int err, i;

    if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
        nvl = NULL;
        goto out;
    }

    if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
        MDI_VHCI_CACHE_VERSION)) != 0)
        goto out;

    rw_enter(&vhcache->vhcache_lock, RW_READER);
    /* empty cache: return a version-only nvlist */
    if (vhcache->vhcache_phci_head == NULL) {
        rw_exit(&vhcache->vhcache_lock);
        return (nvl);
    }

    /* assign each phci its index into the pathname array (its phci-id) */
    phci_count = 0;
    for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
        cphci = cphci->cphci_next)
        cphci->cphci_id = phci_count++;

    /* build phci pathname list */
    phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
    for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
        cphci = cphci->cphci_next, i++)
        phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);

    err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
        phci_count);
    free_string_array(phcis, phci_count);

    if (err == 0 &&
        (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
        rw_exit(&vhcache->vhcache_lock);
        return (nvl);
    }

    rw_exit(&vhcache->vhcache_lock);
out:
    if (nvl)
        nvlist_free(nvl);
    return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci path.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
{
    mdi_vhcache_phci_t *cphci;

    ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

    for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
        cphci = cphci->cphci_next) {
        if (strcmp(cphci->cphci_path, phci_path) == 0)
            return (cphci);
    }

    return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
{
    mdi_vhcache_phci_t *cphci;

    ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

    for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
        cphci = cphci->cphci_next) {
        if (cphci->cphci_phci == ph)
            return (cphci);
    }

    return (NULL);
}

/*
 * Add the specified phci to the vhci cache if not already present.
 */
static void
vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_vhcache_phci_t *cphci;
    char *pathname;
    int cache_updated;

    rw_enter(&vhcache->vhcache_lock, RW_WRITER);

    pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    (void) ddi_pathname(ph->ph_dip, pathname);
    if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
        != NULL) {
        /* already cached by pathname: just (re)bind the live phci */
        cphci->cphci_phci = ph;
        cache_updated = 0;
    } else {
        cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
        cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
        cphci->cphci_phci = ph;
        enqueue_vhcache_phci(vhcache, cphci);
        cache_updated = 1;
    }

    rw_exit(&vhcache->vhcache_lock);

    /*
     * Since a new phci has been added, reset
     * vhc_path_discovery_cutoff_time to allow for discovery of paths
     * during next vhcache_discover_paths().
     */
    mutex_enter(&vhc->vhc_lock);
    vhc->vhc_path_discovery_cutoff_time = 0;
    mutex_exit(&vhc->vhc_lock);

    kmem_free(pathname, MAXPATHLEN);
    if (cache_updated)
        vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified phci from the vhci cache.
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_vhcache_phci_t *cphci;

    rw_enter(&vhcache->vhcache_lock, RW_WRITER);
    if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
        /* do not remove the actual mdi_vhcache_phci structure */
        cphci->cphci_phci = NULL;
    }
    rw_exit(&vhcache->vhcache_lock);
}

/*
 * Initialize a lookup token: copy src into dst, or reset dst to the
 * "no cached lookup" state when src is NULL.
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
    if (src == NULL) {
        dst->lt_cct = NULL;
        dst->lt_cct_lookup_time = 0;
    } else {
        dst->lt_cct = src->lt_cct;
        dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
    }
}

/*
 * Look up vhcache client for the specified client.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
    mod_hash_val_t hv;
    char *name_addr;
    int len;

    ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

    /*
     * If no vhcache clean occurred since the last lookup, we can
     * simply return the cct from the last lookup operation.
     * It works because ccts are never freed except during the vhcache
     * cleanup operation.
     */
    if (token != NULL &&
        vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
        return (token->lt_cct);

    name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
    if (mod_hash_find(vhcache->vhcache_client_hash,
        (mod_hash_key_t)name_addr, &hv) == 0) {
        if (token) {
            token->lt_cct = (mdi_vhcache_client_t *)hv;
            token->lt_cct_lookup_time = lbolt64;
        }
    } else {
        if (token) {
            token->lt_cct = NULL;
            token->lt_cct_lookup_time = 0;
        }
        hv = NULL;
    }
    kmem_free(name_addr, len);
    return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_vhcache_client_t *cct;
    mdi_vhcache_pathinfo_t *cpi;
    mdi_phci_t *ph = pip->pi_phci;
    mdi_client_t *ct = pip->pi_client;
    int cache_updated = 0;

    rw_enter(&vhcache->vhcache_lock, RW_WRITER);

    /* if vhcache client for this pip doesn't already exist, add it */
    if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
        NULL)) == NULL) {
        cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
        cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
            ct->ct_guid, NULL);
        enqueue_vhcache_client(vhcache, cct);
        (void) mod_hash_insert(vhcache->vhcache_client_hash,
            (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
        cache_updated = 1;
    }

    /* look for an existing cached path entry matching this phci+addr */
    for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
        if (cpi->cpi_cphci->cphci_phci == ph &&
            strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
            cpi->cpi_pip = pip;
            if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
                /* path exists again: clear hint and re-sort */
                cpi->cpi_flags &=
                    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
                sort_vhcache_paths(cct);
                cache_updated = 1;
            }
            break;
        }
    }

    if (cpi == NULL) {
        cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
        cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
        cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
        ASSERT(cpi->cpi_cphci != NULL);
        cpi->cpi_pip = pip;
        enqueue_vhcache_pathinfo(cct, cpi);
        cache_updated = 1;
    }

    rw_exit(&vhcache->vhcache_lock);

    if (cache_updated)
        vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_client_t *ct = pip->pi_client;
    mdi_vhcache_client_t *cct;
    mdi_vhcache_pathinfo_t *cpi;

    rw_enter(&vhcache->vhcache_lock, RW_WRITER);
    if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
        NULL)) != NULL) {
        for (cpi = cct->cct_cpi_head; cpi != NULL;
            cpi = cpi->cpi_next) {
            if (cpi->cpi_pip == pip) {
                cpi->cpi_pip = NULL;
                break;
            }
        }
    }
    rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
    nvlist_t *nvl;
    int err;
    int rv;

    /*
     * It is possible that the system may shutdown before
     * i_ddi_io_initialized (during stmsboot for example). To allow for
     * flushing the cache in this case do not check for
     * i_ddi_io_initialized when force flag is set.
     */
    if (force_flag == 0 && !i_ddi_io_initialized())
        return (MDI_FAILURE);

    if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
        err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
        nvlist_free(nvl);
    } else
        err = EFAULT;

    rv = MDI_SUCCESS;
    mutex_enter(&vhc->vhc_lock);
    if (err != 0) {
        if (err == EROFS) {
            /* read-only root: stop retrying, drop dirty state */
            vhc->vhc_flags |= MDI_VHC_READONLY_FS;
            vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
                MDI_VHC_VHCACHE_DIRTY);
        } else {
            /* log the first failure only, until a flush succeeds */
            if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
                cmn_err(CE_CONT, "%s: update failed\n",
                    vhc->vhc_vhcache_filename);
                vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
            }
            rv = MDI_FAILURE;
        }
    } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
        cmn_err(CE_CONT,
            "%s: update now ok\n", vhc->vhc_vhcache_filename);
        vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
    }
    mutex_exit(&vhc->vhc_lock);

    return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* number of seconds to sleep idle before exiting */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/*
		 * While the cache is dirty, wait until the scheduled flush
		 * time and then write it out.  flush_vhcache() is called
		 * with vhc_lock dropped; on failure the cache is re-dirtied
		 * so another flush will be scheduled.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* idle: wait for more work or for the idle timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* exit on shutdown request or after idling with no work */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7463 */ 7464 static void 7465 vhcache_dirty(mdi_vhci_config_t *vhc) 7466 { 7467 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7468 int create_thread; 7469 7470 rw_enter(&vhcache->vhcache_lock, RW_READER); 7471 /* do not flush cache until the cache is fully built */ 7472 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7473 rw_exit(&vhcache->vhcache_lock); 7474 return; 7475 } 7476 rw_exit(&vhcache->vhcache_lock); 7477 7478 mutex_enter(&vhc->vhc_lock); 7479 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7480 mutex_exit(&vhc->vhc_lock); 7481 return; 7482 } 7483 7484 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7485 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7486 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7487 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7488 cv_broadcast(&vhc->vhc_cv); 7489 create_thread = 0; 7490 } else { 7491 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7492 create_thread = 1; 7493 } 7494 mutex_exit(&vhc->vhc_lock); 7495 7496 if (create_thread) 7497 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7498 0, &p0, TS_RUN, minclsyspri); 7499 } 7500 7501 /* 7502 * phci bus config structure - one for for each phci bus config operation that 7503 * we initiate on behalf of a vhci. 
7504 */ 7505 typedef struct mdi_phci_bus_config_s { 7506 char *phbc_phci_path; 7507 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7508 struct mdi_phci_bus_config_s *phbc_next; 7509 } mdi_phci_bus_config_t; 7510 7511 /* vhci bus config structure - one for each vhci bus config operation */ 7512 typedef struct mdi_vhci_bus_config_s { 7513 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7514 major_t vhbc_op_major; /* bus config op major */ 7515 uint_t vhbc_op_flags; /* bus config op flags */ 7516 kmutex_t vhbc_lock; 7517 kcondvar_t vhbc_cv; 7518 int vhbc_thr_count; 7519 } mdi_vhci_bus_config_t; 7520 7521 /* 7522 * bus config the specified phci 7523 */ 7524 static void 7525 bus_config_phci(void *arg) 7526 { 7527 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7528 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7529 dev_info_t *ph_dip; 7530 7531 /* 7532 * first configure all path components upto phci and then configure 7533 * the phci children. 7534 */ 7535 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7536 != NULL) { 7537 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7538 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7539 (void) ndi_devi_config_driver(ph_dip, 7540 vhbc->vhbc_op_flags, 7541 vhbc->vhbc_op_major); 7542 } else 7543 (void) ndi_devi_config(ph_dip, 7544 vhbc->vhbc_op_flags); 7545 7546 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7547 ndi_rele_devi(ph_dip); 7548 } 7549 7550 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7551 kmem_free(phbc, sizeof (*phbc)); 7552 7553 mutex_enter(&vhbc->vhbc_lock); 7554 vhbc->vhbc_thr_count--; 7555 if (vhbc->vhbc_thr_count == 0) 7556 cv_broadcast(&vhbc->vhbc_cv); 7557 mutex_exit(&vhbc->vhbc_lock); 7558 } 7559 7560 /* 7561 * Bus config all phcis associated with the vhci in parallel. 7562 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7563 */ 7564 static void 7565 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7566 ddi_bus_config_op_t op, major_t maj) 7567 { 7568 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7569 mdi_vhci_bus_config_t *vhbc; 7570 mdi_vhcache_phci_t *cphci; 7571 7572 rw_enter(&vhcache->vhcache_lock, RW_READER); 7573 if (vhcache->vhcache_phci_head == NULL) { 7574 rw_exit(&vhcache->vhcache_lock); 7575 return; 7576 } 7577 7578 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7579 7580 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7581 cphci = cphci->cphci_next) { 7582 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7583 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7584 KM_SLEEP); 7585 phbc->phbc_vhbusconfig = vhbc; 7586 phbc->phbc_next = phbc_head; 7587 phbc_head = phbc; 7588 vhbc->vhbc_thr_count++; 7589 } 7590 rw_exit(&vhcache->vhcache_lock); 7591 7592 vhbc->vhbc_op = op; 7593 vhbc->vhbc_op_major = maj; 7594 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7595 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7596 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7597 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7598 7599 /* now create threads to initiate bus config on all phcis in parallel */ 7600 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7601 phbc_next = phbc->phbc_next; 7602 if (mdi_mtc_off) 7603 bus_config_phci((void *)phbc); 7604 else 7605 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7606 0, &p0, TS_RUN, minclsyspri); 7607 } 7608 7609 mutex_enter(&vhbc->vhbc_lock); 7610 /* wait until all threads exit */ 7611 while (vhbc->vhbc_thr_count > 0) 7612 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7613 mutex_exit(&vhbc->vhbc_lock); 7614 7615 mutex_destroy(&vhbc->vhbc_lock); 7616 cv_destroy(&vhbc->vhbc_cv); 7617 kmem_free(vhbc, sizeof (*vhbc)); 7618 } 7619 7620 /* 7621 * Single threaded version of bus_config_all_phcis() 7622 */ 7623 static void 7624 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7625 
ddi_bus_config_op_t op, major_t maj) 7626 { 7627 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7628 7629 single_threaded_vhconfig_enter(vhc); 7630 bus_config_all_phcis(vhcache, flags, op, maj); 7631 single_threaded_vhconfig_exit(vhc); 7632 } 7633 7634 /* 7635 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7636 * The path includes the child component in addition to the phci path. 7637 */ 7638 static int 7639 bus_config_one_phci_child(char *path) 7640 { 7641 dev_info_t *ph_dip, *child; 7642 char *devnm; 7643 int rv = MDI_FAILURE; 7644 7645 /* extract the child component of the phci */ 7646 devnm = strrchr(path, '/'); 7647 *devnm++ = '\0'; 7648 7649 /* 7650 * first configure all path components upto phci and then 7651 * configure the phci child. 7652 */ 7653 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7654 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7655 NDI_SUCCESS) { 7656 /* 7657 * release the hold that ndi_devi_config_one() placed 7658 */ 7659 ndi_rele_devi(child); 7660 rv = MDI_SUCCESS; 7661 } 7662 7663 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7664 ndi_rele_devi(ph_dip); 7665 } 7666 7667 devnm--; 7668 *devnm = '/'; 7669 return (rv); 7670 } 7671 7672 /* 7673 * Build a list of phci client paths for the specified vhci client. 7674 * The list includes only those phci client paths which aren't configured yet. 7675 */ 7676 static mdi_phys_path_t * 7677 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7678 { 7679 mdi_vhcache_pathinfo_t *cpi; 7680 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7681 int config_path, len; 7682 7683 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7684 /* 7685 * include only those paths that aren't configured. 
7686 */ 7687 config_path = 0; 7688 if (cpi->cpi_pip == NULL) 7689 config_path = 1; 7690 else { 7691 MDI_PI_LOCK(cpi->cpi_pip); 7692 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7693 config_path = 1; 7694 MDI_PI_UNLOCK(cpi->cpi_pip); 7695 } 7696 7697 if (config_path) { 7698 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7699 len = strlen(cpi->cpi_cphci->cphci_path) + 7700 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7701 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7702 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7703 cpi->cpi_cphci->cphci_path, ct_name, 7704 cpi->cpi_addr); 7705 pp->phys_path_next = NULL; 7706 7707 if (pp_head == NULL) 7708 pp_head = pp; 7709 else 7710 pp_tail->phys_path_next = pp; 7711 pp_tail = pp; 7712 } 7713 } 7714 7715 return (pp_head); 7716 } 7717 7718 /* 7719 * Free the memory allocated for phci client path list. 7720 */ 7721 static void 7722 free_phclient_path_list(mdi_phys_path_t *pp_head) 7723 { 7724 mdi_phys_path_t *pp, *pp_next; 7725 7726 for (pp = pp_head; pp != NULL; pp = pp_next) { 7727 pp_next = pp->phys_path_next; 7728 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7729 kmem_free(pp, sizeof (*pp)); 7730 } 7731 } 7732 7733 /* 7734 * Allocated async client structure and initialize with the specified values. 7735 */ 7736 static mdi_async_client_config_t * 7737 alloc_async_client_config(char *ct_name, char *ct_addr, 7738 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7739 { 7740 mdi_async_client_config_t *acc; 7741 7742 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7743 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7744 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7745 acc->acc_phclient_path_list_head = pp_head; 7746 init_vhcache_lookup_token(&acc->acc_token, tok); 7747 acc->acc_next = NULL; 7748 return (acc); 7749 } 7750 7751 /* 7752 * Free the memory allocated for the async client structure and their members. 
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	if (acc->acc_phclient_path_list_head)
		free_phclient_path_list(acc->acc_phclient_path_list_head);
	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 *
 * NOTE(review): the ordering itself is delegated to
 * enqueue_vhcache_pathinfo() (defined elsewhere in this file) — this
 * routine only rebuilds the list by re-enqueueing every element.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;

	cpi_head = cct->cct_cpi_head;
	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
		cpi_next = cpi->cpi_next;
		enqueue_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/* hint is stale if it disagrees with cpi_pip's presence */
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * Upgrade to a writer lock.  If the upgrade cannot be done
	 * atomically the lock is dropped and re-acquired, so the client
	 * must be looked up again (a cache clean may have freed it).
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	/* bring every hint flag in line with whether the path exists */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client, then reconcile the
 * persisted path-existence hints.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t *pp;

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
		(void) bus_config_one_phci_child(pp->phys_path);
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	/* how long to linger with no queued work before exiting */
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* wait for work, an exit request, or the idle timeout */
		mutex_enter(&vhc->vhc_lock);
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the head work item and process it unlocked */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	/* reached with vhc_lock held; account for this thread's exit */
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc, *newacc;
	int create_thread;

	if (pp_head == NULL)
		return;

	if (mdi_mtc_off) {
		/* multithreaded config disabled; do the work inline */
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(newacc);

	/* drop the request if this client is already queued */
	mutex_enter(&vhc->vhc_lock);
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
			free_async_client_config(newacc);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
	}

	/* append to the work queue */
	if (vhc->vhc_acc_list_head == NULL)
		vhc->vhc_acc_list_head = newacc;
	else
		vhc->vhc_acc_list_tail->acc_next = newacc;
	vhc->vhc_acc_list_tail = newacc;
	vhc->vhc_acc_count++;

	/*
	 * Grow the worker pool only when there are more queued items than
	 * workers; otherwise wake an existing idle worker.
	 */
	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_acc_thrcount++;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * Return number of online paths for the specified client.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Called with vhcache_lock held; this function releases it on every
 * return path.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	/* nothing to do if client unknown or all its paths are configured */
	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * No path online yet: configure synchronously until one comes
	 * online, then hand the remainder of the list to the async path.
	 */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* client may have been cleaned while unlocked */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				/* ownership of the tail moved to async */
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Acquire the single-threaded vhci configuration gate; blocks until no
 * other thread holds it.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Release the single-threaded vhci configuration gate and wake waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Attach the phci driver instances associated with the vhci:
 * If root is mounted attach all phci driver instances.
 * If root is not mounted, attach the instances of only those phci
 * drivers that have the root support.
 */
static void
attach_phci_drivers(mdi_vhci_config_t *vhc)
{
	int i;
	major_t m;

	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
		if (modrootloaded == 0 &&
		    vhc->vhc_phci_driver_list[i].phdriver_root_support == 0)
			continue;

		m = ddi_name_to_major(
		    vhc->vhc_phci_driver_list[i].phdriver_name);
		if (m != (major_t)-1) {
			/* attach (and immediately release) the driver */
			if (ddi_hold_installed_driver(m) != NULL)
				ddi_rele_driver(m);
		}
	}
}

/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
 *
 * Cache is built fully if the root is mounted.
 * If the root is not mounted, phci drivers that do not have root support
 * are not attached. As a result the cache is built partially. The entries
 * in the cache reflect only those phci drivers that have root support.
 *
 * Returns 1 if the cache was (re)built by this call, 0 if it was
 * already set up.
 */
static int
build_vhci_cache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
		rw_exit(&vhcache->vhcache_lock);
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}
	rw_exit(&vhcache->vhcache_lock);

	attach_phci_drivers(vhc);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, (major_t)-1);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	vhcache_dirty(vhc);
	return (1);
}

/*
 * Determine if discovery of paths is needed.
 * Returns 1 when a full discovery should be run, 0 otherwise.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int rv = 1;

	mutex_enter(&vhc->vhc_lock);
	/* boot and post-boot discovery attempts are separately budgeted */
	if (i_ddi_io_initialized() == 0) {
		if (vhc->vhc_path_discovery_boot > 0) {
			vhc->vhc_path_discovery_boot--;
			goto out;
		}
	} else {
		if (vhc->vhc_path_discovery_postboot > 0) {
			vhc->vhc_path_discovery_postboot--;
			goto out;
		}
	}

	/*
	 * Do full path discovery at most once per mdi_path_discovery_interval.
	 * This is to avoid a series of full path discoveries when opening
	 * stale /dev/[r]dsk links.
	 */
	if (mdi_path_discovery_interval != -1 &&
	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
		goto out;

	rv = 0;
out:
	mutex_exit(&vhc->vhc_lock);
	return (rv);
}

/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
 *
 * Returns 1 if discovery was performed, 0 if it was rate-limited away.
 */
static int
vhcache_discover_paths(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;

	single_threaded_vhconfig_enter(vhc);

	if (vhcache_do_discovery(vhc)) {
		attach_phci_drivers(vhc);
		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
		    NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1);

		/* arm the rate limiter for the next discovery */
		mutex_enter(&vhc->vhc_lock);
		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
		    mdi_path_discovery_interval * TICKS_PER_SECOND;
		mutex_exit(&vhc->vhc_lock);
		rv = 1;
	}

	single_threaded_vhconfig_exit(vhc);
	return (rv);
}

/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *	vdip	vhci dip
 *	flags	bus config flags
 *	op	bus config operation
 *	The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *	arg	pointer to name@addr
 *	child	upon successful return from this function, *child will be
 *		set to the configured and held devinfo child node of vdip.
 *	ct_addr	pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *	arg	major number of the driver
 *	child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *	arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * While bus configuring phcis, the phci driver interactions with MDI
	 * cause child nodes to be enumerated under the vhci node for which
	 * they need to ndi_devi_enter the vhci node.
	 *
	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
	 * for the bus config operations on phcis to finish while holding the
	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
	 * phcis and call the default framework provided bus config function
	 * if we are called with ndi_devi_enter lock held.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (CE_NOTE, vdip,
		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
		goto default_bus_config;
	}

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		/* rv != 0 here records that we built the cache just now */
		rw_exit(&vhcache->vhcache_lock);
		rv = build_vhci_cache(vhc);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/* temporarily NUL-terminate the node name */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		/* skip if build_vhci_cache() just configured everything */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vhc) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
 * Returns the nvlist on success, NULL if the cache file is missing or bad.
 */
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
	nvlist_t *nvl;
	int err;
	char *filename;

	filename = vhclass2vhcache_filename(vhci_class);

	if ((err = fread_nvlist(filename, &nvl)) == 0) {
		kmem_free(filename, strlen(filename) + 1);
		return (nvl);
	} else if (err == EIO)
		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
	else if (err == EINVAL)
		cmn_err(CE_WARN,
		    "%s: data file corrupted, will recreate\n", filename);

	kmem_free(filename, strlen(filename) + 1);
	return (NULL);
}

/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 */
void
mdi_read_devices_files(void)
{
	int i;

	for (i = 0; i < N_VHCI_CLASSES; i++)
		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
}

/*
 * Remove all stale entries from vhci cache.
 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/*
	 * Rebuild the client list from scratch, keeping only clients that
	 * retain at least one live path (cpi_pip != NULL); stale entries
	 * are freed.
	 */
	cct_head = vhcache->vhcache_client_head;
	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
	for (cct = cct_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;

		cpi_head = cct->cct_cpi_head;
		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			if (cpi->cpi_pip != NULL) {
				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
				enqueue_tail_vhcache_pathinfo(cct, cpi);
			} else
				free_vhcache_pathinfo(cpi);
		}

		if (cct->cct_cpi_head != NULL)
			enqueue_vhcache_client(vhcache, cct);
		else {
			/* client has no paths left; drop it entirely */
			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
			    (mod_hash_key_t)cct->cct_name_addr);
			free_vhcache_client(cct);
		}
	}

	/* likewise keep only phci entries still bound to a live pHCI */
	cphci_head = vhcache->vhcache_phci_head;
	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		if (cphci->cphci_phci != NULL)
			enqueue_vhcache_phci(vhcache, cphci);
		else
			free_vhcache_phci(cphci);
	}

	/* invalidates all outstanding lookup tokens (see lookup) */
	vhcache->vhcache_clean_time = lbolt64;
	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}

/*
 * Remove all stale entries from vhci cache.
 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
 */
void
mdi_clean_vhcache(void)
{
	mdi_vhci_t *vh;

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		/*
		 * Bump vh_refcnt before dropping mdi_mutex; presumably this
		 * keeps vh (and its vh_next link) valid while clean_vhcache()
		 * runs without the global lock — TODO confirm against the
		 * vhci unregister path.
		 */
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		clean_vhcache(vh->vh_config);
		mutex_enter(&mdi_mutex);
		vh->vh_refcnt--;
	}
	mutex_exit(&mdi_mutex);
}

/*
 * mdi_vhci_walk_clients():
 *		Walker routine to traverse client dev_info nodes
 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
 * below the client, including nexus devices, which we dont want.
 * So we just traverse the immediate siblings, starting from 1st client.
 *
 * The callback f is invoked with the client's ct_mutex held (via
 * MDI_CLIENT_LOCK); any return value other than DDI_WALK_CONTINUE
 * terminates the walk.
 */
void
mdi_vhci_walk_clients(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	dev_info_t	*cdip;
	mdi_client_t	*ct;

	mutex_enter(&mdi_mutex);

	cdip = ddi_get_child(vdip);

	while (cdip) {
		ct = i_devi_get_client(cdip);
		MDI_CLIENT_LOCK(ct);

		switch ((*f)(cdip, arg)) {
		case DDI_WALK_CONTINUE:
			/* advance before unlocking the current client */
			cdip = ddi_get_next_sibling(cdip);
			MDI_CLIENT_UNLOCK(ct);
			break;

		default:
			MDI_CLIENT_UNLOCK(ct);
			mutex_exit(&mdi_mutex);
			return;
		}
	}

	mutex_exit(&mdi_mutex);
}

/*
 * mdi_vhci_walk_phcis():
 *		Walker routine to traverse phci dev_info nodes
 *
 * Walks the vhci's pHCI list under mdi_mutex; the callback f is invoked
 * with the pHCI's ph_mutex held (via MDI_PHCI_LOCK).  Any return value
 * other than DDI_WALK_CONTINUE terminates the walk.
 */
void
mdi_vhci_walk_phcis(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = NULL;
	mdi_phci_t	*ph = NULL;

	mutex_enter(&mdi_mutex);

	vh = i_devi_get_vhci(vdip);
	ph = vh->vh_phci_head;

	while (ph) {
		MDI_PHCI_LOCK(ph);

		switch ((*f)(ph->ph_dip, arg)) {
		case DDI_WALK_CONTINUE:
			MDI_PHCI_UNLOCK(ph);
			ph = ph->ph_next;
			break;

		default:
			MDI_PHCI_UNLOCK(ph);
			mutex_exit(&mdi_mutex);
			return;
		}
	}

	mutex_exit(&mdi_mutex);
}


/*
 * mdi_walk_vhcis():
 *		Walker routine to traverse vhci dev_info nodes
 *
 * mdi_mutex is dropped around each callback invocation; vh_refcnt is
 * held across the call — presumably to pin the vhci entry while the
 * lock is released (same pattern as mdi_clean_vhcache()) — TODO confirm.
 * A callback return other than DDI_WALK_CONTINUE terminates the walk.
 */
void
mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = NULL;

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
			break;
		} else {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
		}
	}

	mutex_exit(&mdi_mutex);
}

/*
 * i_mdi_log_sysevent():
 *		Logs events for pickup by syseventd
 *
 * Builds an nvlist of driver name/major, instance, device pathname and
 * pHCI/vHCI class, and posts it as an EC_DDI sysevent.  Best-effort:
 * the ddi_log_sysevent() return value is deliberately ignored, and on
 * success the code falls through into the error: label, which is the
 * shared cleanup path (frees path_name and attr_list).
 */
static void
i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
{
	char		*path_name;
	nvlist_t	*attr_list;

	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
	    KM_SLEEP) != DDI_SUCCESS) {
		goto alloc_failed;
	}

	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path_name);

	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
	    ddi_driver_name(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_PATHNAME,
	    path_name) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_CLASS,
	    ph_vh_class) != DDI_SUCCESS) {
		goto error;
	}

	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
	    attr_list, NULL, DDI_SLEEP);

	/* FALLTHROUGH: shared cleanup for both success and failure */
error:
	kmem_free(path_name, MAXPATHLEN);
	nvlist_free(attr_list);
	return;

alloc_failed:
	MDI_DEBUG(1, (CE_WARN, dip,
	    "!i_mdi_log_sysevent: Unable to send sysevent"));
}