1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 30 * detailed discussion of the overall mpxio architecture. 
31 * 32 * Default locking order: 33 * 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) 35 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 71 #ifdef DEBUG 72 #include <sys/debug.h> 73 int mdi_debug = 1; 74 #define MDI_DEBUG(level, stmnt) \ 75 if (mdi_debug >= (level)) i_mdi_log stmnt 76 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 77 #else /* !DEBUG */ 78 #define MDI_DEBUG(level, stmnt) 79 #endif /* DEBUG */ 80 81 extern pri_t minclsyspri; 82 extern int modrootloaded; 83 84 /* 85 * Global mutex: 86 * Protects vHCI list and structure members, pHCI and Client lists. 
87 */ 88 kmutex_t mdi_mutex; 89 90 /* 91 * Registered vHCI class driver lists 92 */ 93 int mdi_vhci_count; 94 mdi_vhci_t *mdi_vhci_head; 95 mdi_vhci_t *mdi_vhci_tail; 96 97 /* 98 * Client Hash Table size 99 */ 100 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 101 102 /* 103 * taskq interface definitions 104 */ 105 #define MDI_TASKQ_N_THREADS 8 106 #define MDI_TASKQ_PRI minclsyspri 107 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 108 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 109 110 taskq_t *mdi_taskq; 111 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 112 113 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 114 115 /* 116 * The data should be "quiet" for this interval (in seconds) before the 117 * vhci cached data is flushed to the disk. 118 */ 119 static int mdi_vhcache_flush_delay = 10; 120 121 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 122 static int mdi_vhcache_flush_daemon_idle_time = 60; 123 124 /* 125 * MDI falls back to discovery of all paths when a bus_config_one fails. 126 * The following parameters can be used to tune this operation. 127 * 128 * mdi_path_discovery_boot 129 * Number of times path discovery will be attempted during early boot. 130 * Probably there is no reason to ever set this value to greater than one. 131 * 132 * mdi_path_discovery_postboot 133 * Number of times path discovery will be attempted after early boot. 134 * Set it to a minimum of two to allow for discovery of iscsi paths which 135 * may happen very late during booting. 136 * 137 * mdi_path_discovery_interval 138 * Minimum number of seconds MDI will wait between successive discovery 139 * of all paths. Set it to -1 to disable discovery of all paths. 
140 */ 141 static int mdi_path_discovery_boot = 1; 142 static int mdi_path_discovery_postboot = 2; 143 static int mdi_path_discovery_interval = 10; 144 145 /* 146 * number of seconds the asynchronous configuration thread will sleep idle 147 * before exiting. 148 */ 149 static int mdi_async_config_idle_time = 600; 150 151 static int mdi_bus_config_cache_hash_size = 256; 152 153 /* turns off multithreaded configuration for certain operations */ 154 static int mdi_mtc_off = 0; 155 156 /* 157 * MDI component property name/value string definitions 158 */ 159 const char *mdi_component_prop = "mpxio-component"; 160 const char *mdi_component_prop_vhci = "vhci"; 161 const char *mdi_component_prop_phci = "phci"; 162 const char *mdi_component_prop_client = "client"; 163 164 /* 165 * MDI client global unique identifier property name 166 */ 167 const char *mdi_client_guid_prop = "client-guid"; 168 169 /* 170 * MDI client load balancing property name/value string definitions 171 */ 172 const char *mdi_load_balance = "load-balance"; 173 const char *mdi_load_balance_none = "none"; 174 const char *mdi_load_balance_rr = "round-robin"; 175 const char *mdi_load_balance_lba = "logical-block"; 176 177 /* 178 * Obsolete vHCI class definition; to be removed after Leadville update 179 */ 180 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 181 182 static char vhci_greeting[] = 183 "\tThere already exists one vHCI driver for class %s\n" 184 "\tOnly one vHCI driver for each class is allowed\n"; 185 186 /* 187 * Static function prototypes 188 */ 189 static int i_mdi_phci_offline(dev_info_t *, uint_t); 190 static int i_mdi_client_offline(dev_info_t *, uint_t); 191 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 192 static void i_mdi_phci_post_detach(dev_info_t *, 193 ddi_detach_cmd_t, int); 194 static int i_mdi_client_pre_detach(dev_info_t *, 195 ddi_detach_cmd_t); 196 static void i_mdi_client_post_detach(dev_info_t *, 197 ddi_detach_cmd_t, int); 198 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 199 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 200 static int i_mdi_lba_lb(mdi_client_t *ct, 201 mdi_pathinfo_t **ret_pip, struct buf *buf); 202 static void i_mdi_pm_hold_client(mdi_client_t *, int); 203 static void i_mdi_pm_rele_client(mdi_client_t *, int); 204 static void i_mdi_pm_reset_client(mdi_client_t *); 205 static void i_mdi_pm_hold_all_phci(mdi_client_t *); 206 static int i_mdi_power_all_phci(mdi_client_t *); 207 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 208 209 210 /* 211 * Internal mdi_pathinfo node functions 212 */ 213 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 214 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 215 216 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 217 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 218 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 219 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 220 static void i_mdi_phci_get_client_lock(mdi_phci_t *, 221 mdi_client_t *); 222 static void i_mdi_phci_unlock(mdi_phci_t *); 223 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 224 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 225 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 226 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 227 mdi_client_t *); 228 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 229 static void i_mdi_client_remove_path(mdi_client_t *, 230 mdi_pathinfo_t *); 231 232 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 233 mdi_pathinfo_state_t, int); 234 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 235 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 236 char **, int); 237 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 238 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 239 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 240 static 
mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 241 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 242 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 243 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 244 static void i_mdi_client_update_state(mdi_client_t *); 245 static int i_mdi_client_compute_state(mdi_client_t *, 246 mdi_phci_t *); 247 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 248 static void i_mdi_client_unlock(mdi_client_t *); 249 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 250 static mdi_client_t *i_devi_get_client(dev_info_t *); 251 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int, 252 int); 253 /* 254 * Failover related function prototypes 255 */ 256 static int i_mdi_failover(void *); 257 258 /* 259 * misc internal functions 260 */ 261 static int i_mdi_get_hash_key(char *); 262 static int i_map_nvlist_error_to_mdi(int); 263 static void i_mdi_report_path_state(mdi_client_t *, 264 mdi_pathinfo_t *); 265 266 static void setup_vhci_cache(mdi_vhci_t *); 267 static int destroy_vhci_cache(mdi_vhci_t *); 268 static void setup_phci_driver_list(mdi_vhci_t *); 269 static void free_phci_driver_list(mdi_vhci_config_t *); 270 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 271 static boolean_t stop_vhcache_flush_thread(void *, int); 272 static void free_string_array(char **, int); 273 static void free_vhcache_phci(mdi_vhcache_phci_t *); 274 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 275 static void free_vhcache_client(mdi_vhcache_client_t *); 276 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 277 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 278 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 279 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 280 static void vhcache_pi_add(mdi_vhci_config_t *, 281 struct mdi_pathinfo *); 282 static void 
vhcache_pi_remove(mdi_vhci_config_t *, 283 struct mdi_pathinfo *); 284 static void free_phclient_path_list(mdi_phys_path_t *); 285 static void sort_vhcache_paths(mdi_vhcache_client_t *); 286 static int flush_vhcache(mdi_vhci_config_t *, int); 287 static void vhcache_dirty(mdi_vhci_config_t *); 288 static void free_async_client_config(mdi_async_client_config_t *); 289 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 290 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 291 static nvlist_t *read_on_disk_vhci_cache(char *); 292 extern int fread_nvlist(char *, nvlist_t **); 293 extern int fwrite_nvlist(char *, nvlist_t *); 294 295 /* called once when first vhci registers with mdi */ 296 static void 297 i_mdi_init() 298 { 299 static int initialized = 0; 300 301 if (initialized) 302 return; 303 initialized = 1; 304 305 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 306 /* 307 * Create our taskq resources 308 */ 309 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 310 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 311 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 312 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 313 } 314 315 /* 316 * mdi_get_component_type(): 317 * Return mpxio component type 318 * Return Values: 319 * MDI_COMPONENT_NONE 320 * MDI_COMPONENT_VHCI 321 * MDI_COMPONENT_PHCI 322 * MDI_COMPONENT_CLIENT 323 * XXX This doesn't work under multi-level MPxIO and should be 324 * removed when clients migrate mdi_is_*() interfaces. 325 */ 326 int 327 mdi_get_component_type(dev_info_t *dip) 328 { 329 return (DEVI(dip)->devi_mdi_component); 330 } 331 332 /* 333 * mdi_vhci_register(): 334 * Register a vHCI module with the mpxio framework 335 * mdi_vhci_register() is called by vHCI drivers to register the 336 * 'class_driver' vHCI driver and its MDI entrypoints with the 337 * mpxio framework. The vHCI driver must call this interface as 338 * part of its attach(9e) handler. 
339 * Competing threads may try to attach mdi_vhci_register() as 340 * the vHCI drivers are loaded and attached as a result of pHCI 341 * driver instance registration (mdi_phci_register()) with the 342 * framework. 343 * Return Values: 344 * MDI_SUCCESS 345 * MDI_FAILURE 346 */ 347 348 /*ARGSUSED*/ 349 int 350 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 351 int flags) 352 { 353 mdi_vhci_t *vh = NULL; 354 355 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 356 357 i_mdi_init(); 358 359 mutex_enter(&mdi_mutex); 360 /* 361 * Scan for already registered vhci 362 */ 363 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 364 if (strcmp(vh->vh_class, class) == 0) { 365 /* 366 * vHCI has already been created. Check for valid 367 * vHCI ops registration. We only support one vHCI 368 * module per class 369 */ 370 if (vh->vh_ops != NULL) { 371 mutex_exit(&mdi_mutex); 372 cmn_err(CE_NOTE, vhci_greeting, class); 373 return (MDI_FAILURE); 374 } 375 break; 376 } 377 } 378 379 /* 380 * if not yet created, create the vHCI component 381 */ 382 if (vh == NULL) { 383 struct client_hash *hash = NULL; 384 char *load_balance; 385 386 /* 387 * Allocate and initialize the mdi extensions 388 */ 389 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 390 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 391 KM_SLEEP); 392 vh->vh_client_table = hash; 393 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 394 (void) strcpy(vh->vh_class, class); 395 vh->vh_lb = LOAD_BALANCE_RR; 396 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 397 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 398 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 399 vh->vh_lb = LOAD_BALANCE_NONE; 400 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 401 == 0) { 402 vh->vh_lb = LOAD_BALANCE_LBA; 403 } 404 ddi_prop_free(load_balance); 405 } 406 407 /* 408 * Store the vHCI ops vectors 409 */ 410 vh->vh_dip = vdip; 411 vh->vh_ops = vops; 412 413 
setup_vhci_cache(vh); 414 415 if (mdi_vhci_head == NULL) { 416 mdi_vhci_head = vh; 417 } 418 if (mdi_vhci_tail) { 419 mdi_vhci_tail->vh_next = vh; 420 } 421 mdi_vhci_tail = vh; 422 mdi_vhci_count++; 423 } 424 425 /* 426 * Claim the devfs node as a vhci component 427 */ 428 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 429 430 /* 431 * Initialize our back reference from dev_info node 432 */ 433 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 434 mutex_exit(&mdi_mutex); 435 return (MDI_SUCCESS); 436 } 437 438 /* 439 * mdi_vhci_unregister(): 440 * Unregister a vHCI module from mpxio framework 441 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 442 * of a vhci to unregister it from the framework. 443 * Return Values: 444 * MDI_SUCCESS 445 * MDI_FAILURE 446 */ 447 448 /*ARGSUSED*/ 449 int 450 mdi_vhci_unregister(dev_info_t *vdip, int flags) 451 { 452 mdi_vhci_t *found, *vh, *prev = NULL; 453 454 /* 455 * Check for invalid VHCI 456 */ 457 if ((vh = i_devi_get_vhci(vdip)) == NULL) 458 return (MDI_FAILURE); 459 460 mutex_enter(&mdi_mutex); 461 462 /* 463 * Scan the list of registered vHCIs for a match 464 */ 465 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 466 if (found == vh) 467 break; 468 prev = found; 469 } 470 471 if (found == NULL) { 472 mutex_exit(&mdi_mutex); 473 return (MDI_FAILURE); 474 } 475 476 /* 477 * Check the vHCI, pHCI and client count. All the pHCIs and clients 478 * should have been unregistered, before a vHCI can be 479 * unregistered. 
480 */ 481 if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) { 482 mutex_exit(&mdi_mutex); 483 return (MDI_FAILURE); 484 } 485 486 /* 487 * Remove the vHCI from the global list 488 */ 489 if (vh == mdi_vhci_head) { 490 mdi_vhci_head = vh->vh_next; 491 } else { 492 prev->vh_next = vh->vh_next; 493 } 494 if (vh == mdi_vhci_tail) { 495 mdi_vhci_tail = prev; 496 } 497 498 mdi_vhci_count--; 499 mutex_exit(&mdi_mutex); 500 501 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 502 /* add vhci to the global list */ 503 mutex_enter(&mdi_mutex); 504 if (mdi_vhci_head == NULL) 505 mdi_vhci_head = vh; 506 else 507 mdi_vhci_tail->vh_next = vh; 508 mdi_vhci_tail = vh; 509 mdi_vhci_count++; 510 mutex_exit(&mdi_mutex); 511 return (MDI_FAILURE); 512 } 513 514 vh->vh_ops = NULL; 515 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 516 DEVI(vdip)->devi_mdi_xhci = NULL; 517 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 518 kmem_free(vh->vh_client_table, 519 mdi_client_table_size * sizeof (struct client_hash)); 520 521 kmem_free(vh, sizeof (mdi_vhci_t)); 522 return (MDI_SUCCESS); 523 } 524 525 /* 526 * i_mdi_vhci_class2vhci(): 527 * Look for a matching vHCI module given a vHCI class name 528 * Return Values: 529 * Handle to a vHCI component 530 * NULL 531 */ 532 static mdi_vhci_t * 533 i_mdi_vhci_class2vhci(char *class) 534 { 535 mdi_vhci_t *vh = NULL; 536 537 ASSERT(!MUTEX_HELD(&mdi_mutex)); 538 539 mutex_enter(&mdi_mutex); 540 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 541 if (strcmp(vh->vh_class, class) == 0) { 542 break; 543 } 544 } 545 mutex_exit(&mdi_mutex); 546 return (vh); 547 } 548 549 /* 550 * i_devi_get_vhci(): 551 * Utility function to get the handle to a vHCI component 552 * Return Values: 553 * Handle to a vHCI component 554 * NULL 555 */ 556 mdi_vhci_t * 557 i_devi_get_vhci(dev_info_t *vdip) 558 { 559 mdi_vhci_t *vh = NULL; 560 if (MDI_VHCI(vdip)) { 561 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 562 } 563 return (vh); 564 } 565 566 /* 
567 * mdi_phci_register(): 568 * Register a pHCI module with mpxio framework 569 * mdi_phci_register() is called by pHCI drivers to register with 570 * the mpxio framework and a specific 'class_driver' vHCI. The 571 * pHCI driver must call this interface as part of its attach(9e) 572 * handler. 573 * Return Values: 574 * MDI_SUCCESS 575 * MDI_FAILURE 576 */ 577 578 /*ARGSUSED*/ 579 int 580 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 581 { 582 mdi_phci_t *ph; 583 mdi_vhci_t *vh; 584 char *data; 585 char *pathname; 586 587 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 588 (void) ddi_pathname(pdip, pathname); 589 590 /* 591 * Check for mpxio-disable property. Enable mpxio if the property is 592 * missing or not set to "yes". 593 * If the property is set to "yes" then emit a brief message. 594 */ 595 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 596 &data) == DDI_SUCCESS)) { 597 if (strcmp(data, "yes") == 0) { 598 MDI_DEBUG(1, (CE_CONT, pdip, 599 "?%s (%s%d) multipath capabilities " 600 "disabled via %s.conf.\n", pathname, 601 ddi_driver_name(pdip), ddi_get_instance(pdip), 602 ddi_driver_name(pdip))); 603 ddi_prop_free(data); 604 kmem_free(pathname, MAXPATHLEN); 605 return (MDI_FAILURE); 606 } 607 ddi_prop_free(data); 608 } 609 610 kmem_free(pathname, MAXPATHLEN); 611 612 /* 613 * Search for a matching vHCI 614 */ 615 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 616 if (vh == NULL) { 617 return (MDI_FAILURE); 618 } 619 620 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 621 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 622 ph->ph_dip = pdip; 623 ph->ph_vhci = vh; 624 ph->ph_next = NULL; 625 ph->ph_unstable = 0; 626 ph->ph_vprivate = 0; 627 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 628 cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL); 629 630 MDI_PHCI_SET_POWER_UP(ph); 631 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 632 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 633 634 
vhcache_phci_add(vh->vh_config, ph); 635 636 mutex_enter(&mdi_mutex); 637 if (vh->vh_phci_head == NULL) { 638 vh->vh_phci_head = ph; 639 } 640 if (vh->vh_phci_tail) { 641 vh->vh_phci_tail->ph_next = ph; 642 } 643 vh->vh_phci_tail = ph; 644 vh->vh_phci_count++; 645 mutex_exit(&mdi_mutex); 646 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 647 return (MDI_SUCCESS); 648 } 649 650 /* 651 * mdi_phci_unregister(): 652 * Unregister a pHCI module from mpxio framework 653 * mdi_phci_unregister() is called by the pHCI drivers from their 654 * detach(9E) handler to unregister their instances from the 655 * framework. 656 * Return Values: 657 * MDI_SUCCESS 658 * MDI_FAILURE 659 */ 660 661 /*ARGSUSED*/ 662 int 663 mdi_phci_unregister(dev_info_t *pdip, int flags) 664 { 665 mdi_vhci_t *vh; 666 mdi_phci_t *ph; 667 mdi_phci_t *tmp; 668 mdi_phci_t *prev = NULL; 669 670 ph = i_devi_get_phci(pdip); 671 if (ph == NULL) { 672 MDI_DEBUG(1, (CE_WARN, pdip, 673 "!pHCI unregister: Not a valid pHCI")); 674 return (MDI_FAILURE); 675 } 676 677 vh = ph->ph_vhci; 678 ASSERT(vh != NULL); 679 if (vh == NULL) { 680 MDI_DEBUG(1, (CE_WARN, pdip, 681 "!pHCI unregister: Not a valid vHCI")); 682 return (MDI_FAILURE); 683 } 684 685 mutex_enter(&mdi_mutex); 686 tmp = vh->vh_phci_head; 687 while (tmp) { 688 if (tmp == ph) { 689 break; 690 } 691 prev = tmp; 692 tmp = tmp->ph_next; 693 } 694 695 if (ph == vh->vh_phci_head) { 696 vh->vh_phci_head = ph->ph_next; 697 } else { 698 prev->ph_next = ph->ph_next; 699 } 700 701 if (ph == vh->vh_phci_tail) { 702 vh->vh_phci_tail = prev; 703 } 704 705 vh->vh_phci_count--; 706 707 mutex_exit(&mdi_mutex); 708 709 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 710 ESC_DDI_INITIATOR_UNREGISTER); 711 vhcache_phci_remove(vh->vh_config, ph); 712 cv_destroy(&ph->ph_unstable_cv); 713 cv_destroy(&ph->ph_powerchange_cv); 714 mutex_destroy(&ph->ph_mutex); 715 kmem_free(ph, sizeof (mdi_phci_t)); 716 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 717 
DEVI(pdip)->devi_mdi_xhci = NULL; 718 return (MDI_SUCCESS); 719 } 720 721 /* 722 * i_devi_get_phci(): 723 * Utility function to return the phci extensions. 724 */ 725 static mdi_phci_t * 726 i_devi_get_phci(dev_info_t *pdip) 727 { 728 mdi_phci_t *ph = NULL; 729 if (MDI_PHCI(pdip)) { 730 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 731 } 732 return (ph); 733 } 734 735 /* 736 * mdi_phci_path2devinfo(): 737 * Utility function to search for a valid phci device given 738 * the devfs pathname. 739 */ 740 741 dev_info_t * 742 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 743 { 744 char *temp_pathname; 745 mdi_vhci_t *vh; 746 mdi_phci_t *ph; 747 dev_info_t *pdip = NULL; 748 749 vh = i_devi_get_vhci(vdip); 750 ASSERT(vh != NULL); 751 752 if (vh == NULL) { 753 /* 754 * Invalid vHCI component, return failure 755 */ 756 return (NULL); 757 } 758 759 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 760 mutex_enter(&mdi_mutex); 761 ph = vh->vh_phci_head; 762 while (ph != NULL) { 763 pdip = ph->ph_dip; 764 ASSERT(pdip != NULL); 765 *temp_pathname = '\0'; 766 (void) ddi_pathname(pdip, temp_pathname); 767 if (strcmp(temp_pathname, pathname) == 0) { 768 break; 769 } 770 ph = ph->ph_next; 771 } 772 if (ph == NULL) { 773 pdip = NULL; 774 } 775 mutex_exit(&mdi_mutex); 776 kmem_free(temp_pathname, MAXPATHLEN); 777 return (pdip); 778 } 779 780 /* 781 * mdi_phci_get_path_count(): 782 * get number of path information nodes associated with a given 783 * pHCI device. 
784 */ 785 int 786 mdi_phci_get_path_count(dev_info_t *pdip) 787 { 788 mdi_phci_t *ph; 789 int count = 0; 790 791 ph = i_devi_get_phci(pdip); 792 if (ph != NULL) { 793 count = ph->ph_path_count; 794 } 795 return (count); 796 } 797 798 /* 799 * i_mdi_phci_lock(): 800 * Lock a pHCI device 801 * Return Values: 802 * None 803 * Note: 804 * The default locking order is: 805 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 806 * But there are number of situations where locks need to be 807 * grabbed in reverse order. This routine implements try and lock 808 * mechanism depending on the requested parameter option. 809 */ 810 static void 811 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 812 { 813 if (pip) { 814 /* Reverse locking is requested. */ 815 while (MDI_PHCI_TRYLOCK(ph) == 0) { 816 /* 817 * tryenter failed. Try to grab again 818 * after a small delay 819 */ 820 MDI_PI_HOLD(pip); 821 MDI_PI_UNLOCK(pip); 822 delay(1); 823 MDI_PI_LOCK(pip); 824 MDI_PI_RELE(pip); 825 } 826 } else { 827 MDI_PHCI_LOCK(ph); 828 } 829 } 830 831 /* 832 * i_mdi_phci_get_client_lock(): 833 * Lock a pHCI device 834 * Return Values: 835 * None 836 * Note: 837 * The default locking order is: 838 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 839 * But there are number of situations where locks need to be 840 * grabbed in reverse order. This routine implements try and lock 841 * mechanism depending on the requested parameter option. 842 */ 843 static void 844 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct) 845 { 846 if (ct) { 847 /* Reverse locking is requested. */ 848 while (MDI_PHCI_TRYLOCK(ph) == 0) { 849 /* 850 * tryenter failed. 
Try to grab again 851 * after a small delay 852 */ 853 MDI_CLIENT_UNLOCK(ct); 854 delay(1); 855 MDI_CLIENT_LOCK(ct); 856 } 857 } else { 858 MDI_PHCI_LOCK(ph); 859 } 860 } 861 862 /* 863 * i_mdi_phci_unlock(): 864 * Unlock the pHCI component 865 */ 866 static void 867 i_mdi_phci_unlock(mdi_phci_t *ph) 868 { 869 MDI_PHCI_UNLOCK(ph); 870 } 871 872 /* 873 * i_mdi_devinfo_create(): 874 * create client device's devinfo node 875 * Return Values: 876 * dev_info 877 * NULL 878 * Notes: 879 */ 880 static dev_info_t * 881 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 882 char **compatible, int ncompatible) 883 { 884 dev_info_t *cdip = NULL; 885 886 ASSERT(MUTEX_HELD(&mdi_mutex)); 887 888 /* Verify for duplicate entry */ 889 cdip = i_mdi_devinfo_find(vh, name, guid); 890 ASSERT(cdip == NULL); 891 if (cdip) { 892 cmn_err(CE_WARN, 893 "i_mdi_devinfo_create: client dip %p already exists", 894 (void *)cdip); 895 } 896 897 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 898 if (cdip == NULL) 899 goto fail; 900 901 /* 902 * Create component type and Global unique identifier 903 * properties 904 */ 905 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 906 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 907 goto fail; 908 } 909 910 /* Decorate the node with compatible property */ 911 if (compatible && 912 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 913 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 914 goto fail; 915 } 916 917 return (cdip); 918 919 fail: 920 if (cdip) { 921 (void) ndi_prop_remove_all(cdip); 922 (void) ndi_devi_free(cdip); 923 } 924 return (NULL); 925 } 926 927 /* 928 * i_mdi_devinfo_find(): 929 * Find a matching devinfo node for given client node name 930 * and its guid. 
931 * Return Values: 932 * Handle to a dev_info node or NULL 933 */ 934 935 static dev_info_t * 936 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 937 { 938 char *data; 939 dev_info_t *cdip = NULL; 940 dev_info_t *ndip = NULL; 941 int circular; 942 943 ndi_devi_enter(vh->vh_dip, &circular); 944 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 945 while ((cdip = ndip) != NULL) { 946 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 947 948 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 949 continue; 950 } 951 952 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 953 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 954 &data) != DDI_PROP_SUCCESS) { 955 continue; 956 } 957 958 if (strcmp(data, guid) != 0) { 959 ddi_prop_free(data); 960 continue; 961 } 962 ddi_prop_free(data); 963 break; 964 } 965 ndi_devi_exit(vh->vh_dip, circular); 966 return (cdip); 967 } 968 969 /* 970 * i_mdi_devinfo_remove(): 971 * Remove a client device node 972 */ 973 static int 974 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 975 { 976 int rv = MDI_SUCCESS; 977 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 978 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 979 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 980 if (rv != NDI_SUCCESS) { 981 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 982 " failed. 
cdip = %p\n", cdip)); 983 } 984 /* 985 * Convert to MDI error code 986 */ 987 switch (rv) { 988 case NDI_SUCCESS: 989 rv = MDI_SUCCESS; 990 break; 991 case NDI_BUSY: 992 rv = MDI_BUSY; 993 break; 994 default: 995 rv = MDI_FAILURE; 996 break; 997 } 998 } 999 return (rv); 1000 } 1001 1002 /* 1003 * i_devi_get_client() 1004 * Utility function to get mpxio component extensions 1005 */ 1006 static mdi_client_t * 1007 i_devi_get_client(dev_info_t *cdip) 1008 { 1009 mdi_client_t *ct = NULL; 1010 if (MDI_CLIENT(cdip)) { 1011 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1012 } 1013 return (ct); 1014 } 1015 1016 /* 1017 * i_mdi_is_child_present(): 1018 * Search for the presence of client device dev_info node 1019 */ 1020 1021 static int 1022 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1023 { 1024 int rv = MDI_FAILURE; 1025 struct dev_info *dip; 1026 int circular; 1027 1028 ndi_devi_enter(vdip, &circular); 1029 dip = DEVI(vdip)->devi_child; 1030 while (dip) { 1031 if (dip == DEVI(cdip)) { 1032 rv = MDI_SUCCESS; 1033 break; 1034 } 1035 dip = dip->devi_sibling; 1036 } 1037 ndi_devi_exit(vdip, circular); 1038 return (rv); 1039 } 1040 1041 1042 /* 1043 * i_mdi_client_lock(): 1044 * Grab client component lock 1045 * Return Values: 1046 * None 1047 * Note: 1048 * The default locking order is: 1049 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1050 * But there are number of situations where locks need to be 1051 * grabbed in reverse order. This routine implements try and lock 1052 * mechanism depending on the requested parameter option. 1053 */ 1054 1055 static void 1056 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1057 { 1058 if (pip) { 1059 /* 1060 * Reverse locking is requested. 1061 */ 1062 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1063 /* 1064 * tryenter failed. 
Try to grab again 1065 * after a small delay 1066 */ 1067 MDI_PI_HOLD(pip); 1068 MDI_PI_UNLOCK(pip); 1069 delay(1); 1070 MDI_PI_LOCK(pip); 1071 MDI_PI_RELE(pip); 1072 } 1073 } else { 1074 MDI_CLIENT_LOCK(ct); 1075 } 1076 } 1077 1078 /* 1079 * i_mdi_client_unlock(): 1080 * Unlock a client component 1081 */ 1082 1083 static void 1084 i_mdi_client_unlock(mdi_client_t *ct) 1085 { 1086 MDI_CLIENT_UNLOCK(ct); 1087 } 1088 1089 /* 1090 * i_mdi_client_alloc(): 1091 * Allocate and initialize a client structure. Caller should 1092 * hold the global mdi_mutex. 1093 * Return Values: 1094 * Handle to a client component 1095 */ 1096 /*ARGSUSED*/ 1097 static mdi_client_t * 1098 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1099 { 1100 mdi_client_t *ct; 1101 1102 ASSERT(MUTEX_HELD(&mdi_mutex)); 1103 1104 /* 1105 * Allocate and initialize a component structure. 1106 */ 1107 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1108 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1109 ct->ct_hnext = NULL; 1110 ct->ct_hprev = NULL; 1111 ct->ct_dip = NULL; 1112 ct->ct_vhci = vh; 1113 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1114 (void) strcpy(ct->ct_drvname, name); 1115 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1116 (void) strcpy(ct->ct_guid, lguid); 1117 ct->ct_cprivate = NULL; 1118 ct->ct_vprivate = NULL; 1119 ct->ct_flags = 0; 1120 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1121 MDI_CLIENT_SET_OFFLINE(ct); 1122 MDI_CLIENT_SET_DETACH(ct); 1123 MDI_CLIENT_SET_POWER_UP(ct); 1124 ct->ct_failover_flags = 0; 1125 ct->ct_failover_status = 0; 1126 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1127 ct->ct_unstable = 0; 1128 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1129 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1130 ct->ct_lb = vh->vh_lb; 1131 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1132 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1133 ct->ct_path_count = 0; 1134 ct->ct_path_head 
= NULL; 1135 ct->ct_path_tail = NULL; 1136 ct->ct_path_last = NULL; 1137 1138 /* 1139 * Add this client component to our client hash queue 1140 */ 1141 i_mdi_client_enlist_table(vh, ct); 1142 return (ct); 1143 } 1144 1145 /* 1146 * i_mdi_client_enlist_table(): 1147 * Attach the client device to the client hash table. Caller 1148 * should hold the mdi_mutex 1149 */ 1150 1151 static void 1152 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1153 { 1154 int index; 1155 struct client_hash *head; 1156 1157 ASSERT(MUTEX_HELD(&mdi_mutex)); 1158 index = i_mdi_get_hash_key(ct->ct_guid); 1159 head = &vh->vh_client_table[index]; 1160 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1161 head->ct_hash_head = ct; 1162 head->ct_hash_count++; 1163 vh->vh_client_count++; 1164 } 1165 1166 /* 1167 * i_mdi_client_delist_table(): 1168 * Attach the client device to the client hash table. 1169 * Caller should hold the mdi_mutex 1170 */ 1171 1172 static void 1173 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1174 { 1175 int index; 1176 char *guid; 1177 struct client_hash *head; 1178 mdi_client_t *next; 1179 mdi_client_t *last; 1180 1181 ASSERT(MUTEX_HELD(&mdi_mutex)); 1182 guid = ct->ct_guid; 1183 index = i_mdi_get_hash_key(guid); 1184 head = &vh->vh_client_table[index]; 1185 1186 last = NULL; 1187 next = (mdi_client_t *)head->ct_hash_head; 1188 while (next != NULL) { 1189 if (next == ct) { 1190 break; 1191 } 1192 last = next; 1193 next = next->ct_hnext; 1194 } 1195 1196 if (next) { 1197 head->ct_hash_count--; 1198 if (last == NULL) { 1199 head->ct_hash_head = ct->ct_hnext; 1200 } else { 1201 last->ct_hnext = ct->ct_hnext; 1202 } 1203 ct->ct_hnext = NULL; 1204 vh->vh_client_count--; 1205 } 1206 } 1207 1208 1209 /* 1210 * i_mdi_client_free(): 1211 * Free a client component 1212 */ 1213 static int 1214 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1215 { 1216 int rv = MDI_SUCCESS; 1217 int flags = ct->ct_flags; 1218 dev_info_t *cdip; 1219 dev_info_t *vdip; 
1220 1221 ASSERT(MUTEX_HELD(&mdi_mutex)); 1222 vdip = vh->vh_dip; 1223 cdip = ct->ct_dip; 1224 1225 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1226 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1227 DEVI(cdip)->devi_mdi_client = NULL; 1228 1229 /* 1230 * Clear out back ref. to dev_info_t node 1231 */ 1232 ct->ct_dip = NULL; 1233 1234 /* 1235 * Remove this client from our hash queue 1236 */ 1237 i_mdi_client_delist_table(vh, ct); 1238 1239 /* 1240 * Uninitialize and free the component 1241 */ 1242 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1243 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1244 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1245 cv_destroy(&ct->ct_failover_cv); 1246 cv_destroy(&ct->ct_unstable_cv); 1247 cv_destroy(&ct->ct_powerchange_cv); 1248 mutex_destroy(&ct->ct_mutex); 1249 kmem_free(ct, sizeof (*ct)); 1250 1251 if (cdip != NULL) { 1252 mutex_exit(&mdi_mutex); 1253 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1254 mutex_enter(&mdi_mutex); 1255 } 1256 return (rv); 1257 } 1258 1259 /* 1260 * i_mdi_client_find(): 1261 * Find the client structure corresponding to a given guid 1262 * Caller should hold the mdi_mutex 1263 */ 1264 static mdi_client_t * 1265 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1266 { 1267 int index; 1268 struct client_hash *head; 1269 mdi_client_t *ct; 1270 1271 ASSERT(MUTEX_HELD(&mdi_mutex)); 1272 index = i_mdi_get_hash_key(guid); 1273 head = &vh->vh_client_table[index]; 1274 1275 ct = head->ct_hash_head; 1276 while (ct != NULL) { 1277 if (strcmp(ct->ct_guid, guid) == 0 && 1278 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1279 break; 1280 } 1281 ct = ct->ct_hnext; 1282 } 1283 return (ct); 1284 } 1285 1286 1287 1288 /* 1289 * i_mdi_client_update_state(): 1290 * Compute and update client device state 1291 * Notes: 1292 * A client device can be in any of three possible states: 1293 * 1294 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state 
with more 1295 * one online/standby paths. Can tolerate failures. 1296 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1297 * no alternate paths available as standby. A failure on the online 1298 * would result in loss of access to device data. 1299 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1300 * no paths available to access the device. 1301 */ 1302 static void 1303 i_mdi_client_update_state(mdi_client_t *ct) 1304 { 1305 int state; 1306 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1307 state = i_mdi_client_compute_state(ct, NULL); 1308 MDI_CLIENT_SET_STATE(ct, state); 1309 } 1310 1311 /* 1312 * i_mdi_client_compute_state(): 1313 * Compute client device state 1314 * 1315 * mdi_phci_t * Pointer to pHCI structure which should 1316 * while computing the new value. Used by 1317 * i_mdi_phci_offline() to find the new 1318 * client state after DR of a pHCI. 1319 */ 1320 static int 1321 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1322 { 1323 int state; 1324 int online_count = 0; 1325 int standby_count = 0; 1326 mdi_pathinfo_t *pip, *next; 1327 1328 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1329 pip = ct->ct_path_head; 1330 while (pip != NULL) { 1331 MDI_PI_LOCK(pip); 1332 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1333 if (MDI_PI(pip)->pi_phci == ph) { 1334 MDI_PI_UNLOCK(pip); 1335 pip = next; 1336 continue; 1337 } 1338 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1339 == MDI_PATHINFO_STATE_ONLINE) 1340 online_count++; 1341 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1342 == MDI_PATHINFO_STATE_STANDBY) 1343 standby_count++; 1344 MDI_PI_UNLOCK(pip); 1345 pip = next; 1346 } 1347 1348 if (online_count == 0) { 1349 if (standby_count == 0) { 1350 state = MDI_CLIENT_STATE_FAILED; 1351 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1352 " ct = %p\n", ct)); 1353 } else if (standby_count == 1) { 1354 state = MDI_CLIENT_STATE_DEGRADED; 1355 } else { 1356 state = MDI_CLIENT_STATE_OPTIMAL; 1357 } 1358 } 
else if (online_count == 1) { 1359 if (standby_count == 0) { 1360 state = MDI_CLIENT_STATE_DEGRADED; 1361 } else { 1362 state = MDI_CLIENT_STATE_OPTIMAL; 1363 } 1364 } else { 1365 state = MDI_CLIENT_STATE_OPTIMAL; 1366 } 1367 return (state); 1368 } 1369 1370 /* 1371 * i_mdi_client2devinfo(): 1372 * Utility function 1373 */ 1374 dev_info_t * 1375 i_mdi_client2devinfo(mdi_client_t *ct) 1376 { 1377 return (ct->ct_dip); 1378 } 1379 1380 /* 1381 * mdi_client_path2_devinfo(): 1382 * Given the parent devinfo and child devfs pathname, search for 1383 * a valid devfs node handle. 1384 */ 1385 dev_info_t * 1386 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1387 { 1388 dev_info_t *cdip = NULL; 1389 dev_info_t *ndip = NULL; 1390 char *temp_pathname; 1391 int circular; 1392 1393 /* 1394 * Allocate temp buffer 1395 */ 1396 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1397 1398 /* 1399 * Lock parent against changes 1400 */ 1401 ndi_devi_enter(vdip, &circular); 1402 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1403 while ((cdip = ndip) != NULL) { 1404 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1405 1406 *temp_pathname = '\0'; 1407 (void) ddi_pathname(cdip, temp_pathname); 1408 if (strcmp(temp_pathname, pathname) == 0) { 1409 break; 1410 } 1411 } 1412 /* 1413 * Release devinfo lock 1414 */ 1415 ndi_devi_exit(vdip, circular); 1416 1417 /* 1418 * Free the temp buffer 1419 */ 1420 kmem_free(temp_pathname, MAXPATHLEN); 1421 return (cdip); 1422 } 1423 1424 1425 /* 1426 * mdi_client_get_path_count(): 1427 * Utility function to get number of path information nodes 1428 * associated with a given client device. 
1429 */ 1430 int 1431 mdi_client_get_path_count(dev_info_t *cdip) 1432 { 1433 mdi_client_t *ct; 1434 int count = 0; 1435 1436 ct = i_devi_get_client(cdip); 1437 if (ct != NULL) { 1438 count = ct->ct_path_count; 1439 } 1440 return (count); 1441 } 1442 1443 1444 /* 1445 * i_mdi_get_hash_key(): 1446 * Create a hash using strings as keys 1447 * 1448 */ 1449 static int 1450 i_mdi_get_hash_key(char *str) 1451 { 1452 uint32_t g, hash = 0; 1453 char *p; 1454 1455 for (p = str; *p != '\0'; p++) { 1456 g = *p; 1457 hash += g; 1458 } 1459 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1460 } 1461 1462 /* 1463 * mdi_get_lb_policy(): 1464 * Get current load balancing policy for a given client device 1465 */ 1466 client_lb_t 1467 mdi_get_lb_policy(dev_info_t *cdip) 1468 { 1469 client_lb_t lb = LOAD_BALANCE_NONE; 1470 mdi_client_t *ct; 1471 1472 ct = i_devi_get_client(cdip); 1473 if (ct != NULL) { 1474 lb = ct->ct_lb; 1475 } 1476 return (lb); 1477 } 1478 1479 /* 1480 * mdi_set_lb_region_size(): 1481 * Set current region size for the load-balance 1482 */ 1483 int 1484 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1485 { 1486 mdi_client_t *ct; 1487 int rv = MDI_FAILURE; 1488 1489 ct = i_devi_get_client(cdip); 1490 if (ct != NULL && ct->ct_lb_args != NULL) { 1491 ct->ct_lb_args->region_size = region_size; 1492 rv = MDI_SUCCESS; 1493 } 1494 return (rv); 1495 } 1496 1497 /* 1498 * mdi_Set_lb_policy(): 1499 * Set current load balancing policy for a given client device 1500 */ 1501 int 1502 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1503 { 1504 mdi_client_t *ct; 1505 int rv = MDI_FAILURE; 1506 1507 ct = i_devi_get_client(cdip); 1508 if (ct != NULL) { 1509 ct->ct_lb = lb; 1510 rv = MDI_SUCCESS; 1511 } 1512 return (rv); 1513 } 1514 1515 /* 1516 * mdi_failover(): 1517 * failover function called by the vHCI drivers to initiate 1518 * a failover operation. This is typically due to non-availability 1519 * of online paths to route I/O requests. 
Failover can be 1520 * triggered through user application also. 1521 * 1522 * The vHCI driver calls mdi_failover() to initiate a failover 1523 * operation. mdi_failover() calls back into the vHCI driver's 1524 * vo_failover() entry point to perform the actual failover 1525 * operation. The reason for requiring the vHCI driver to 1526 * initiate failover by calling mdi_failover(), instead of directly 1527 * executing vo_failover() itself, is to ensure that the mdi 1528 * framework can keep track of the client state properly. 1529 * Additionally, mdi_failover() provides as a convenience the 1530 * option of performing the failover operation synchronously or 1531 * asynchronously 1532 * 1533 * Upon successful completion of the failover operation, the 1534 * paths that were previously ONLINE will be in the STANDBY state, 1535 * and the newly activated paths will be in the ONLINE state. 1536 * 1537 * The flags modifier determines whether the activation is done 1538 * synchronously: MDI_FAILOVER_SYNC 1539 * Return Values: 1540 * MDI_SUCCESS 1541 * MDI_FAILURE 1542 * MDI_BUSY 1543 */ 1544 /*ARGSUSED*/ 1545 int 1546 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1547 { 1548 int rv; 1549 mdi_client_t *ct; 1550 1551 ct = i_devi_get_client(cdip); 1552 ASSERT(ct != NULL); 1553 if (ct == NULL) { 1554 /* cdip is not a valid client device. Nothing more to do. */ 1555 return (MDI_FAILURE); 1556 } 1557 1558 MDI_CLIENT_LOCK(ct); 1559 1560 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1561 /* A path to the client is being freed */ 1562 MDI_CLIENT_UNLOCK(ct); 1563 return (MDI_BUSY); 1564 } 1565 1566 1567 if (MDI_CLIENT_IS_FAILED(ct)) { 1568 /* 1569 * Client is in failed state. Nothing more to do. 
1570 */ 1571 MDI_CLIENT_UNLOCK(ct); 1572 return (MDI_FAILURE); 1573 } 1574 1575 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1576 /* 1577 * Failover is already in progress; return BUSY 1578 */ 1579 MDI_CLIENT_UNLOCK(ct); 1580 return (MDI_BUSY); 1581 } 1582 /* 1583 * Make sure that mdi_pathinfo node state changes are processed. 1584 * We do not allow failovers to progress while client path state 1585 * changes are in progress 1586 */ 1587 if (ct->ct_unstable) { 1588 if (flags == MDI_FAILOVER_ASYNC) { 1589 MDI_CLIENT_UNLOCK(ct); 1590 return (MDI_BUSY); 1591 } else { 1592 while (ct->ct_unstable) 1593 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1594 } 1595 } 1596 1597 /* 1598 * Client device is in stable state. Before proceeding, perform sanity 1599 * checks again. 1600 */ 1601 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1602 (i_ddi_node_state(ct->ct_dip) < DS_READY)) { 1603 /* 1604 * Client is in failed state. Nothing more to do. 1605 */ 1606 MDI_CLIENT_UNLOCK(ct); 1607 return (MDI_FAILURE); 1608 } 1609 1610 /* 1611 * Set the client state as failover in progress. 1612 */ 1613 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1614 ct->ct_failover_flags = flags; 1615 MDI_CLIENT_UNLOCK(ct); 1616 1617 if (flags == MDI_FAILOVER_ASYNC) { 1618 /* 1619 * Submit the initiate failover request via CPR safe 1620 * taskq threads. 1621 */ 1622 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1623 ct, KM_SLEEP); 1624 return (MDI_ACCEPT); 1625 } else { 1626 /* 1627 * Synchronous failover mode. Typically invoked from the user 1628 * land. 1629 */ 1630 rv = i_mdi_failover(ct); 1631 } 1632 return (rv); 1633 } 1634 1635 /* 1636 * i_mdi_failover(): 1637 * internal failover function. Invokes vHCI drivers failover 1638 * callback function and process the failover status 1639 * Return Values: 1640 * None 1641 * 1642 * Note: A client device in failover state can not be detached or freed. 
1643 */ 1644 static int 1645 i_mdi_failover(void *arg) 1646 { 1647 int rv = MDI_SUCCESS; 1648 mdi_client_t *ct = (mdi_client_t *)arg; 1649 mdi_vhci_t *vh = ct->ct_vhci; 1650 1651 ASSERT(!MUTEX_HELD(&ct->ct_mutex)); 1652 1653 if (vh->vh_ops->vo_failover != NULL) { 1654 /* 1655 * Call vHCI drivers callback routine 1656 */ 1657 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1658 ct->ct_failover_flags); 1659 } 1660 1661 MDI_CLIENT_LOCK(ct); 1662 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1663 1664 /* 1665 * Save the failover return status 1666 */ 1667 ct->ct_failover_status = rv; 1668 1669 /* 1670 * As a result of failover, client status would have been changed. 1671 * Update the client state and wake up anyone waiting on this client 1672 * device. 1673 */ 1674 i_mdi_client_update_state(ct); 1675 1676 cv_broadcast(&ct->ct_failover_cv); 1677 MDI_CLIENT_UNLOCK(ct); 1678 return (rv); 1679 } 1680 1681 /* 1682 * Load balancing is logical block. 1683 * IOs within the range described by region_size 1684 * would go on the same path. This would improve the 1685 * performance by cache-hit on some of the RAID devices. 1686 * Search only for online paths(At some point we 1687 * may want to balance across target ports). 1688 * If no paths are found then default to round-robin. 
1689 */ 1690 static int 1691 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1692 { 1693 int path_index = -1; 1694 int online_path_count = 0; 1695 int online_nonpref_path_count = 0; 1696 int region_size = ct->ct_lb_args->region_size; 1697 mdi_pathinfo_t *pip; 1698 mdi_pathinfo_t *next; 1699 int preferred, path_cnt; 1700 1701 pip = ct->ct_path_head; 1702 while (pip) { 1703 MDI_PI_LOCK(pip); 1704 if (MDI_PI(pip)->pi_state == 1705 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1706 online_path_count++; 1707 } else if (MDI_PI(pip)->pi_state == 1708 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1709 online_nonpref_path_count++; 1710 } 1711 next = (mdi_pathinfo_t *) 1712 MDI_PI(pip)->pi_client_link; 1713 MDI_PI_UNLOCK(pip); 1714 pip = next; 1715 } 1716 /* if found any online/preferred then use this type */ 1717 if (online_path_count > 0) { 1718 path_cnt = online_path_count; 1719 preferred = 1; 1720 } else if (online_nonpref_path_count > 0) { 1721 path_cnt = online_nonpref_path_count; 1722 preferred = 0; 1723 } else { 1724 path_cnt = 0; 1725 } 1726 if (path_cnt) { 1727 path_index = (bp->b_blkno >> region_size) % path_cnt; 1728 pip = ct->ct_path_head; 1729 while (pip && path_index != -1) { 1730 MDI_PI_LOCK(pip); 1731 if (path_index == 0 && 1732 (MDI_PI(pip)->pi_state == 1733 MDI_PATHINFO_STATE_ONLINE) && 1734 MDI_PI(pip)->pi_preferred == preferred) { 1735 MDI_PI_HOLD(pip); 1736 MDI_PI_UNLOCK(pip); 1737 *ret_pip = pip; 1738 return (MDI_SUCCESS); 1739 } 1740 path_index --; 1741 next = (mdi_pathinfo_t *) 1742 MDI_PI(pip)->pi_client_link; 1743 MDI_PI_UNLOCK(pip); 1744 pip = next; 1745 } 1746 if (pip == NULL) { 1747 MDI_DEBUG(4, (CE_NOTE, NULL, 1748 "!lba %p, no pip !!\n", 1749 bp->b_blkno)); 1750 } else { 1751 MDI_DEBUG(4, (CE_NOTE, NULL, 1752 "!lba %p, no pip for path_index, " 1753 "pip %p\n", pip)); 1754 } 1755 } 1756 return (MDI_FAILURE); 1757 } 1758 1759 /* 1760 * mdi_select_path(): 1761 * select a path to access a client 
device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online path is available to
 *		select, the framework automatically selects a suitable path
 *		for routing the I/O request. If a failover operation is active
 *		for this client device the call shall be failed with MDI_BUSY
 *		error code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is set through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  If start_pip is specified (non NULL) it is
 *		used as start point to walk and find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select a STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a held state (ref_cnt).
 *		Caller should release the hold by calling mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path matched but a path in a transient
 *				  state was seen during the walk
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * Any flag disables the default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	/* The client lock is held for the whole selection */
	MDI_CLIENT_LOCK(ct);

	/* Client state checks apply to the standard behavior only */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if ((MDI_CLIENT_IS_DETACHED(ct)) ||
		    i_ddi_node_state(cdip) < DS_READY) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * Circular walk from 'start': one full lap looking for
		 * preferred paths, then a second lap for non-preferred ones.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (MDI_PI(pip)->pi_state  ==
			    MDI_PATHINFO_STATE_ONLINE &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.  Remember it so that MDI_BUSY rather
			 * than MDI_NOPATH is returned if nothing is selected.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				/* first lap done; look at non-preferred */
				preferred = 0;
			} else if (start == pip && !preferred) {
				/* second lap done; give up */
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* LBA selection not possible: use round robin instead */
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * The walk begins at the node following the last selected
		 * path (standard behavior) or following start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * 'cond' is non-zero when this path has the state
			 * being asked for and the preference of the current
			 * pass.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					/* unrecognized flags never match */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.  Remember it so that MDI_BUSY rather
			 * than MDI_NOPATH is returned if nothing is selected.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * 'pip' is locked whenever do_again is reached,
			 * including via the goto below.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/* Nothing was selected */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
2180 */ 2181 mdi_pathinfo_t * 2182 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2183 { 2184 mdi_client_t *ct; 2185 2186 if (!MDI_CLIENT(ct_dip)) 2187 return (NULL); 2188 2189 /* 2190 * Walk through client link 2191 */ 2192 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2193 ASSERT(ct != NULL); 2194 2195 if (pip == NULL) 2196 return ((mdi_pathinfo_t *)ct->ct_path_head); 2197 2198 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2199 } 2200 2201 /* 2202 * For a phci, return the next available path to any client 2203 * Note: ditto mdi_get_next_phci_path() 2204 */ 2205 mdi_pathinfo_t * 2206 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2207 { 2208 mdi_phci_t *ph; 2209 2210 if (!MDI_PHCI(ph_dip)) 2211 return (NULL); 2212 2213 /* 2214 * Walk through pHCI link 2215 */ 2216 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2217 ASSERT(ph != NULL); 2218 2219 if (pip == NULL) 2220 return ((mdi_pathinfo_t *)ph->ph_path_head); 2221 2222 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2223 } 2224 2225 /* 2226 * mdi_get_nextpath(): 2227 * mdi_pathinfo node walker function. Get the next node from the 2228 * client or pHCI device list. 2229 * 2230 * XXX This is wrapper function for compatibility purposes only. 2231 * 2232 * It doesn't work under Multi-level MPxIO, where a dip 2233 * is both client and phci (which link should next_path follow?). 2234 * Once Leadville is modified to call mdi_get_next_phci/client_path, 2235 * this interface should be removed. 2236 */ 2237 void 2238 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip, 2239 mdi_pathinfo_t **ret_pip) 2240 { 2241 if (MDI_CLIENT(dip)) { 2242 *ret_pip = mdi_get_next_phci_path(dip, pip); 2243 } else if (MDI_PHCI(dip)) { 2244 *ret_pip = mdi_get_next_client_path(dip, pip); 2245 } else { 2246 *ret_pip = NULL; 2247 } 2248 } 2249 2250 /* 2251 * mdi_hold_path(): 2252 * Hold the mdi_pathinfo node against unwanted unexpected free. 
2253 * Return Values: 2254 * None 2255 */ 2256 void 2257 mdi_hold_path(mdi_pathinfo_t *pip) 2258 { 2259 if (pip) { 2260 MDI_PI_LOCK(pip); 2261 MDI_PI_HOLD(pip); 2262 MDI_PI_UNLOCK(pip); 2263 } 2264 } 2265 2266 2267 /* 2268 * mdi_rele_path(): 2269 * Release the mdi_pathinfo node which was selected 2270 * through mdi_select_path() mechanism or manually held by 2271 * calling mdi_hold_path(). 2272 * Return Values: 2273 * None 2274 */ 2275 void 2276 mdi_rele_path(mdi_pathinfo_t *pip) 2277 { 2278 if (pip) { 2279 MDI_PI_LOCK(pip); 2280 MDI_PI_RELE(pip); 2281 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2282 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2283 } 2284 MDI_PI_UNLOCK(pip); 2285 } 2286 } 2287 2288 2289 /* 2290 * mdi_pi_lock(): 2291 * Lock the mdi_pathinfo node. 2292 * Note: 2293 * The caller should release the lock by calling mdi_pi_unlock() 2294 */ 2295 void 2296 mdi_pi_lock(mdi_pathinfo_t *pip) 2297 { 2298 ASSERT(pip != NULL); 2299 if (pip) { 2300 MDI_PI_LOCK(pip); 2301 } 2302 } 2303 2304 2305 /* 2306 * mdi_pi_unlock(): 2307 * Unlock the mdi_pathinfo node. 2308 * Note: 2309 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2310 */ 2311 void 2312 mdi_pi_unlock(mdi_pathinfo_t *pip) 2313 { 2314 ASSERT(pip != NULL); 2315 if (pip) { 2316 MDI_PI_UNLOCK(pip); 2317 } 2318 } 2319 2320 /* 2321 * mdi_pi_find(): 2322 * Search the list of mdi_pathinfo nodes attached to the 2323 * pHCI/Client device node whose path address matches "paddr". 2324 * Returns a pointer to the mdi_pathinfo node if a matching node is 2325 * found. 2326 * Return Values: 2327 * mdi_pathinfo node handle 2328 * NULL 2329 * Notes: 2330 * Caller need not hold any locks to call this function. 
2331 */ 2332 mdi_pathinfo_t * 2333 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2334 { 2335 mdi_phci_t *ph; 2336 mdi_vhci_t *vh; 2337 mdi_client_t *ct; 2338 mdi_pathinfo_t *pip = NULL; 2339 2340 if ((pdip == NULL) || (paddr == NULL)) { 2341 return (NULL); 2342 } 2343 ph = i_devi_get_phci(pdip); 2344 if (ph == NULL) { 2345 /* 2346 * Invalid pHCI device, Nothing more to do. 2347 */ 2348 MDI_DEBUG(2, (CE_WARN, NULL, 2349 "!mdi_pi_find: invalid phci")); 2350 return (NULL); 2351 } 2352 2353 vh = ph->ph_vhci; 2354 if (vh == NULL) { 2355 /* 2356 * Invalid vHCI device, Nothing more to do. 2357 */ 2358 MDI_DEBUG(2, (CE_WARN, NULL, 2359 "!mdi_pi_find: invalid phci")); 2360 return (NULL); 2361 } 2362 2363 /* 2364 * Look for client device identified by caddr (guid) 2365 */ 2366 if (caddr == NULL) { 2367 /* 2368 * Find a mdi_pathinfo node under pHCI list for a matching 2369 * unit address. 2370 */ 2371 mutex_enter(&ph->ph_mutex); 2372 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2373 2374 while (pip != NULL) { 2375 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2376 break; 2377 } 2378 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2379 } 2380 mutex_exit(&ph->ph_mutex); 2381 return (pip); 2382 } 2383 2384 /* 2385 * XXX - Is the rest of the code in this function really necessary? 2386 * The consumers of mdi_pi_find() can search for the desired pathinfo 2387 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2388 * whether the search is based on the pathinfo nodes attached to 2389 * the pHCI or the client node, the result will be the same. 2390 */ 2391 2392 /* 2393 * Find the client device corresponding to 'caddr' 2394 */ 2395 mutex_enter(&mdi_mutex); 2396 2397 /* 2398 * XXX - Passing NULL to the following function works as long as the 2399 * the client addresses (caddr) are unique per vhci basis. 
2400 */ 2401 ct = i_mdi_client_find(vh, NULL, caddr); 2402 if (ct == NULL) { 2403 /* 2404 * Client not found, Obviously mdi_pathinfo node has not been 2405 * created yet. 2406 */ 2407 mutex_exit(&mdi_mutex); 2408 return (pip); 2409 } 2410 2411 /* 2412 * Hold the client lock and look for a mdi_pathinfo node with matching 2413 * pHCI and paddr 2414 */ 2415 MDI_CLIENT_LOCK(ct); 2416 2417 /* 2418 * Release the global mutex as it is no more needed. Note: We always 2419 * respect the locking order while acquiring. 2420 */ 2421 mutex_exit(&mdi_mutex); 2422 2423 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2424 while (pip != NULL) { 2425 /* 2426 * Compare the unit address 2427 */ 2428 if ((MDI_PI(pip)->pi_phci == ph) && 2429 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2430 break; 2431 } 2432 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2433 } 2434 MDI_CLIENT_UNLOCK(ct); 2435 return (pip); 2436 } 2437 2438 /* 2439 * mdi_pi_alloc(): 2440 * Allocate and initialize a new instance of a mdi_pathinfo node. 2441 * The mdi_pathinfo node returned by this function identifies a 2442 * unique device path is capable of having properties attached 2443 * and passed to mdi_pi_online() to fully attach and online the 2444 * path and client device node. 2445 * The mdi_pathinfo node returned by this function must be 2446 * destroyed using mdi_pi_free() if the path is no longer 2447 * operational or if the caller fails to attach a client device 2448 * node when calling mdi_pi_online(). The framework will not free 2449 * the resources allocated. 2450 * This function can be called from both interrupt and kernel 2451 * contexts. DDI_NOSLEEP flag should be used while calling 2452 * from interrupt contexts. 
2453 * Return Values: 2454 * MDI_SUCCESS 2455 * MDI_FAILURE 2456 * MDI_NOMEM 2457 */ 2458 /*ARGSUSED*/ 2459 int 2460 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2461 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2462 { 2463 mdi_vhci_t *vh; 2464 mdi_phci_t *ph; 2465 mdi_client_t *ct; 2466 mdi_pathinfo_t *pip = NULL; 2467 dev_info_t *cdip; 2468 int rv = MDI_NOMEM; 2469 int path_allocated = 0; 2470 2471 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2472 ret_pip == NULL) { 2473 /* Nothing more to do */ 2474 return (MDI_FAILURE); 2475 } 2476 2477 *ret_pip = NULL; 2478 ph = i_devi_get_phci(pdip); 2479 ASSERT(ph != NULL); 2480 if (ph == NULL) { 2481 /* Invalid pHCI device, return failure */ 2482 MDI_DEBUG(1, (CE_WARN, NULL, 2483 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2484 return (MDI_FAILURE); 2485 } 2486 2487 MDI_PHCI_LOCK(ph); 2488 vh = ph->ph_vhci; 2489 if (vh == NULL) { 2490 /* Invalid vHCI device, return failure */ 2491 MDI_DEBUG(1, (CE_WARN, NULL, 2492 "!mdi_pi_alloc: invalid pHCI=%p", pdip)); 2493 MDI_PHCI_UNLOCK(ph); 2494 return (MDI_FAILURE); 2495 } 2496 2497 if (MDI_PHCI_IS_READY(ph) == 0) { 2498 /* 2499 * Do not allow new node creation when pHCI is in 2500 * offline/suspended states 2501 */ 2502 MDI_DEBUG(1, (CE_WARN, NULL, 2503 "mdi_pi_alloc: pHCI=%p is not ready", ph)); 2504 MDI_PHCI_UNLOCK(ph); 2505 return (MDI_BUSY); 2506 } 2507 MDI_PHCI_UNSTABLE(ph); 2508 MDI_PHCI_UNLOCK(ph); 2509 2510 /* look for a matching client, create one if not found */ 2511 mutex_enter(&mdi_mutex); 2512 ct = i_mdi_client_find(vh, cname, caddr); 2513 if (ct == NULL) { 2514 ct = i_mdi_client_alloc(vh, cname, caddr); 2515 ASSERT(ct != NULL); 2516 } 2517 2518 if (ct->ct_dip == NULL) { 2519 /* 2520 * Allocate a devinfo node 2521 */ 2522 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2523 compatible, ncompatible); 2524 if (ct->ct_dip == NULL) { 2525 (void) i_mdi_client_free(vh, ct); 2526 goto 
fail; 2527 } 2528 } 2529 cdip = ct->ct_dip; 2530 2531 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2532 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2533 2534 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2535 while (pip != NULL) { 2536 /* 2537 * Compare the unit address 2538 */ 2539 if ((MDI_PI(pip)->pi_phci == ph) && 2540 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2541 break; 2542 } 2543 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2544 } 2545 2546 if (pip == NULL) { 2547 /* 2548 * This is a new path for this client device. Allocate and 2549 * initialize a new pathinfo node 2550 */ 2551 pip = i_mdi_pi_alloc(ph, paddr, ct); 2552 ASSERT(pip != NULL); 2553 path_allocated = 1; 2554 } 2555 rv = MDI_SUCCESS; 2556 2557 fail: 2558 /* 2559 * Release the global mutex. 2560 */ 2561 mutex_exit(&mdi_mutex); 2562 2563 /* 2564 * Mark the pHCI as stable 2565 */ 2566 MDI_PHCI_LOCK(ph); 2567 MDI_PHCI_STABLE(ph); 2568 MDI_PHCI_UNLOCK(ph); 2569 *ret_pip = pip; 2570 2571 if (path_allocated) 2572 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2573 2574 return (rv); 2575 } 2576 2577 /*ARGSUSED*/ 2578 int 2579 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2580 int flags, mdi_pathinfo_t **ret_pip) 2581 { 2582 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2583 flags, ret_pip)); 2584 } 2585 2586 /* 2587 * i_mdi_pi_alloc(): 2588 * Allocate a mdi_pathinfo node and add to the pHCI path list 2589 * Return Values: 2590 * mdi_pathinfo 2591 */ 2592 2593 /*ARGSUSED*/ 2594 static mdi_pathinfo_t * 2595 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2596 { 2597 mdi_pathinfo_t *pip; 2598 int ct_circular; 2599 int ph_circular; 2600 int se_flag; 2601 int kmem_flag; 2602 2603 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2604 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2605 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2606 MDI_PATHINFO_STATE_TRANSIENT; 2607 2608 if (MDI_PHCI_IS_USER_DISABLED(ph)) 
2609 MDI_PI_SET_USER_DISABLE(pip); 2610 2611 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2612 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2613 2614 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2615 MDI_PI_SET_DRV_DISABLE(pip); 2616 2617 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2618 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2619 MDI_PI(pip)->pi_client = ct; 2620 MDI_PI(pip)->pi_phci = ph; 2621 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2622 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2623 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2624 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2625 MDI_PI(pip)->pi_pprivate = NULL; 2626 MDI_PI(pip)->pi_cprivate = NULL; 2627 MDI_PI(pip)->pi_vprivate = NULL; 2628 MDI_PI(pip)->pi_client_link = NULL; 2629 MDI_PI(pip)->pi_phci_link = NULL; 2630 MDI_PI(pip)->pi_ref_cnt = 0; 2631 MDI_PI(pip)->pi_kstats = NULL; 2632 MDI_PI(pip)->pi_preferred = 1; 2633 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2634 2635 /* 2636 * Lock both dev_info nodes against changes in parallel. 2637 */ 2638 ndi_devi_enter(ct->ct_dip, &ct_circular); 2639 ndi_devi_enter(ph->ph_dip, &ph_circular); 2640 2641 i_mdi_phci_add_path(ph, pip); 2642 i_mdi_client_add_path(ct, pip); 2643 2644 ndi_devi_exit(ph->ph_dip, ph_circular); 2645 ndi_devi_exit(ct->ct_dip, ct_circular); 2646 2647 /* determine interrupt context */ 2648 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2649 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 2650 2651 i_ddi_di_cache_invalidate(kmem_flag); 2652 2653 return (pip); 2654 } 2655 2656 /* 2657 * i_mdi_phci_add_path(): 2658 * Add a mdi_pathinfo node to pHCI list. 
 * Notes:
 *		The node is appended at the tail of the pHCI's path list and
 *		ph_path_count is incremented.
 *		NOTE(review): the original note asked the caller to hold the
 *		per-pHCI mutex; what the code actually asserts is
 *		DEVI_BUSY_OWNED(ph->ph_dip) - confirm which is required.
 */

static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	if (ph->ph_path_head == NULL) {
		/* First path on this pHCI */
		ph->ph_path_head = pip;
	} else {
		/* Chain behind the current tail */
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 * Notes:
 *		The node is appended at the tail of the client's path list and
 *		ct_path_count is incremented.  The code asserts
 *		DEVI_BUSY_OWNED(ct->ct_dip).
 */

static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	if (ct->ct_path_head == NULL) {
		/* First path for this client */
		ct->ct_path_head = pip;
	} else {
		/* Chain behind the current tail */
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 * Notes:
 *		pip is dereferenced unconditionally; a NULL pip is not handled.
 *		The return value of the vHCI's vo_pi_uninit() callback, when it
 *		is called, is returned to the caller as is.
 */

/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline, init or initing and the
	 * reference count is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.  Each wait is
		 * bounded to 60 seconds; a timed-out wait fails the request
		 * with MDI_BUSY.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}

	/*
	 * Remember whether this path holds a power-management reference so
	 * that it can be dropped once the node is actually freed.
	 */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	/* Re-added further down if vo_pi_uninit() returns MDI_FAILURE. */
	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till mdi_mutex is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock and take it again after mdi_mutex, so that
	 * both are held with mdi_mutex acquired first.
	 */
	MDI_CLIENT_UNLOCK(ct);
	mutex_enter(&mdi_mutex);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/* The vHCI uninit callback is skipped for a node still initing. */
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip is freed here and must not be touched afterwards */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			mutex_exit(&mdi_mutex);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	mutex_exit(&mdi_mutex);

	/*
	 * NOTE(review): the path is put back into the vhci cache only when
	 * rv is exactly MDI_FAILURE; for any other non-success value the
	 * node stays alive but out of the cache - confirm this is intended.
	 */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node
 * Notes:
 *		Destroys the per-path kstats, unlinks the node from both the
 *		client and the pHCI path lists, and releases the lock,
 *		condition variables, address string, property list and the
 *		node itself.  pip is invalid on return.
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;
	int	se_flag;
	int	kmem_flag;

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* Same dev_info entry order as i_mdi_pi_alloc(): client, then pHCI */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		/* Size matches the strlen() + 1 allocation in i_mdi_pi_alloc */
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *		Caller should hold per-pHCI mutex
 *		NOTE(review): the code asserts DEVI_BUSY_OWNED(ph->ph_dip),
 *		not the mutex - confirm which is required.
 *		pi_phci_link and pi_phci of pip are cleared even when pip is
 *		not found on the list.
 */

static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Walk the singly linked list, remembering the predecessor */
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			/* Removing the head of the list */
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			/* Removing the tail; prev is NULL if list is now empty */
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
}

/*
 * i_mdi_client_remove_path():
 *		Remove a mdi_pathinfo node from client path list.
 * Notes:
 *		The code asserts DEVI_BUSY_OWNED(ct->ct_dip).
 *		If the removed node was ct_path_last, ct_path_last is reset to
 *		the new head of the list.  pi_client_link and pi_client of pip
 *		are cleared even when pip is not found on the list.
 */

static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Walk the singly linked list, remembering the predecessor */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			/* Removing the head of the list */
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			/* Removing the tail; prev is NULL if list is now empty */
			ct->ct_path_tail = prev;
		}
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Move a mdi_pathinfo node to the requested state.  This is the
 *		common worker behind mdi_pi_online(), mdi_pi_standby(),
 *		mdi_pi_fault() and mdi_pi_offline().
 *
 *		The node is first marked with the matching transient state,
 *		the vHCI's vo_pi_state_change() callback (if any) is invoked
 *		with no pathinfo or client lock held, and the transient state
 *		is then either cleared (success) or rolled back to the old
 *		state (failure).  On success the client state is re-evaluated
 *		and the client devinfo node is onlined/offlined as needed.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		Any other non-success value returned by vo_pi_state_change()
 *		is passed back unchanged.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* The callback is made without the pathinfo lock held */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Marked stable again at state_change_exit */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/* Neither pip nor ct is locked at this point */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Notify the vHCI driver of the state change.  A vHCI without a
	 * vo_pi_state_change() entry point leaves rv at MDI_SUCCESS.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			/* Commit the new state */
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* Roll back to the state before the transition */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip &&
				    (i_ddi_node_state(cdip) < DS_READY) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3312 * Return Values: 3313 * MDI_SUCCESS 3314 * MDI_FAILURE 3315 */ 3316 int 3317 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3318 { 3319 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3320 dev_info_t *cdip; 3321 int client_held = 0; 3322 int rv; 3323 3324 ASSERT(ct != NULL); 3325 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3326 if (rv != MDI_SUCCESS) 3327 return (rv); 3328 3329 MDI_PI_LOCK(pip); 3330 if (MDI_PI(pip)->pi_pm_held == 0) { 3331 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3332 "i_mdi_pm_hold_pip\n")); 3333 i_mdi_pm_hold_pip(pip); 3334 client_held = 1; 3335 } 3336 MDI_PI_UNLOCK(pip); 3337 3338 if (client_held) { 3339 MDI_CLIENT_LOCK(ct); 3340 if (ct->ct_power_cnt == 0) { 3341 rv = i_mdi_power_all_phci(ct); 3342 } 3343 3344 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3345 "i_mdi_pm_hold_client\n")); 3346 i_mdi_pm_hold_client(ct, 1); 3347 MDI_CLIENT_UNLOCK(ct); 3348 } 3349 3350 /* 3351 * Create the per-path (pathinfo) IO and error kstats which 3352 * are reported via iostat(1m). 3353 * 3354 * Defer creating the per-path kstats if device is not yet 3355 * attached; the names of the kstats are constructed in part 3356 * using the devices instance number which is assigned during 3357 * process of attaching the client device. 3358 * 3359 * The framework post_attach handler, mdi_post_attach(), is 3360 * is responsible for initializing the client's pathinfo list 3361 * once successfully attached. 
3362 */ 3363 cdip = ct->ct_dip; 3364 ASSERT(cdip); 3365 if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED)) 3366 return (rv); 3367 3368 MDI_CLIENT_LOCK(ct); 3369 rv = i_mdi_pi_kstat_create(pip); 3370 MDI_CLIENT_UNLOCK(ct); 3371 return (rv); 3372 } 3373 3374 /* 3375 * mdi_pi_standby(): 3376 * Place the mdi_pathinfo node in standby state 3377 * 3378 * Return Values: 3379 * MDI_SUCCESS 3380 * MDI_FAILURE 3381 */ 3382 int 3383 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3384 { 3385 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3386 } 3387 3388 /* 3389 * mdi_pi_fault(): 3390 * Place the mdi_pathinfo node in fault'ed state 3391 * Return Values: 3392 * MDI_SUCCESS 3393 * MDI_FAILURE 3394 */ 3395 int 3396 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3397 { 3398 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3399 } 3400 3401 /* 3402 * mdi_pi_offline(): 3403 * Offline a mdi_pathinfo node. 3404 * Return Values: 3405 * MDI_SUCCESS 3406 * MDI_FAILURE 3407 */ 3408 int 3409 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3410 { 3411 int ret, client_held = 0; 3412 mdi_client_t *ct; 3413 3414 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3415 3416 if (ret == MDI_SUCCESS) { 3417 MDI_PI_LOCK(pip); 3418 if (MDI_PI(pip)->pi_pm_held) { 3419 client_held = 1; 3420 } 3421 MDI_PI_UNLOCK(pip); 3422 3423 if (client_held) { 3424 ct = MDI_PI(pip)->pi_client; 3425 MDI_CLIENT_LOCK(ct); 3426 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3427 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3428 i_mdi_pm_rele_client(ct, 1); 3429 MDI_CLIENT_UNLOCK(ct); 3430 } 3431 } 3432 3433 return (ret); 3434 } 3435 3436 /* 3437 * i_mdi_pi_offline(): 3438 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3439 */ 3440 static int 3441 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3442 { 3443 dev_info_t *vdip = NULL; 3444 mdi_vhci_t *vh = NULL; 3445 mdi_client_t *ct = NULL; 3446 int (*f)(); 3447 int rv; 3448 3449 MDI_PI_LOCK(pip); 
3450 ct = MDI_PI(pip)->pi_client; 3451 ASSERT(ct != NULL); 3452 3453 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3454 /* 3455 * Give a chance for pending I/Os to complete. 3456 */ 3457 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3458 "%d cmds still pending on path: %p\n", 3459 MDI_PI(pip)->pi_ref_cnt, pip)); 3460 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3461 &MDI_PI(pip)->pi_mutex, 3462 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3463 /* 3464 * The timeout time reached without ref_cnt being zero 3465 * being signaled. 3466 */ 3467 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3468 "Timeout reached on path %p without the cond\n", 3469 pip)); 3470 MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " 3471 "%d cmds still pending on path: %p\n", 3472 MDI_PI(pip)->pi_ref_cnt, pip)); 3473 } 3474 } 3475 vh = ct->ct_vhci; 3476 vdip = vh->vh_dip; 3477 3478 /* 3479 * Notify vHCI that has registered this event 3480 */ 3481 ASSERT(vh->vh_ops); 3482 f = vh->vh_ops->vo_pi_state_change; 3483 3484 if (f != NULL) { 3485 MDI_PI_UNLOCK(pip); 3486 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3487 flags)) != MDI_SUCCESS) { 3488 MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed " 3489 "vdip 0x%x, pip 0x%x", vdip, pip)); 3490 } 3491 MDI_PI_LOCK(pip); 3492 } 3493 3494 /* 3495 * Set the mdi_pathinfo node state and clear the transient condition 3496 */ 3497 MDI_PI_SET_OFFLINE(pip); 3498 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3499 MDI_PI_UNLOCK(pip); 3500 3501 MDI_CLIENT_LOCK(ct); 3502 if (rv == MDI_SUCCESS) { 3503 if (ct->ct_unstable == 0) { 3504 dev_info_t *cdip = ct->ct_dip; 3505 3506 /* 3507 * Onlining the mdi_pathinfo node will impact the 3508 * client state Update the client and dev_info node 3509 * state accordingly 3510 */ 3511 i_mdi_client_update_state(ct); 3512 rv = NDI_SUCCESS; 3513 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3514 if (cdip && 3515 (i_ddi_node_state(cdip) >= 3516 DS_INITIALIZED)) { 3517 MDI_CLIENT_UNLOCK(ct); 3518 rv = 
ndi_devi_offline(cdip, 0); 3519 MDI_CLIENT_LOCK(ct); 3520 if (rv != NDI_SUCCESS) { 3521 /* 3522 * ndi_devi_offline failed. 3523 * Reset client flags to 3524 * online. 3525 */ 3526 MDI_DEBUG(4, (CE_WARN, cdip, 3527 "!ndi_devi_offline: failed " 3528 " Error: %x", rv)); 3529 MDI_CLIENT_SET_ONLINE(ct); 3530 } 3531 } 3532 } 3533 /* 3534 * Convert to MDI error code 3535 */ 3536 switch (rv) { 3537 case NDI_SUCCESS: 3538 rv = MDI_SUCCESS; 3539 break; 3540 case NDI_BUSY: 3541 rv = MDI_BUSY; 3542 break; 3543 default: 3544 rv = MDI_FAILURE; 3545 break; 3546 } 3547 } 3548 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3549 i_mdi_report_path_state(ct, pip); 3550 } 3551 3552 MDI_CLIENT_UNLOCK(ct); 3553 3554 /* 3555 * Change in the mdi_pathinfo node state will impact the client state 3556 */ 3557 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3558 ct, pip)); 3559 return (rv); 3560 } 3561 3562 3563 /* 3564 * mdi_pi_get_addr(): 3565 * Get the unit address associated with a mdi_pathinfo node 3566 * 3567 * Return Values: 3568 * char * 3569 */ 3570 char * 3571 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3572 { 3573 if (pip == NULL) 3574 return (NULL); 3575 3576 return (MDI_PI(pip)->pi_addr); 3577 } 3578 3579 /* 3580 * mdi_pi_get_client(): 3581 * Get the client devinfo associated with a mdi_pathinfo node 3582 * 3583 * Return Values: 3584 * Handle to client device dev_info node 3585 */ 3586 dev_info_t * 3587 mdi_pi_get_client(mdi_pathinfo_t *pip) 3588 { 3589 dev_info_t *dip = NULL; 3590 if (pip) { 3591 dip = MDI_PI(pip)->pi_client->ct_dip; 3592 } 3593 return (dip); 3594 } 3595 3596 /* 3597 * mdi_pi_get_phci(): 3598 * Get the pHCI devinfo associated with the mdi_pathinfo node 3599 * Return Values: 3600 * Handle to dev_info node 3601 */ 3602 dev_info_t * 3603 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3604 { 3605 dev_info_t *dip = NULL; 3606 if (pip) { 3607 dip = MDI_PI(pip)->pi_phci->ph_dip; 3608 } 3609 return (dip); 3610 } 3611 3612 /* 3613 * mdi_pi_get_client_private(): 3614 * Get the 
client private information associated with the 3615 * mdi_pathinfo node 3616 */ 3617 void * 3618 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3619 { 3620 void *cprivate = NULL; 3621 if (pip) { 3622 cprivate = MDI_PI(pip)->pi_cprivate; 3623 } 3624 return (cprivate); 3625 } 3626 3627 /* 3628 * mdi_pi_set_client_private(): 3629 * Set the client private information in the mdi_pathinfo node 3630 */ 3631 void 3632 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3633 { 3634 if (pip) { 3635 MDI_PI(pip)->pi_cprivate = priv; 3636 } 3637 } 3638 3639 /* 3640 * mdi_pi_get_phci_private(): 3641 * Get the pHCI private information associated with the 3642 * mdi_pathinfo node 3643 */ 3644 caddr_t 3645 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3646 { 3647 caddr_t pprivate = NULL; 3648 if (pip) { 3649 pprivate = MDI_PI(pip)->pi_pprivate; 3650 } 3651 return (pprivate); 3652 } 3653 3654 /* 3655 * mdi_pi_set_phci_private(): 3656 * Set the pHCI private information in the mdi_pathinfo node 3657 */ 3658 void 3659 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3660 { 3661 if (pip) { 3662 MDI_PI(pip)->pi_pprivate = priv; 3663 } 3664 } 3665 3666 /* 3667 * mdi_pi_get_state(): 3668 * Get the mdi_pathinfo node state. Transient states are internal 3669 * and not provided to the users 3670 */ 3671 mdi_pathinfo_state_t 3672 mdi_pi_get_state(mdi_pathinfo_t *pip) 3673 { 3674 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3675 3676 if (pip) { 3677 if (MDI_PI_IS_TRANSIENT(pip)) { 3678 /* 3679 * mdi_pathinfo is in state transition. Return the 3680 * last good state. 3681 */ 3682 state = MDI_PI_OLD_STATE(pip); 3683 } else { 3684 state = MDI_PI_STATE(pip); 3685 } 3686 } 3687 return (state); 3688 } 3689 3690 /* 3691 * Note that the following function needs to be the new interface for 3692 * mdi_pi_get_state when mpxio gets integrated to ON. 
3693 */ 3694 int 3695 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3696 uint32_t *ext_state) 3697 { 3698 *state = MDI_PATHINFO_STATE_INIT; 3699 3700 if (pip) { 3701 if (MDI_PI_IS_TRANSIENT(pip)) { 3702 /* 3703 * mdi_pathinfo is in state transition. Return the 3704 * last good state. 3705 */ 3706 *state = MDI_PI_OLD_STATE(pip); 3707 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3708 } else { 3709 *state = MDI_PI_STATE(pip); 3710 *ext_state = MDI_PI_EXT_STATE(pip); 3711 } 3712 } 3713 return (MDI_SUCCESS); 3714 } 3715 3716 /* 3717 * mdi_pi_get_preferred: 3718 * Get the preferred path flag 3719 */ 3720 int 3721 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3722 { 3723 if (pip) { 3724 return (MDI_PI(pip)->pi_preferred); 3725 } 3726 return (0); 3727 } 3728 3729 /* 3730 * mdi_pi_set_preferred: 3731 * Set the preferred path flag 3732 */ 3733 void 3734 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3735 { 3736 if (pip) { 3737 MDI_PI(pip)->pi_preferred = preferred; 3738 } 3739 } 3740 3741 3742 /* 3743 * mdi_pi_set_state(): 3744 * Set the mdi_pathinfo node state 3745 */ 3746 void 3747 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3748 { 3749 uint32_t ext_state; 3750 3751 if (pip) { 3752 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3753 MDI_PI(pip)->pi_state = state; 3754 MDI_PI(pip)->pi_state |= ext_state; 3755 } 3756 } 3757 3758 /* 3759 * Property functions: 3760 */ 3761 3762 int 3763 i_map_nvlist_error_to_mdi(int val) 3764 { 3765 int rv; 3766 3767 switch (val) { 3768 case 0: 3769 rv = DDI_PROP_SUCCESS; 3770 break; 3771 case EINVAL: 3772 case ENOTSUP: 3773 rv = DDI_PROP_INVAL_ARG; 3774 break; 3775 case ENOMEM: 3776 rv = DDI_PROP_NO_MEMORY; 3777 break; 3778 default: 3779 rv = DDI_PROP_NOT_FOUND; 3780 break; 3781 } 3782 return (rv); 3783 } 3784 3785 /* 3786 * mdi_pi_get_next_prop(): 3787 * Property walk function. 
The caller should hold mdi_pi_lock() 3788 * and release by calling mdi_pi_unlock() at the end of walk to 3789 * get a consistent value. 3790 */ 3791 3792 nvpair_t * 3793 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3794 { 3795 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3796 return (NULL); 3797 } 3798 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3799 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3800 } 3801 3802 /* 3803 * mdi_prop_remove(): 3804 * Remove the named property from the named list. 3805 */ 3806 3807 int 3808 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3809 { 3810 if (pip == NULL) { 3811 return (DDI_PROP_NOT_FOUND); 3812 } 3813 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3814 MDI_PI_LOCK(pip); 3815 if (MDI_PI(pip)->pi_prop == NULL) { 3816 MDI_PI_UNLOCK(pip); 3817 return (DDI_PROP_NOT_FOUND); 3818 } 3819 if (name) { 3820 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3821 } else { 3822 char nvp_name[MAXNAMELEN]; 3823 nvpair_t *nvp; 3824 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3825 while (nvp) { 3826 nvpair_t *next; 3827 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3828 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3829 nvpair_name(nvp)); 3830 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3831 nvp_name); 3832 nvp = next; 3833 } 3834 } 3835 MDI_PI_UNLOCK(pip); 3836 return (DDI_PROP_SUCCESS); 3837 } 3838 3839 /* 3840 * mdi_prop_size(): 3841 * Get buffer size needed to pack the property data. 3842 * Caller should hold the mdi_pathinfo_t lock to get a consistent 3843 * buffer size. 
3844 */ 3845 3846 int 3847 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3848 { 3849 int rv; 3850 size_t bufsize; 3851 3852 *buflenp = 0; 3853 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3854 return (DDI_PROP_NOT_FOUND); 3855 } 3856 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3857 rv = nvlist_size(MDI_PI(pip)->pi_prop, 3858 &bufsize, NV_ENCODE_NATIVE); 3859 *buflenp = bufsize; 3860 return (i_map_nvlist_error_to_mdi(rv)); 3861 } 3862 3863 /* 3864 * mdi_prop_pack(): 3865 * pack the property list. The caller should hold the 3866 * mdi_pathinfo_t node to get a consistent data 3867 */ 3868 3869 int 3870 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 3871 { 3872 int rv; 3873 size_t bufsize; 3874 3875 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 3876 return (DDI_PROP_NOT_FOUND); 3877 } 3878 3879 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3880 3881 bufsize = buflen; 3882 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 3883 NV_ENCODE_NATIVE, KM_SLEEP); 3884 3885 return (i_map_nvlist_error_to_mdi(rv)); 3886 } 3887 3888 /* 3889 * mdi_prop_update_byte(): 3890 * Create/Update a byte property 3891 */ 3892 int 3893 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 3894 { 3895 int rv; 3896 3897 if (pip == NULL) { 3898 return (DDI_PROP_INVAL_ARG); 3899 } 3900 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3901 MDI_PI_LOCK(pip); 3902 if (MDI_PI(pip)->pi_prop == NULL) { 3903 MDI_PI_UNLOCK(pip); 3904 return (DDI_PROP_NOT_FOUND); 3905 } 3906 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 3907 MDI_PI_UNLOCK(pip); 3908 return (i_map_nvlist_error_to_mdi(rv)); 3909 } 3910 3911 /* 3912 * mdi_prop_update_byte_array(): 3913 * Create/Update a byte array property 3914 */ 3915 int 3916 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 3917 uint_t nelements) 3918 { 3919 int rv; 3920 3921 if (pip == NULL) { 3922 return (DDI_PROP_INVAL_ARG); 3923 } 3924 
ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3925 MDI_PI_LOCK(pip); 3926 if (MDI_PI(pip)->pi_prop == NULL) { 3927 MDI_PI_UNLOCK(pip); 3928 return (DDI_PROP_NOT_FOUND); 3929 } 3930 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 3931 MDI_PI_UNLOCK(pip); 3932 return (i_map_nvlist_error_to_mdi(rv)); 3933 } 3934 3935 /* 3936 * mdi_prop_update_int(): 3937 * Create/Update a 32 bit integer property 3938 */ 3939 int 3940 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 3941 { 3942 int rv; 3943 3944 if (pip == NULL) { 3945 return (DDI_PROP_INVAL_ARG); 3946 } 3947 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3948 MDI_PI_LOCK(pip); 3949 if (MDI_PI(pip)->pi_prop == NULL) { 3950 MDI_PI_UNLOCK(pip); 3951 return (DDI_PROP_NOT_FOUND); 3952 } 3953 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 3954 MDI_PI_UNLOCK(pip); 3955 return (i_map_nvlist_error_to_mdi(rv)); 3956 } 3957 3958 /* 3959 * mdi_prop_update_int64(): 3960 * Create/Update a 64 bit integer property 3961 */ 3962 int 3963 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 3964 { 3965 int rv; 3966 3967 if (pip == NULL) { 3968 return (DDI_PROP_INVAL_ARG); 3969 } 3970 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3971 MDI_PI_LOCK(pip); 3972 if (MDI_PI(pip)->pi_prop == NULL) { 3973 MDI_PI_UNLOCK(pip); 3974 return (DDI_PROP_NOT_FOUND); 3975 } 3976 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 3977 MDI_PI_UNLOCK(pip); 3978 return (i_map_nvlist_error_to_mdi(rv)); 3979 } 3980 3981 /* 3982 * mdi_prop_update_int_array(): 3983 * Create/Update a int array property 3984 */ 3985 int 3986 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 3987 uint_t nelements) 3988 { 3989 int rv; 3990 3991 if (pip == NULL) { 3992 return (DDI_PROP_INVAL_ARG); 3993 } 3994 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 3995 MDI_PI_LOCK(pip); 3996 if (MDI_PI(pip)->pi_prop == NULL) { 3997 MDI_PI_UNLOCK(pip); 3998 return (DDI_PROP_NOT_FOUND); 3999 } 4000 rv 
= nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4001 nelements); 4002 MDI_PI_UNLOCK(pip); 4003 return (i_map_nvlist_error_to_mdi(rv)); 4004 } 4005 4006 /* 4007 * mdi_prop_update_string(): 4008 * Create/Update a string property 4009 */ 4010 int 4011 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4012 { 4013 int rv; 4014 4015 if (pip == NULL) { 4016 return (DDI_PROP_INVAL_ARG); 4017 } 4018 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4019 MDI_PI_LOCK(pip); 4020 if (MDI_PI(pip)->pi_prop == NULL) { 4021 MDI_PI_UNLOCK(pip); 4022 return (DDI_PROP_NOT_FOUND); 4023 } 4024 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4025 MDI_PI_UNLOCK(pip); 4026 return (i_map_nvlist_error_to_mdi(rv)); 4027 } 4028 4029 /* 4030 * mdi_prop_update_string_array(): 4031 * Create/Update a string array property 4032 */ 4033 int 4034 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4035 uint_t nelements) 4036 { 4037 int rv; 4038 4039 if (pip == NULL) { 4040 return (DDI_PROP_INVAL_ARG); 4041 } 4042 ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 4043 MDI_PI_LOCK(pip); 4044 if (MDI_PI(pip)->pi_prop == NULL) { 4045 MDI_PI_UNLOCK(pip); 4046 return (DDI_PROP_NOT_FOUND); 4047 } 4048 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4049 nelements); 4050 MDI_PI_UNLOCK(pip); 4051 return (i_map_nvlist_error_to_mdi(rv)); 4052 } 4053 4054 /* 4055 * mdi_prop_lookup_byte(): 4056 * Look for byte property identified by name. The data returned 4057 * is the actual property and valid as long as mdi_pathinfo_t node 4058 * is alive. 
4059 */ 4060 int 4061 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4062 { 4063 int rv; 4064 4065 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4066 return (DDI_PROP_NOT_FOUND); 4067 } 4068 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4069 return (i_map_nvlist_error_to_mdi(rv)); 4070 } 4071 4072 4073 /* 4074 * mdi_prop_lookup_byte_array(): 4075 * Look for byte array property identified by name. The data 4076 * returned is the actual property and valid as long as 4077 * mdi_pathinfo_t node is alive. 4078 */ 4079 int 4080 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4081 uint_t *nelements) 4082 { 4083 int rv; 4084 4085 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4086 return (DDI_PROP_NOT_FOUND); 4087 } 4088 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4089 nelements); 4090 return (i_map_nvlist_error_to_mdi(rv)); 4091 } 4092 4093 /* 4094 * mdi_prop_lookup_int(): 4095 * Look for int property identified by name. The data returned 4096 * is the actual property and valid as long as mdi_pathinfo_t 4097 * node is alive. 4098 */ 4099 int 4100 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4101 { 4102 int rv; 4103 4104 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4105 return (DDI_PROP_NOT_FOUND); 4106 } 4107 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4108 return (i_map_nvlist_error_to_mdi(rv)); 4109 } 4110 4111 /* 4112 * mdi_prop_lookup_int64(): 4113 * Look for int64 property identified by name. The data returned 4114 * is the actual property and valid as long as mdi_pathinfo_t node 4115 * is alive. 
4116 */ 4117 int 4118 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4119 { 4120 int rv; 4121 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4122 return (DDI_PROP_NOT_FOUND); 4123 } 4124 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4125 return (i_map_nvlist_error_to_mdi(rv)); 4126 } 4127 4128 /* 4129 * mdi_prop_lookup_int_array(): 4130 * Look for int array property identified by name. The data 4131 * returned is the actual property and valid as long as 4132 * mdi_pathinfo_t node is alive. 4133 */ 4134 int 4135 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4136 uint_t *nelements) 4137 { 4138 int rv; 4139 4140 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4141 return (DDI_PROP_NOT_FOUND); 4142 } 4143 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4144 (int32_t **)data, nelements); 4145 return (i_map_nvlist_error_to_mdi(rv)); 4146 } 4147 4148 /* 4149 * mdi_prop_lookup_string(): 4150 * Look for string property identified by name. The data 4151 * returned is the actual property and valid as long as 4152 * mdi_pathinfo_t node is alive. 4153 */ 4154 int 4155 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4156 { 4157 int rv; 4158 4159 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4160 return (DDI_PROP_NOT_FOUND); 4161 } 4162 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4163 return (i_map_nvlist_error_to_mdi(rv)); 4164 } 4165 4166 /* 4167 * mdi_prop_lookup_string_array(): 4168 * Look for string array property identified by name. The data 4169 * returned is the actual property and valid as long as 4170 * mdi_pathinfo_t node is alive. 
4171 */ 4172 4173 int 4174 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4175 uint_t *nelements) 4176 { 4177 int rv; 4178 4179 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4180 return (DDI_PROP_NOT_FOUND); 4181 } 4182 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4183 nelements); 4184 return (i_map_nvlist_error_to_mdi(rv)); 4185 } 4186 4187 /* 4188 * mdi_prop_free(): 4189 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4190 * functions return the pointer to actual property data and not a 4191 * copy of it. So the data returned is valid as long as 4192 * mdi_pathinfo_t node is valid. 4193 */ 4194 4195 /*ARGSUSED*/ 4196 int 4197 mdi_prop_free(void *data) 4198 { 4199 return (DDI_PROP_SUCCESS); 4200 } 4201 4202 /*ARGSUSED*/ 4203 static void 4204 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4205 { 4206 char *phci_path, *ct_path; 4207 char *ct_status; 4208 char *status; 4209 dev_info_t *dip = ct->ct_dip; 4210 char lb_buf[64]; 4211 4212 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 4213 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4214 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4215 return; 4216 } 4217 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4218 ct_status = "optimal"; 4219 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4220 ct_status = "degraded"; 4221 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4222 ct_status = "failed"; 4223 } else { 4224 ct_status = "unknown"; 4225 } 4226 4227 if (MDI_PI_IS_OFFLINE(pip)) { 4228 status = "offline"; 4229 } else if (MDI_PI_IS_ONLINE(pip)) { 4230 status = "online"; 4231 } else if (MDI_PI_IS_STANDBY(pip)) { 4232 status = "standby"; 4233 } else if (MDI_PI_IS_FAULT(pip)) { 4234 status = "faulted"; 4235 } else { 4236 status = "unknown"; 4237 } 4238 4239 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4240 (void) snprintf(lb_buf, sizeof (lb_buf), 4241 "%s, region-size: %d", mdi_load_balance_lba, 4242 
ct->ct_lb_args->region_size); 4243 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4244 (void) snprintf(lb_buf, sizeof (lb_buf), 4245 "%s", mdi_load_balance_none); 4246 } else { 4247 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4248 mdi_load_balance_rr); 4249 } 4250 4251 if (dip) { 4252 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4253 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4254 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4255 "path %s (%s%d) to target address: %s is %s" 4256 " Load balancing: %s\n", 4257 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4258 ddi_get_instance(dip), ct_status, 4259 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4260 ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4261 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4262 MDI_PI(pip)->pi_addr, status, lb_buf); 4263 kmem_free(phci_path, MAXPATHLEN); 4264 kmem_free(ct_path, MAXPATHLEN); 4265 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4266 } 4267 } 4268 4269 #ifdef DEBUG 4270 /* 4271 * i_mdi_log(): 4272 * Utility function for error message management 4273 * 4274 */ 4275 4276 /*VARARGS3*/ 4277 static void 4278 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 
4279 { 4280 char buf[MAXNAMELEN]; 4281 char name[MAXNAMELEN]; 4282 va_list ap; 4283 int log_only = 0; 4284 int boot_only = 0; 4285 int console_only = 0; 4286 4287 if (dip) { 4288 if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) { 4289 (void) snprintf(name, MAXNAMELEN, "%s%d:\n", 4290 ddi_node_name(dip), ddi_get_instance(dip)); 4291 } else { 4292 (void) snprintf(name, MAXNAMELEN, "%s%d:", 4293 ddi_node_name(dip), ddi_get_instance(dip)); 4294 } 4295 } else { 4296 name[0] = '\0'; 4297 } 4298 4299 va_start(ap, fmt); 4300 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4301 va_end(ap); 4302 4303 switch (buf[0]) { 4304 case '!': 4305 log_only = 1; 4306 break; 4307 case '?': 4308 boot_only = 1; 4309 break; 4310 case '^': 4311 console_only = 1; 4312 break; 4313 } 4314 4315 switch (level) { 4316 case CE_NOTE: 4317 level = CE_CONT; 4318 /* FALLTHROUGH */ 4319 case CE_CONT: 4320 case CE_WARN: 4321 case CE_PANIC: 4322 if (boot_only) { 4323 cmn_err(level, "?%s\t%s", name, &buf[1]); 4324 } else if (console_only) { 4325 cmn_err(level, "^%s\t%s", name, &buf[1]); 4326 } else if (log_only) { 4327 cmn_err(level, "!%s\t%s", name, &buf[1]); 4328 } else { 4329 cmn_err(level, "%s\t%s", name, buf); 4330 } 4331 break; 4332 default: 4333 cmn_err(level, "%s\t%s", name, buf); 4334 break; 4335 } 4336 } 4337 #endif /* DEBUG */ 4338 4339 void 4340 i_mdi_client_online(dev_info_t *ct_dip) 4341 { 4342 mdi_client_t *ct; 4343 4344 /* 4345 * Client online notification. 
Mark client state as online 4346 * restore our binding with dev_info node 4347 */ 4348 ct = i_devi_get_client(ct_dip); 4349 ASSERT(ct != NULL); 4350 MDI_CLIENT_LOCK(ct); 4351 MDI_CLIENT_SET_ONLINE(ct); 4352 /* catch for any memory leaks */ 4353 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4354 ct->ct_dip = ct_dip; 4355 4356 if (ct->ct_power_cnt == 0) 4357 (void) i_mdi_power_all_phci(ct); 4358 4359 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4360 "i_mdi_pm_hold_client\n")); 4361 i_mdi_pm_hold_client(ct, 1); 4362 4363 MDI_CLIENT_UNLOCK(ct); 4364 } 4365 4366 void 4367 i_mdi_phci_online(dev_info_t *ph_dip) 4368 { 4369 mdi_phci_t *ph; 4370 4371 /* pHCI online notification. Mark state accordingly */ 4372 ph = i_devi_get_phci(ph_dip); 4373 ASSERT(ph != NULL); 4374 MDI_PHCI_LOCK(ph); 4375 MDI_PHCI_SET_ONLINE(ph); 4376 MDI_PHCI_UNLOCK(ph); 4377 } 4378 4379 /* 4380 * mdi_devi_online(): 4381 * Online notification from NDI framework on pHCI/client 4382 * device online. 4383 * Return Values: 4384 * NDI_SUCCESS 4385 * MDI_FAILURE 4386 */ 4387 4388 /*ARGSUSED*/ 4389 int 4390 mdi_devi_online(dev_info_t *dip, uint_t flags) 4391 { 4392 if (MDI_PHCI(dip)) { 4393 i_mdi_phci_online(dip); 4394 } 4395 4396 if (MDI_CLIENT(dip)) { 4397 i_mdi_client_online(dip); 4398 } 4399 return (NDI_SUCCESS); 4400 } 4401 4402 /* 4403 * mdi_devi_offline(): 4404 * Offline notification from NDI framework on pHCI/Client device 4405 * offline. 
/*
 * mdi_devi_offline():
 *		Offline notification from NDI framework on pHCI/Client device
 *		offline.  The client role (if any) is offlined first; if that
 *		fails its status is returned immediately.  The pHCI role (if
 *		any) is offlined next; if that fails and the node is also a
 *		client, the client is put back online.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		otherwise the non-success status of i_mdi_client_offline()
 *		or i_mdi_phci_offline() (both return NDI_BUSY on refusal)
 */

/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);
		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*
 * i_mdi_phci_offline():
 *		Offline the pHCI role of dip.
 *
 *		Phase 1 walks the pHCI's path list and refuses (NDI_BUSY)
 *		if any client is failing over or unstable.  A client whose
 *		computed state without this pHCI would be FAILED is offlined
 *		through ndi_devi_offline(); the pHCI lock is dropped around
 *		that call.
 *		Phase 2 (only when a client offline failed) walks the paths
 *		that precede the failing one, onlines or offlines their
 *		clients according to the current client state, and returns
 *		NDI_BUSY.
 *		Phase 3 marks the pHCI offline, marks every path OFFLINING,
 *		waits one tick, then offlines each path; if a path does not
 *		end up offline the pHCI is marked online again and NDI_BUSY
 *		is returned.
 *
 * Return Values:
 *		NDI_SUCCESS	pHCI offlined, already offline, or dip has
 *				no pHCI structure
 *		NDI_BUSY	see above
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
	    dip));

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Phase 1: check every client reachable through this pHCI.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* remember the successor while the path lock is held */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Drop the client and pHCI locks before calling
			 * into the NDI framework.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				/* phase 2 stops at this path */
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Phase 2: a client could not be offlined.  Walk the paths that
	 * were processed before the failing one and bring each client in
	 * line with its current state: online it when the state is
	 * OPTIMAL or DEGRADED, offline it when the state is FAILED.
	 * The pHCI lock is dropped around each NDI call.
	 */
	if (failed_pip) {
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* no devinfo node or other state: just move on */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		/* fetch the successor before the path is offlined */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not used below */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* undo the offline mark set above */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*
 * i_mdi_client_offline():
 *		Offline the client role of dip.  Refused with NDI_BUSY while
 *		the client is unstable or a failover is in progress.
 *		Otherwise the client is marked offline and, when flags
 *		contains NDI_DEVI_REMOVE, its binding to the dev_info node
 *		is cleared.
 *
 * Return Values:
 *		NDI_SUCCESS	client offlined, or dip has no client structure
 *		NDI_BUSY	client is unstable or failing over
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
	    dip));
	ct = i_devi_get_client(dip);
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!One or more paths to this device is "
			    "in transient state. This device can not "
			    "be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!Client device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}

/*
 * mdi_pre_attach():
 *		Pre attach() notification handler.  DDI_PM_RESUME is
 *		rejected for any node that is an MDI component.
 *
 * Return Values:
 *		DDI_SUCCESS
 *		DDI_FAILURE	cmd is DDI_PM_RESUME on an MDI component
 */

/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* don't support old DDI_PM_RESUME */
	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
	    (cmd == DDI_PM_RESUME))
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * mdi_post_attach():
 *		Post attach() notification handler.  Records the outcome
 *		(error) of an attach or resume in the pHCI and/or client
 *		state of dip: on success the ATTACH/RESUME state is set, on
 *		failure the converse DETACH/SUSPEND state is set.  After a
 *		successful client attach, kstats are created and the path
 *		state is reported for every path of the client.
 */

/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_attach: called %p\n", ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_attach: failed error=%d\n",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_resume: called %p\n", ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_resume: failed error=%d\n",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", ct));
			if (error != DDI_SUCCESS) {
				/* attach failed: mark detached, reset PM */
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_attach: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (CE_WARN, dip,
				    "mdi_post_attach i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached.
			 * Create kstats for any pathinfo structures
			 * initially associated with this client.
			 */
			for (pip = ct->ct_path_head; pip != NULL;
			    pip = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link) {
				(void) i_mdi_pi_kstat_create(pip);
				i_mdi_report_path_state(ct, pip);
			}
			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			/*
			 * NOTE(review): the message below says
			 * "post_attach" although this is the resume path.
			 */
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_resume: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}

/*
 * mdi_pre_detach():
 *		Pre detach notification handler.  The client role is
 *		notified first and its result is ignored; the value
 *		returned is that of the pHCI role, or DDI_SUCCESS when dip
 *		is not a pHCI.
 */

/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*
 * i_mdi_phci_pre_detach():
 *		pHCI part of the pre detach notification.
 *
 *		DDI_DETACH is refused unless the pHCI is already offline;
 *		otherwise the pHCI is marked detached.
 *		DDI_SUSPEND first suspends every client reachable through
 *		this pHCI that is neither detached nor already suspended.
 *		If one of them fails to suspend, clients on the paths
 *		preceding the failing one that are marked suspended are
 *		resumed and the failure is returned.
 *
 * Return Values:
 *		DDI_SUCCESS	(also when dip has no pHCI structure)
 *		DDI_FAILURE	detach of a pHCI that is not offline, or
 *				an unsupported cmd
 *		the devi_detach() status when a client suspend failed
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_detach: called %p\n", ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			/* capture the devinfo node before unlocking */
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					/* the rollback stops at this path */
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Roll back: resume the clients on the paths that
			 * precede the failing one and are marked suspended.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * i_mdi_client_pre_detach():
 *		Client part of the pre detach notification.  Marks the
 *		client detached (DDI_DETACH) or suspended (DDI_SUSPEND)
 *		under the client lock.
 *
 * Return Values:
 *		DDI_SUCCESS	(also when dip has no client structure)
 *		DDI_FAILURE	unsupported cmd
 */
/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_detach: called %p\n", ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_suspend: called %p\n", ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *		Post detach notification handler.  Forwards the outcome of
 *		the detach/suspend to the pHCI and/or client role of dip so
 *		that our state can be updated if the operation failed.
 */

/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed. Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*
 * i_mdi_phci_post_detach():
 *		pHCI part of the post detach notification.  When the
 *		detach or suspend failed (error != DDI_SUCCESS) the state
 *		set by the pre detach handler is reverted to ATTACH or
 *		RESUME respectively.  Nothing is done when dip has no pHCI
 *		structure.
 */
/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed. Update our state
	 * too
	 */
	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);
	/*
	 * Detach of pHCI failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI post_detach: called %p\n", ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_ATTACH(ph);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI post_suspend: called %p\n", ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_RESUME(ph);
		break;
	}
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_post_detach():
 *		Client part of the post detach notification.
 *
 *		For DDI_DETACH the client's power management bookkeeping
 *		is updated regardless of error: if the client devinfo node
 *		is attaching, ct_path_count holds are released via
 *		i_mdi_pm_rele_client(), otherwise i_mdi_pm_reset_client()
 *		is called.  When the detach or suspend failed the state
 *		set by the pre detach handler is reverted to ATTACH or
 *		RESUME respectively.  Nothing is done when dip has no
 *		client structure.
 */
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_detach: called %p\n", ct));
		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_suspend: called %p\n", ct));
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}
i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
{
	/*
	 * Attach kstats to pip.  All paths between the same client and the
	 * same pHCI share one reference-counted mdi_pi_kstats structure, so
	 * an existing one is reused when found; otherwise an I/O kstat and a
	 * named error kstat are created and installed.
	 *
	 * The caller must hold the client's ct_mutex (asserted below).
	 * Returns MDI_SUCCESS, or MDI_FAILURE if the kstat name does not fit
	 * or a kstat could not be created.
	 */
	dev_info_t	*client = MDI_PI(pip)->pi_client->ct_dip;
	dev_info_t	*ppath = MDI_PI(pip)->pi_phci->ph_dip;
	char		ksname[KSTAT_STRLEN];
	mdi_pathinfo_t	*cpip;
	const char	*err_postfix = ",err";
	kstat_t		*kiosp, *kerrsp;
	struct pi_errs	*nsp;
	struct mdi_pi_kstats	*mdi_statp;
	int		namelen;

	ASSERT(client != NULL && ppath != NULL);

	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));

	/* Already set up for this path. */
	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
		if (cpip == pip)
			continue;
		/*
		 * We have found a different path with same parent
		 * kstats for a given client-pHCI are common
		 */
		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
		    (MDI_PI(cpip)->pi_kstats != NULL)) {
			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
			return (MDI_SUCCESS);
		}
	}

	/*
	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
	 * clamp length of name against max length of error kstat name
	 *
	 * The error kstat name is the base name with err_postfix appended
	 * in place, so base + postfix + terminating NUL must fit in
	 * ksname[KSTAT_STRLEN].  That requires
	 *	namelen < KSTAT_STRLEN - strlen(err_postfix)
	 * (a base name of exactly that length would make strcat() write
	 * the NUL one byte past the end of the buffer).
	 */
	namelen = snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
	    ddi_driver_name(client), ddi_get_instance(client),
	    ddi_driver_name(ppath), ddi_get_instance(ppath));
	if (namelen < 0 ||
	    (size_t)namelen >= (KSTAT_STRLEN - strlen(err_postfix))) {
		return (MDI_FAILURE);
	}
	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
		return (MDI_FAILURE);
	}

	(void) strcat(ksname, err_postfix);
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);

	if (kerrsp == NULL) {
		/* Do not leave a half-built pair behind. */
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	/* This path holds the first reference on the new kstat pair. */
	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}

/*
 * destroy per-path
properties 5180 */ 5181 static void 5182 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5183 { 5184 5185 struct mdi_pi_kstats *mdi_statp; 5186 5187 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5188 return; 5189 5190 MDI_PI(pip)->pi_kstats = NULL; 5191 5192 /* 5193 * the kstat may be shared between multiple pathinfo nodes 5194 * decrement this pathinfo's usage, removing the kstats 5195 * themselves when the last pathinfo reference is removed. 5196 */ 5197 ASSERT(mdi_statp->pi_kstat_ref > 0); 5198 if (--mdi_statp->pi_kstat_ref != 0) 5199 return; 5200 5201 kstat_delete(mdi_statp->pi_kstat_iostats); 5202 kstat_delete(mdi_statp->pi_kstat_errstats); 5203 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5204 } 5205 5206 /* 5207 * update I/O paths KSTATS 5208 */ 5209 void 5210 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5211 { 5212 kstat_t *iostatp; 5213 size_t xfer_cnt; 5214 5215 ASSERT(pip != NULL); 5216 5217 /* 5218 * I/O can be driven across a path prior to having path 5219 * statistics available, i.e. probe(9e). 5220 */ 5221 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5222 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5223 xfer_cnt = bp->b_bcount - bp->b_resid; 5224 if (bp->b_flags & B_READ) { 5225 KSTAT_IO_PTR(iostatp)->reads++; 5226 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5227 } else { 5228 KSTAT_IO_PTR(iostatp)->writes++; 5229 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5230 } 5231 } 5232 } 5233 5234 /* 5235 * disable the path to a particular pHCI (pHCI specified in the phci_path 5236 * argument) for a particular client (specified in the client_path argument). 5237 * Disabling a path means that MPxIO will not select the disabled path for 5238 * routing any new I/O requests. 
5239 */ 5240 int 5241 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5242 { 5243 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5244 } 5245 5246 /* 5247 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5248 * argument) for a particular client (specified in the client_path argument). 5249 * Enabling a path means that MPxIO may select the enabled path for routing 5250 * future I/O requests, subject to other path state constraints. 5251 */ 5252 5253 int 5254 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5255 { 5256 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5257 } 5258 5259 5260 /* 5261 * Common routine for doing enable/disable. 5262 */ 5263 int 5264 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5265 { 5266 5267 mdi_phci_t *ph; 5268 mdi_vhci_t *vh = NULL; 5269 mdi_client_t *ct; 5270 mdi_pathinfo_t *next, *pip; 5271 int found_it; 5272 int (*f)() = NULL; 5273 int rv; 5274 int sync_flag = 0; 5275 5276 ph = i_devi_get_phci(pdip); 5277 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5278 " Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip)); 5279 if (ph == NULL) { 5280 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5281 " failed. ph = NULL operation = %d\n", op)); 5282 return (MDI_FAILURE); 5283 } 5284 5285 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5286 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5287 " Invalid operation = %d\n", op)); 5288 return (MDI_FAILURE); 5289 } 5290 5291 sync_flag = (flags << 8) & 0xf00; 5292 5293 vh = ph->ph_vhci; 5294 f = vh->vh_ops->vo_pi_state_change; 5295 5296 if (cdip == NULL) { 5297 /* 5298 * Need to mark the Phci as enabled/disabled. 
5299 */ 5300 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5301 "Operation %d for the phci\n", op)); 5302 MDI_PHCI_LOCK(ph); 5303 switch (flags) { 5304 case USER_DISABLE: 5305 if (op == MDI_DISABLE_OP) 5306 MDI_PHCI_SET_USER_DISABLE(ph); 5307 else 5308 MDI_PHCI_SET_USER_ENABLE(ph); 5309 break; 5310 case DRIVER_DISABLE: 5311 if (op == MDI_DISABLE_OP) 5312 MDI_PHCI_SET_DRV_DISABLE(ph); 5313 else 5314 MDI_PHCI_SET_DRV_ENABLE(ph); 5315 break; 5316 case DRIVER_DISABLE_TRANSIENT: 5317 if (op == MDI_DISABLE_OP) 5318 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5319 else 5320 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5321 break; 5322 default: 5323 MDI_PHCI_UNLOCK(ph); 5324 MDI_DEBUG(1, (CE_NOTE, NULL, 5325 "!i_mdi_pi_enable_disable:" 5326 " Invalid flag argument= %d\n", flags)); 5327 } 5328 5329 /* 5330 * Phci has been disabled. Now try to enable/disable 5331 * path info's to each client. 5332 */ 5333 pip = ph->ph_path_head; 5334 while (pip != NULL) { 5335 /* 5336 * Do a callback into the mdi consumer to let it 5337 * know that path is about to be enabled/disabled. 
5338 */ 5339 if (f != NULL) { 5340 rv = (*f)(vh->vh_dip, pip, 0, 5341 MDI_PI_EXT_STATE(pip), 5342 MDI_EXT_STATE_CHANGE | sync_flag | 5343 op | MDI_BEFORE_STATE_CHANGE); 5344 if (rv != MDI_SUCCESS) { 5345 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5346 "!vo_pi_state_change: failed rv = %x", rv)); 5347 } 5348 } 5349 5350 MDI_PI_LOCK(pip); 5351 next = 5352 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5353 switch (flags) { 5354 case USER_DISABLE: 5355 if (op == MDI_DISABLE_OP) 5356 MDI_PI_SET_USER_DISABLE(pip); 5357 else 5358 MDI_PI_SET_USER_ENABLE(pip); 5359 break; 5360 case DRIVER_DISABLE: 5361 if (op == MDI_DISABLE_OP) 5362 MDI_PI_SET_DRV_DISABLE(pip); 5363 else 5364 MDI_PI_SET_DRV_ENABLE(pip); 5365 break; 5366 case DRIVER_DISABLE_TRANSIENT: 5367 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5368 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5369 else 5370 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5371 break; 5372 } 5373 MDI_PI_UNLOCK(pip); 5374 /* 5375 * Do a callback into the mdi consumer to let it 5376 * know that path is now enabled/disabled. 5377 */ 5378 if (f != NULL) { 5379 rv = (*f)(vh->vh_dip, pip, 0, 5380 MDI_PI_EXT_STATE(pip), 5381 MDI_EXT_STATE_CHANGE | sync_flag | 5382 op | MDI_AFTER_STATE_CHANGE); 5383 if (rv != MDI_SUCCESS) { 5384 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5385 "!vo_pi_state_change: failed rv = %x", rv)); 5386 } 5387 } 5388 pip = next; 5389 } 5390 MDI_PHCI_UNLOCK(ph); 5391 } else { 5392 5393 /* 5394 * Disable a specific client. 5395 */ 5396 ct = i_devi_get_client(cdip); 5397 if (ct == NULL) { 5398 MDI_DEBUG(1, (CE_NOTE, NULL, 5399 "!i_mdi_pi_enable_disable:" 5400 " failed. 
ct = NULL operation = %d\n", op)); 5401 return (MDI_FAILURE); 5402 } 5403 5404 MDI_CLIENT_LOCK(ct); 5405 pip = ct->ct_path_head; 5406 found_it = 0; 5407 while (pip != NULL) { 5408 MDI_PI_LOCK(pip); 5409 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5410 if (MDI_PI(pip)->pi_phci == ph) { 5411 MDI_PI_UNLOCK(pip); 5412 found_it = 1; 5413 break; 5414 } 5415 MDI_PI_UNLOCK(pip); 5416 pip = next; 5417 } 5418 5419 MDI_CLIENT_UNLOCK(ct); 5420 if (found_it == 0) { 5421 MDI_DEBUG(1, (CE_NOTE, NULL, 5422 "!i_mdi_pi_enable_disable:" 5423 " failed. Could not find corresponding pip\n")); 5424 return (MDI_FAILURE); 5425 } 5426 /* 5427 * Do a callback into the mdi consumer to let it 5428 * know that path is about to get enabled/disabled. 5429 */ 5430 if (f != NULL) { 5431 rv = (*f)(vh->vh_dip, pip, 0, 5432 MDI_PI_EXT_STATE(pip), 5433 MDI_EXT_STATE_CHANGE | sync_flag | 5434 op | MDI_BEFORE_STATE_CHANGE); 5435 if (rv != MDI_SUCCESS) { 5436 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5437 "!vo_pi_state_change: failed rv = %x", rv)); 5438 } 5439 } 5440 MDI_PI_LOCK(pip); 5441 switch (flags) { 5442 case USER_DISABLE: 5443 if (op == MDI_DISABLE_OP) 5444 MDI_PI_SET_USER_DISABLE(pip); 5445 else 5446 MDI_PI_SET_USER_ENABLE(pip); 5447 break; 5448 case DRIVER_DISABLE: 5449 if (op == MDI_DISABLE_OP) 5450 MDI_PI_SET_DRV_DISABLE(pip); 5451 else 5452 MDI_PI_SET_DRV_ENABLE(pip); 5453 break; 5454 case DRIVER_DISABLE_TRANSIENT: 5455 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) 5456 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5457 else 5458 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5459 break; 5460 } 5461 MDI_PI_UNLOCK(pip); 5462 /* 5463 * Do a callback into the mdi consumer to let it 5464 * know that path is now enabled/disabled. 
5465 */ 5466 if (f != NULL) { 5467 rv = (*f)(vh->vh_dip, pip, 0, 5468 MDI_PI_EXT_STATE(pip), 5469 MDI_EXT_STATE_CHANGE | sync_flag | 5470 op | MDI_AFTER_STATE_CHANGE); 5471 if (rv != MDI_SUCCESS) { 5472 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5473 "!vo_pi_state_change: failed rv = %x", rv)); 5474 } 5475 } 5476 } 5477 5478 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5479 " Returning success pdip = %p cdip = %p\n", op, pdip, cdip)); 5480 return (MDI_SUCCESS); 5481 } 5482 5483 /*ARGSUSED3*/ 5484 int 5485 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp, 5486 int flags, clock_t timeout) 5487 { 5488 mdi_pathinfo_t *pip; 5489 dev_info_t *dip; 5490 clock_t interval = drv_usectohz(100000); /* 0.1 sec */ 5491 char *paddr; 5492 5493 MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm)); 5494 5495 if (!MDI_PHCI(pdip)) 5496 return (MDI_FAILURE); 5497 5498 paddr = strchr(devnm, '@'); 5499 if (paddr == NULL) 5500 return (MDI_FAILURE); 5501 5502 paddr++; /* skip '@' */ 5503 pip = mdi_pi_find(pdip, NULL, paddr); 5504 while (pip == NULL && timeout > 0) { 5505 if (interval > timeout) 5506 interval = timeout; 5507 if (flags & NDI_DEVI_DEBUG) { 5508 cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n", 5509 ddi_driver_name(pdip), ddi_get_instance(pdip), 5510 paddr, interval, timeout); 5511 } 5512 delay(interval); 5513 timeout -= interval; 5514 interval += interval; 5515 pip = mdi_pi_find(pdip, NULL, paddr); 5516 } 5517 5518 if (pip == NULL) 5519 return (MDI_FAILURE); 5520 dip = mdi_pi_get_client(pip); 5521 if (ndi_devi_online(dip, flags) != NDI_SUCCESS) 5522 return (MDI_FAILURE); 5523 *cdipp = dip; 5524 5525 /* TODO: holding should happen inside search functions */ 5526 ndi_hold_devi(dip); 5527 return (MDI_SUCCESS); 5528 } 5529 5530 /* 5531 * Ensure phci powered up 5532 */ 5533 static void 5534 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5535 { 5536 dev_info_t *ph_dip; 5537 5538 ASSERT(pip != NULL); 5539 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5540 5541 if 
(MDI_PI(pip)->pi_pm_held) { 5542 return; 5543 } 5544 5545 ph_dip = mdi_pi_get_phci(pip); 5546 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n", 5547 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5548 if (ph_dip == NULL) { 5549 return; 5550 } 5551 5552 MDI_PI_UNLOCK(pip); 5553 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5554 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5555 pm_hold_power(ph_dip); 5556 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5557 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5558 MDI_PI_LOCK(pip); 5559 5560 MDI_PI(pip)->pi_pm_held = 1; 5561 } 5562 5563 /* 5564 * Allow phci powered down 5565 */ 5566 static void 5567 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5568 { 5569 dev_info_t *ph_dip = NULL; 5570 5571 ASSERT(pip != NULL); 5572 ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); 5573 5574 if (MDI_PI(pip)->pi_pm_held == 0) { 5575 return; 5576 } 5577 5578 ph_dip = mdi_pi_get_phci(pip); 5579 ASSERT(ph_dip != NULL); 5580 5581 MDI_PI_UNLOCK(pip); 5582 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n", 5583 ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5584 5585 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5586 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5587 pm_rele_power(ph_dip); 5588 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5589 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5590 5591 MDI_PI_LOCK(pip); 5592 MDI_PI(pip)->pi_pm_held = 0; 5593 } 5594 5595 static void 5596 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5597 { 5598 ASSERT(ct); 5599 5600 ct->ct_power_cnt += incr; 5601 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client " 5602 "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr)); 5603 ASSERT(ct->ct_power_cnt >= 0); 5604 } 5605 5606 static void 5607 i_mdi_rele_all_phci(mdi_client_t *ct) 5608 { 5609 mdi_pathinfo_t *pip; 5610 5611 ASSERT(mutex_owned(&ct->ct_mutex)); 5612 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5613 while (pip != NULL) { 5614 mdi_hold_path(pip); 5615 MDI_PI_LOCK(pip); 5616 
i_mdi_pm_rele_pip(pip); 5617 MDI_PI_UNLOCK(pip); 5618 mdi_rele_path(pip); 5619 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5620 } 5621 } 5622 5623 static void 5624 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5625 { 5626 ASSERT(ct); 5627 5628 if (i_ddi_node_state(ct->ct_dip) >= DS_READY) { 5629 ct->ct_power_cnt -= decr; 5630 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client " 5631 "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr)); 5632 } 5633 5634 ASSERT(ct->ct_power_cnt >= 0); 5635 if (ct->ct_power_cnt == 0) { 5636 i_mdi_rele_all_phci(ct); 5637 return; 5638 } 5639 } 5640 5641 static void 5642 i_mdi_pm_reset_client(mdi_client_t *ct) 5643 { 5644 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client " 5645 "ct_power_cnt = %d\n", ct->ct_power_cnt)); 5646 ct->ct_power_cnt = 0; 5647 i_mdi_rele_all_phci(ct); 5648 ct->ct_powercnt_config = 0; 5649 ct->ct_powercnt_unconfig = 0; 5650 ct->ct_powercnt_reset = 1; 5651 } 5652 5653 static void 5654 i_mdi_pm_hold_all_phci(mdi_client_t *ct) 5655 { 5656 mdi_pathinfo_t *pip; 5657 ASSERT(mutex_owned(&ct->ct_mutex)); 5658 5659 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5660 while (pip != NULL) { 5661 mdi_hold_path(pip); 5662 MDI_PI_LOCK(pip); 5663 i_mdi_pm_hold_pip(pip); 5664 MDI_PI_UNLOCK(pip); 5665 mdi_rele_path(pip); 5666 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5667 } 5668 } 5669 5670 static int 5671 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5672 { 5673 int ret; 5674 dev_info_t *ph_dip; 5675 5676 MDI_PI_LOCK(pip); 5677 i_mdi_pm_hold_pip(pip); 5678 5679 ph_dip = mdi_pi_get_phci(pip); 5680 MDI_PI_UNLOCK(pip); 5681 5682 /* bring all components of phci to full power */ 5683 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5684 "pm_powerup for %s%d\n", ddi_get_name(ph_dip), 5685 ddi_get_instance(ph_dip))); 5686 5687 ret = pm_powerup(ph_dip); 5688 5689 if (ret == DDI_FAILURE) { 5690 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5691 "pm_powerup FAILED for %s%d\n", 5692 
ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); 5693 5694 MDI_PI_LOCK(pip); 5695 i_mdi_pm_rele_pip(pip); 5696 MDI_PI_UNLOCK(pip); 5697 return (MDI_FAILURE); 5698 } 5699 5700 return (MDI_SUCCESS); 5701 } 5702 5703 static int 5704 i_mdi_power_all_phci(mdi_client_t *ct) 5705 { 5706 mdi_pathinfo_t *pip; 5707 int succeeded = 0; 5708 5709 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5710 while (pip != NULL) { 5711 mdi_hold_path(pip); 5712 MDI_CLIENT_UNLOCK(ct); 5713 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5714 succeeded = 1; 5715 5716 ASSERT(ct == MDI_PI(pip)->pi_client); 5717 MDI_CLIENT_LOCK(ct); 5718 mdi_rele_path(pip); 5719 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5720 } 5721 5722 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5723 } 5724 5725 /* 5726 * mdi_bus_power(): 5727 * 1. Place the phci(s) into powered up state so that 5728 * client can do power management 5729 * 2. Ensure phci powered up as client power managing 5730 * Return Values: 5731 * MDI_SUCCESS 5732 * MDI_FAILURE 5733 */ 5734 int 5735 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5736 void *arg, void *result) 5737 { 5738 int ret = MDI_SUCCESS; 5739 pm_bp_child_pwrchg_t *bpc; 5740 mdi_client_t *ct; 5741 dev_info_t *cdip; 5742 pm_bp_has_changed_t *bphc; 5743 5744 /* 5745 * BUS_POWER_NOINVOL not supported 5746 */ 5747 if (op == BUS_POWER_NOINVOL) 5748 return (MDI_FAILURE); 5749 5750 /* 5751 * ignore other OPs. 
5752 * return quickly to save cou cycles on the ct processing 5753 */ 5754 switch (op) { 5755 case BUS_POWER_PRE_NOTIFICATION: 5756 case BUS_POWER_POST_NOTIFICATION: 5757 bpc = (pm_bp_child_pwrchg_t *)arg; 5758 cdip = bpc->bpc_dip; 5759 break; 5760 case BUS_POWER_HAS_CHANGED: 5761 bphc = (pm_bp_has_changed_t *)arg; 5762 cdip = bphc->bphc_dip; 5763 break; 5764 default: 5765 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5766 } 5767 5768 ASSERT(MDI_CLIENT(cdip)); 5769 5770 ct = i_devi_get_client(cdip); 5771 if (ct == NULL) 5772 return (MDI_FAILURE); 5773 5774 /* 5775 * wait till the mdi_pathinfo node state change are processed 5776 */ 5777 MDI_CLIENT_LOCK(ct); 5778 switch (op) { 5779 case BUS_POWER_PRE_NOTIFICATION: 5780 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5781 "BUS_POWER_PRE_NOTIFICATION:" 5782 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5783 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5784 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5785 5786 /* serialize power level change per client */ 5787 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5788 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5789 5790 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5791 5792 if (ct->ct_power_cnt == 0) { 5793 ret = i_mdi_power_all_phci(ct); 5794 } 5795 5796 /* 5797 * if new_level > 0: 5798 * - hold phci(s) 5799 * - power up phci(s) if not already 5800 * ignore power down 5801 */ 5802 if (bpc->bpc_nlevel > 0) { 5803 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5804 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5805 "mdi_bus_power i_mdi_pm_hold_client\n")); 5806 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5807 } 5808 } 5809 break; 5810 case BUS_POWER_POST_NOTIFICATION: 5811 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5812 "BUS_POWER_POST_NOTIFICATION:" 5813 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5814 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5815 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5816 *(int *)result)); 5817 5818 if (*(int *)result 
== DDI_SUCCESS) { 5819 if (bpc->bpc_nlevel > 0) { 5820 MDI_CLIENT_SET_POWER_UP(ct); 5821 } else { 5822 MDI_CLIENT_SET_POWER_DOWN(ct); 5823 } 5824 } 5825 5826 /* release the hold we did in pre-notification */ 5827 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5828 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5829 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5830 "mdi_bus_power i_mdi_pm_rele_client\n")); 5831 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5832 } 5833 5834 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5835 /* another thread might started attaching */ 5836 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5837 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5838 "mdi_bus_power i_mdi_pm_rele_client\n")); 5839 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5840 /* detaching has been taken care in pm_post_unconfig */ 5841 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5842 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5843 "mdi_bus_power i_mdi_pm_reset_client\n")); 5844 i_mdi_pm_reset_client(ct); 5845 } 5846 } 5847 5848 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5849 cv_broadcast(&ct->ct_powerchange_cv); 5850 5851 break; 5852 5853 /* need to do more */ 5854 case BUS_POWER_HAS_CHANGED: 5855 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 5856 "BUS_POWER_HAS_CHANGED:" 5857 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5858 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 5859 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 5860 5861 if (bphc->bphc_nlevel > 0 && 5862 bphc->bphc_nlevel > bphc->bphc_olevel) { 5863 if (ct->ct_power_cnt == 0) { 5864 ret = i_mdi_power_all_phci(ct); 5865 } 5866 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5867 "mdi_bus_power i_mdi_pm_hold_client\n")); 5868 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5869 } 5870 5871 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 5872 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5873 "mdi_bus_power i_mdi_pm_rele_client\n")); 5874 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5875 } 5876 break; 5877 } 5878 5879 
MDI_CLIENT_UNLOCK(ct); 5880 return (ret); 5881 } 5882 5883 static int 5884 i_mdi_pm_pre_config_one(dev_info_t *child) 5885 { 5886 int ret = MDI_SUCCESS; 5887 mdi_client_t *ct; 5888 5889 ct = i_devi_get_client(child); 5890 if (ct == NULL) 5891 return (MDI_FAILURE); 5892 5893 MDI_CLIENT_LOCK(ct); 5894 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5895 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5896 5897 if (!MDI_CLIENT_IS_FAILED(ct)) { 5898 MDI_CLIENT_UNLOCK(ct); 5899 MDI_DEBUG(4, (CE_NOTE, child, 5900 "i_mdi_pm_pre_config_one already configured\n")); 5901 return (MDI_SUCCESS); 5902 } 5903 5904 if (ct->ct_powercnt_config) { 5905 MDI_CLIENT_UNLOCK(ct); 5906 MDI_DEBUG(4, (CE_NOTE, child, 5907 "i_mdi_pm_pre_config_one ALREADY held\n")); 5908 return (MDI_SUCCESS); 5909 } 5910 5911 if (ct->ct_power_cnt == 0) { 5912 ret = i_mdi_power_all_phci(ct); 5913 } 5914 MDI_DEBUG(4, (CE_NOTE, child, 5915 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 5916 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5917 ct->ct_powercnt_config = 1; 5918 ct->ct_powercnt_reset = 0; 5919 MDI_CLIENT_UNLOCK(ct); 5920 return (ret); 5921 } 5922 5923 static int 5924 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child) 5925 { 5926 int ret = MDI_SUCCESS; 5927 dev_info_t *cdip; 5928 int circ; 5929 5930 ASSERT(MDI_VHCI(parent)); 5931 5932 /* ndi_devi_config_one */ 5933 if (child) { 5934 return (i_mdi_pm_pre_config_one(child)); 5935 } 5936 5937 /* devi_config_common */ 5938 ndi_devi_enter(parent, &circ); 5939 cdip = ddi_get_child(parent); 5940 while (cdip) { 5941 dev_info_t *next = ddi_get_next_sibling(cdip); 5942 5943 ret = i_mdi_pm_pre_config_one(cdip); 5944 if (ret != MDI_SUCCESS) 5945 break; 5946 cdip = next; 5947 } 5948 ndi_devi_exit(parent, circ); 5949 return (ret); 5950 } 5951 5952 static int 5953 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 5954 { 5955 int ret = MDI_SUCCESS; 5956 mdi_client_t *ct; 5957 5958 ct = i_devi_get_client(child); 5959 if (ct == NULL) 5960 return 
(MDI_FAILURE); 5961 5962 MDI_CLIENT_LOCK(ct); 5963 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5964 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5965 5966 if (i_ddi_node_state(ct->ct_dip) < DS_READY) { 5967 MDI_DEBUG(4, (CE_NOTE, child, 5968 "i_mdi_pm_pre_unconfig node detached already\n")); 5969 MDI_CLIENT_UNLOCK(ct); 5970 return (MDI_SUCCESS); 5971 } 5972 5973 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 5974 (flags & NDI_AUTODETACH)) { 5975 MDI_DEBUG(4, (CE_NOTE, child, 5976 "i_mdi_pm_pre_unconfig auto-modunload\n")); 5977 MDI_CLIENT_UNLOCK(ct); 5978 return (MDI_FAILURE); 5979 } 5980 5981 if (ct->ct_powercnt_unconfig) { 5982 MDI_DEBUG(4, (CE_NOTE, child, 5983 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 5984 MDI_CLIENT_UNLOCK(ct); 5985 *held = 1; 5986 return (MDI_SUCCESS); 5987 } 5988 5989 if (ct->ct_power_cnt == 0) { 5990 ret = i_mdi_power_all_phci(ct); 5991 } 5992 MDI_DEBUG(4, (CE_NOTE, child, 5993 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 5994 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5995 ct->ct_powercnt_unconfig = 1; 5996 ct->ct_powercnt_reset = 0; 5997 MDI_CLIENT_UNLOCK(ct); 5998 if (ret == MDI_SUCCESS) 5999 *held = 1; 6000 return (ret); 6001 } 6002 6003 static int 6004 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held, 6005 int flags) 6006 { 6007 int ret = MDI_SUCCESS; 6008 dev_info_t *cdip; 6009 int circ; 6010 6011 ASSERT(MDI_VHCI(parent)); 6012 *held = 0; 6013 6014 /* ndi_devi_unconfig_one */ 6015 if (child) { 6016 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6017 } 6018 6019 /* devi_unconfig_common */ 6020 ndi_devi_enter(parent, &circ); 6021 cdip = ddi_get_child(parent); 6022 while (cdip) { 6023 dev_info_t *next = ddi_get_next_sibling(cdip); 6024 6025 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6026 cdip = next; 6027 } 6028 ndi_devi_exit(parent, circ); 6029 6030 if (*held) 6031 ret = MDI_SUCCESS; 6032 6033 return (ret); 6034 } 6035 6036 static void 6037 i_mdi_pm_post_config_one(dev_info_t *child) 6038 { 
6039 mdi_client_t *ct; 6040 6041 ct = i_devi_get_client(child); 6042 if (ct == NULL) 6043 return; 6044 6045 MDI_CLIENT_LOCK(ct); 6046 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6047 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6048 6049 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6050 MDI_DEBUG(4, (CE_NOTE, child, 6051 "i_mdi_pm_post_config_one NOT configured\n")); 6052 MDI_CLIENT_UNLOCK(ct); 6053 return; 6054 } 6055 6056 /* client has not been updated */ 6057 if (MDI_CLIENT_IS_FAILED(ct)) { 6058 MDI_DEBUG(4, (CE_NOTE, child, 6059 "i_mdi_pm_post_config_one NOT configured\n")); 6060 MDI_CLIENT_UNLOCK(ct); 6061 return; 6062 } 6063 6064 /* another thread might have powered it down or detached it */ 6065 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6066 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6067 (i_ddi_node_state(ct->ct_dip) < DS_READY && 6068 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6069 MDI_DEBUG(4, (CE_NOTE, child, 6070 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6071 i_mdi_pm_reset_client(ct); 6072 } else { 6073 mdi_pathinfo_t *pip, *next; 6074 int valid_path_count = 0; 6075 6076 MDI_DEBUG(4, (CE_NOTE, child, 6077 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6078 pip = ct->ct_path_head; 6079 while (pip != NULL) { 6080 MDI_PI_LOCK(pip); 6081 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6082 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6083 valid_path_count ++; 6084 MDI_PI_UNLOCK(pip); 6085 pip = next; 6086 } 6087 i_mdi_pm_rele_client(ct, valid_path_count); 6088 } 6089 ct->ct_powercnt_config = 0; 6090 MDI_CLIENT_UNLOCK(ct); 6091 } 6092 6093 static void 6094 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child) 6095 { 6096 int circ; 6097 dev_info_t *cdip; 6098 ASSERT(MDI_VHCI(parent)); 6099 6100 /* ndi_devi_config_one */ 6101 if (child) { 6102 i_mdi_pm_post_config_one(child); 6103 return; 6104 } 6105 6106 /* devi_config_common */ 6107 ndi_devi_enter(parent, &circ); 6108 cdip = ddi_get_child(parent); 6109 while (cdip) { 6110 dev_info_t 
*next = ddi_get_next_sibling(cdip); 6111 6112 i_mdi_pm_post_config_one(cdip); 6113 cdip = next; 6114 } 6115 ndi_devi_exit(parent, circ); 6116 } 6117 6118 static void 6119 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6120 { 6121 mdi_client_t *ct; 6122 6123 ct = i_devi_get_client(child); 6124 if (ct == NULL) 6125 return; 6126 6127 MDI_CLIENT_LOCK(ct); 6128 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6129 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6130 6131 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6132 MDI_DEBUG(4, (CE_NOTE, child, 6133 "i_mdi_pm_post_unconfig NOT held\n")); 6134 MDI_CLIENT_UNLOCK(ct); 6135 return; 6136 } 6137 6138 /* failure detaching or another thread just attached it */ 6139 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6140 i_ddi_node_state(ct->ct_dip) == DS_READY) || 6141 (i_ddi_node_state(ct->ct_dip) != DS_READY && 6142 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6143 MDI_DEBUG(4, (CE_NOTE, child, 6144 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6145 i_mdi_pm_reset_client(ct); 6146 } else { 6147 mdi_pathinfo_t *pip, *next; 6148 int valid_path_count = 0; 6149 6150 MDI_DEBUG(4, (CE_NOTE, child, 6151 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6152 pip = ct->ct_path_head; 6153 while (pip != NULL) { 6154 MDI_PI_LOCK(pip); 6155 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6156 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6157 valid_path_count ++; 6158 MDI_PI_UNLOCK(pip); 6159 pip = next; 6160 } 6161 i_mdi_pm_rele_client(ct, valid_path_count); 6162 ct->ct_powercnt_unconfig = 0; 6163 } 6164 6165 MDI_CLIENT_UNLOCK(ct); 6166 } 6167 6168 static void 6169 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held) 6170 { 6171 int circ; 6172 dev_info_t *cdip; 6173 6174 ASSERT(MDI_VHCI(parent)); 6175 6176 if (!held) { 6177 MDI_DEBUG(4, (CE_NOTE, parent, 6178 "i_mdi_pm_post_unconfig held = %d\n", held)); 6179 return; 6180 } 6181 6182 if (child) { 6183 i_mdi_pm_post_unconfig_one(child); 6184 return; 6185 } 6186 
6187 ndi_devi_enter(parent, &circ); 6188 cdip = ddi_get_child(parent); 6189 while (cdip) { 6190 dev_info_t *next = ddi_get_next_sibling(cdip); 6191 6192 i_mdi_pm_post_unconfig_one(cdip); 6193 cdip = next; 6194 } 6195 ndi_devi_exit(parent, circ); 6196 } 6197 6198 int 6199 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6200 { 6201 int circ, ret = MDI_SUCCESS; 6202 dev_info_t *client_dip = NULL; 6203 mdi_client_t *ct; 6204 6205 /* 6206 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6207 * Power up pHCI for the named client device. 6208 * Note: Before the client is enumerated under vhci by phci, 6209 * client_dip can be NULL. Then proceed to power up all the 6210 * pHCIs. 6211 */ 6212 if (devnm != NULL) { 6213 ndi_devi_enter(vdip, &circ); 6214 client_dip = ndi_devi_findchild(vdip, devnm); 6215 ndi_devi_exit(vdip, circ); 6216 } 6217 6218 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op)); 6219 6220 switch (op) { 6221 case MDI_PM_PRE_CONFIG: 6222 ret = i_mdi_pm_pre_config(vdip, client_dip); 6223 6224 break; 6225 case MDI_PM_PRE_UNCONFIG: 6226 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6227 flags); 6228 6229 break; 6230 case MDI_PM_POST_CONFIG: 6231 i_mdi_pm_post_config(vdip, client_dip); 6232 6233 break; 6234 case MDI_PM_POST_UNCONFIG: 6235 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6236 6237 break; 6238 case MDI_PM_HOLD_POWER: 6239 case MDI_PM_RELE_POWER: 6240 ASSERT(args); 6241 6242 client_dip = (dev_info_t *)args; 6243 ASSERT(MDI_CLIENT(client_dip)); 6244 6245 ct = i_devi_get_client(client_dip); 6246 MDI_CLIENT_LOCK(ct); 6247 6248 if (op == MDI_PM_HOLD_POWER) { 6249 if (ct->ct_power_cnt == 0) { 6250 (void) i_mdi_power_all_phci(ct); 6251 MDI_DEBUG(4, (CE_NOTE, client_dip, 6252 "mdi_power i_mdi_pm_hold_client\n")); 6253 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6254 } 6255 } else { 6256 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6257 MDI_DEBUG(4, (CE_NOTE, client_dip, 6258 "mdi_power 
i_mdi_pm_rele_client\n")); 6259 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6260 } else { 6261 MDI_DEBUG(4, (CE_NOTE, client_dip, 6262 "mdi_power i_mdi_pm_reset_client\n")); 6263 i_mdi_pm_reset_client(ct); 6264 } 6265 } 6266 6267 MDI_CLIENT_UNLOCK(ct); 6268 break; 6269 default: 6270 break; 6271 } 6272 6273 return (ret); 6274 } 6275 6276 int 6277 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6278 { 6279 mdi_vhci_t *vhci; 6280 6281 if (!MDI_VHCI(dip)) 6282 return (MDI_FAILURE); 6283 6284 if (mdi_class) { 6285 vhci = DEVI(dip)->devi_mdi_xhci; 6286 ASSERT(vhci); 6287 *mdi_class = vhci->vh_class; 6288 } 6289 6290 return (MDI_SUCCESS); 6291 } 6292 6293 int 6294 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6295 { 6296 mdi_phci_t *phci; 6297 6298 if (!MDI_PHCI(dip)) 6299 return (MDI_FAILURE); 6300 6301 if (mdi_class) { 6302 phci = DEVI(dip)->devi_mdi_xhci; 6303 ASSERT(phci); 6304 *mdi_class = phci->ph_vhci->vh_class; 6305 } 6306 6307 return (MDI_SUCCESS); 6308 } 6309 6310 int 6311 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6312 { 6313 mdi_client_t *client; 6314 6315 if (!MDI_CLIENT(dip)) 6316 return (MDI_FAILURE); 6317 6318 if (mdi_class) { 6319 client = DEVI(dip)->devi_mdi_client; 6320 ASSERT(client); 6321 *mdi_class = client->ct_vhci->vh_class; 6322 } 6323 6324 return (MDI_SUCCESS); 6325 } 6326 6327 void * 6328 mdi_client_get_vhci_private(dev_info_t *dip) 6329 { 6330 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6331 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6332 mdi_client_t *ct; 6333 ct = i_devi_get_client(dip); 6334 return (ct->ct_vprivate); 6335 } 6336 return (NULL); 6337 } 6338 6339 void 6340 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6341 { 6342 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6343 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6344 mdi_client_t *ct; 6345 ct = i_devi_get_client(dip); 6346 ct->ct_vprivate = data; 6347 } 6348 } 6349 
/* 6350 * mdi_pi_get_vhci_private(): 6351 * Get the vhci private information associated with the 6352 * mdi_pathinfo node 6353 */ 6354 void * 6355 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6356 { 6357 caddr_t vprivate = NULL; 6358 if (pip) { 6359 vprivate = MDI_PI(pip)->pi_vprivate; 6360 } 6361 return (vprivate); 6362 } 6363 6364 /* 6365 * mdi_pi_set_vhci_private(): 6366 * Set the vhci private information in the mdi_pathinfo node 6367 */ 6368 void 6369 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6370 { 6371 if (pip) { 6372 MDI_PI(pip)->pi_vprivate = priv; 6373 } 6374 } 6375 6376 /* 6377 * mdi_phci_get_vhci_private(): 6378 * Get the vhci private information associated with the 6379 * mdi_phci node 6380 */ 6381 void * 6382 mdi_phci_get_vhci_private(dev_info_t *dip) 6383 { 6384 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6385 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6386 mdi_phci_t *ph; 6387 ph = i_devi_get_phci(dip); 6388 return (ph->ph_vprivate); 6389 } 6390 return (NULL); 6391 } 6392 6393 /* 6394 * mdi_phci_set_vhci_private(): 6395 * Set the vhci private information in the mdi_phci node 6396 */ 6397 void 6398 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6399 { 6400 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6401 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6402 mdi_phci_t *ph; 6403 ph = i_devi_get_phci(dip); 6404 ph->ph_vprivate = priv; 6405 } 6406 } 6407 6408 /* 6409 * List of vhci class names: 6410 * A vhci class name must be in this list only if the corresponding vhci 6411 * driver intends to use the mdi provided bus config implementation 6412 * (i.e., mdi_vhci_bus_config()). 6413 */ 6414 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6415 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6416 6417 /* 6418 * Built-in list of phci drivers for every vhci class. 6419 * All phci drivers expect iscsi have root device support. 
6420 */ 6421 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 6422 { "fp", 1 }, 6423 { "iscsi", 0 }, 6424 { "ibsrp", 1 } 6425 }; 6426 6427 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 6428 6429 /* 6430 * During boot time, the on-disk vhci cache for every vhci class is read 6431 * in the form of an nvlist and stored here. 6432 */ 6433 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6434 6435 /* nvpair names in vhci cache nvlist */ 6436 #define MDI_VHCI_CACHE_VERSION 1 6437 #define MDI_NVPNAME_VERSION "version" 6438 #define MDI_NVPNAME_PHCIS "phcis" 6439 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6440 6441 /* 6442 * Given vhci class name, return its on-disk vhci cache filename. 6443 * Memory for the returned filename which includes the full path is allocated 6444 * by this function. 6445 */ 6446 static char * 6447 vhclass2vhcache_filename(char *vhclass) 6448 { 6449 char *filename; 6450 int len; 6451 static char *fmt = "/etc/devices/mdi_%s_cache"; 6452 6453 /* 6454 * fmt contains the on-disk vhci cache file name format; 6455 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 6456 */ 6457 6458 /* the -1 below is to account for "%s" in the format string */ 6459 len = strlen(fmt) + strlen(vhclass) - 1; 6460 filename = kmem_alloc(len, KM_SLEEP); 6461 (void) snprintf(filename, len, fmt, vhclass); 6462 ASSERT(len == (strlen(filename) + 1)); 6463 return (filename); 6464 } 6465 6466 /* 6467 * initialize the vhci cache related data structures and read the on-disk 6468 * vhci cached data into memory. 
6469 */ 6470 static void 6471 setup_vhci_cache(mdi_vhci_t *vh) 6472 { 6473 mdi_vhci_config_t *vhc; 6474 mdi_vhci_cache_t *vhcache; 6475 int i; 6476 nvlist_t *nvl = NULL; 6477 6478 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6479 vh->vh_config = vhc; 6480 vhcache = &vhc->vhc_vhcache; 6481 6482 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6483 6484 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6485 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6486 6487 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6488 6489 /* 6490 * Create string hash; same as mod_hash_create_strhash() except that 6491 * we use NULL key destructor. 6492 */ 6493 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6494 mdi_bus_config_cache_hash_size, 6495 mod_hash_null_keydtor, mod_hash_null_valdtor, 6496 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6497 6498 setup_phci_driver_list(vh); 6499 6500 /* 6501 * The on-disk vhci cache is read during booting prior to the 6502 * lights-out period by mdi_read_devices_files(). 6503 */ 6504 for (i = 0; i < N_VHCI_CLASSES; i++) { 6505 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6506 nvl = vhcache_nvl[i]; 6507 vhcache_nvl[i] = NULL; 6508 break; 6509 } 6510 } 6511 6512 /* 6513 * this is to cover the case of some one manually causing unloading 6514 * (or detaching) and reloading (or attaching) of a vhci driver. 
6515 */ 6516 if (nvl == NULL && modrootloaded) 6517 nvl = read_on_disk_vhci_cache(vh->vh_class); 6518 6519 if (nvl != NULL) { 6520 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6521 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6522 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6523 else { 6524 cmn_err(CE_WARN, 6525 "%s: data file corrupted, will recreate\n", 6526 vhc->vhc_vhcache_filename); 6527 } 6528 rw_exit(&vhcache->vhcache_lock); 6529 nvlist_free(nvl); 6530 } 6531 6532 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6533 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6534 6535 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 6536 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 6537 } 6538 6539 /* 6540 * free all vhci cache related resources 6541 */ 6542 static int 6543 destroy_vhci_cache(mdi_vhci_t *vh) 6544 { 6545 mdi_vhci_config_t *vhc = vh->vh_config; 6546 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6547 mdi_vhcache_phci_t *cphci, *cphci_next; 6548 mdi_vhcache_client_t *cct, *cct_next; 6549 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6550 6551 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6552 return (MDI_FAILURE); 6553 6554 kmem_free(vhc->vhc_vhcache_filename, 6555 strlen(vhc->vhc_vhcache_filename) + 1); 6556 6557 if (vhc->vhc_phci_driver_list) 6558 free_phci_driver_list(vhc); 6559 6560 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6561 6562 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6563 cphci = cphci_next) { 6564 cphci_next = cphci->cphci_next; 6565 free_vhcache_phci(cphci); 6566 } 6567 6568 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 6569 cct_next = cct->cct_next; 6570 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6571 cpi_next = cpi->cpi_next; 6572 free_vhcache_pathinfo(cpi); 6573 } 6574 free_vhcache_client(cct); 6575 } 6576 6577 rw_destroy(&vhcache->vhcache_lock); 6578 6579 mutex_destroy(&vhc->vhc_lock); 6580 cv_destroy(&vhc->vhc_cv); 
6581 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6582 return (MDI_SUCCESS); 6583 } 6584 6585 /* 6586 * Setup the list of phci drivers associated with the specified vhci class. 6587 * MDI uses this information to rebuild bus config cache if in case the 6588 * cache is not available or corrupted. 6589 */ 6590 static void 6591 setup_phci_driver_list(mdi_vhci_t *vh) 6592 { 6593 mdi_vhci_config_t *vhc = vh->vh_config; 6594 mdi_phci_driver_info_t *driver_list; 6595 char **driver_list1; 6596 uint_t ndrivers, ndrivers1; 6597 int i, j; 6598 6599 if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) { 6600 driver_list = scsi_phci_driver_list; 6601 ndrivers = sizeof (scsi_phci_driver_list) / 6602 sizeof (mdi_phci_driver_info_t); 6603 } else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) { 6604 driver_list = ib_phci_driver_list; 6605 ndrivers = sizeof (ib_phci_driver_list) / 6606 sizeof (mdi_phci_driver_info_t); 6607 } else { 6608 driver_list = NULL; 6609 ndrivers = 0; 6610 } 6611 6612 /* 6613 * The driver.conf file of a vhci driver can specify additional 6614 * phci drivers using a project private "phci-drivers" property. 
6615 */ 6616 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip, 6617 DDI_PROP_DONTPASS, "phci-drivers", &driver_list1, 6618 &ndrivers1) != DDI_PROP_SUCCESS) 6619 ndrivers1 = 0; 6620 6621 vhc->vhc_nphci_drivers = ndrivers + ndrivers1; 6622 if (vhc->vhc_nphci_drivers == 0) 6623 return; 6624 6625 vhc->vhc_phci_driver_list = kmem_alloc( 6626 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP); 6627 6628 for (i = 0; i < ndrivers; i++) { 6629 vhc->vhc_phci_driver_list[i].phdriver_name = 6630 i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP); 6631 vhc->vhc_phci_driver_list[i].phdriver_root_support = 6632 driver_list[i].phdriver_root_support; 6633 } 6634 6635 for (j = 0; j < ndrivers1; j++, i++) { 6636 vhc->vhc_phci_driver_list[i].phdriver_name = 6637 i_ddi_strdup(driver_list1[j], KM_SLEEP); 6638 vhc->vhc_phci_driver_list[i].phdriver_root_support = 1; 6639 } 6640 6641 if (ndrivers1) 6642 ddi_prop_free(driver_list1); 6643 } 6644 6645 /* 6646 * Free the memory allocated for the phci driver list 6647 */ 6648 static void 6649 free_phci_driver_list(mdi_vhci_config_t *vhc) 6650 { 6651 int i; 6652 6653 if (vhc->vhc_phci_driver_list == NULL) 6654 return; 6655 6656 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 6657 kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name, 6658 strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1); 6659 } 6660 6661 kmem_free(vhc->vhc_phci_driver_list, 6662 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers); 6663 } 6664 6665 /* 6666 * Stop all vhci cache related async threads and free their resources. 
6667 */ 6668 static int 6669 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6670 { 6671 mdi_async_client_config_t *acc, *acc_next; 6672 6673 mutex_enter(&vhc->vhc_lock); 6674 vhc->vhc_flags |= MDI_VHC_EXIT; 6675 ASSERT(vhc->vhc_acc_thrcount >= 0); 6676 cv_broadcast(&vhc->vhc_cv); 6677 6678 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6679 vhc->vhc_acc_thrcount != 0) { 6680 mutex_exit(&vhc->vhc_lock); 6681 delay(1); 6682 mutex_enter(&vhc->vhc_lock); 6683 } 6684 6685 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6686 6687 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6688 acc_next = acc->acc_next; 6689 free_async_client_config(acc); 6690 } 6691 vhc->vhc_acc_list_head = NULL; 6692 vhc->vhc_acc_list_tail = NULL; 6693 vhc->vhc_acc_count = 0; 6694 6695 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6696 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6697 mutex_exit(&vhc->vhc_lock); 6698 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6699 vhcache_dirty(vhc); 6700 return (MDI_FAILURE); 6701 } 6702 } else 6703 mutex_exit(&vhc->vhc_lock); 6704 6705 if (callb_delete(vhc->vhc_cbid) != 0) 6706 return (MDI_FAILURE); 6707 6708 return (MDI_SUCCESS); 6709 } 6710 6711 /* 6712 * Stop vhci cache flush thread 6713 */ 6714 /* ARGSUSED */ 6715 static boolean_t 6716 stop_vhcache_flush_thread(void *arg, int code) 6717 { 6718 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 6719 6720 mutex_enter(&vhc->vhc_lock); 6721 vhc->vhc_flags |= MDI_VHC_EXIT; 6722 cv_broadcast(&vhc->vhc_cv); 6723 6724 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6725 mutex_exit(&vhc->vhc_lock); 6726 delay(1); 6727 mutex_enter(&vhc->vhc_lock); 6728 } 6729 6730 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6731 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6732 mutex_exit(&vhc->vhc_lock); 6733 (void) flush_vhcache(vhc, 1); 6734 } else 6735 mutex_exit(&vhc->vhc_lock); 6736 6737 return (B_TRUE); 6738 } 6739 6740 /* 6741 * Enqueue the vhcache phci (cphci) at the tail of the list 6742 */ 6743 static 
void 6744 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6745 { 6746 cphci->cphci_next = NULL; 6747 if (vhcache->vhcache_phci_head == NULL) 6748 vhcache->vhcache_phci_head = cphci; 6749 else 6750 vhcache->vhcache_phci_tail->cphci_next = cphci; 6751 vhcache->vhcache_phci_tail = cphci; 6752 } 6753 6754 /* 6755 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6756 */ 6757 static void 6758 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6759 mdi_vhcache_pathinfo_t *cpi) 6760 { 6761 cpi->cpi_next = NULL; 6762 if (cct->cct_cpi_head == NULL) 6763 cct->cct_cpi_head = cpi; 6764 else 6765 cct->cct_cpi_tail->cpi_next = cpi; 6766 cct->cct_cpi_tail = cpi; 6767 } 6768 6769 /* 6770 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6771 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6772 * flag set come at the beginning of the list. All cpis which have this 6773 * flag set come at the end of the list. 6774 */ 6775 static void 6776 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6777 mdi_vhcache_pathinfo_t *newcpi) 6778 { 6779 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6780 6781 if (cct->cct_cpi_head == NULL || 6782 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6783 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6784 else { 6785 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6786 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6787 prev_cpi = cpi, cpi = cpi->cpi_next) 6788 ; 6789 6790 if (prev_cpi == NULL) 6791 cct->cct_cpi_head = newcpi; 6792 else 6793 prev_cpi->cpi_next = newcpi; 6794 6795 newcpi->cpi_next = cpi; 6796 6797 if (cpi == NULL) 6798 cct->cct_cpi_tail = newcpi; 6799 } 6800 } 6801 6802 /* 6803 * Enqueue the vhcache client (cct) at the tail of the list 6804 */ 6805 static void 6806 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6807 mdi_vhcache_client_t *cct) 6808 { 6809 cct->cct_next = NULL; 6810 if (vhcache->vhcache_client_head == NULL) 6811 
vhcache->vhcache_client_head = cct; 6812 else 6813 vhcache->vhcache_client_tail->cct_next = cct; 6814 vhcache->vhcache_client_tail = cct; 6815 } 6816 6817 static void 6818 free_string_array(char **str, int nelem) 6819 { 6820 int i; 6821 6822 if (str) { 6823 for (i = 0; i < nelem; i++) { 6824 if (str[i]) 6825 kmem_free(str[i], strlen(str[i]) + 1); 6826 } 6827 kmem_free(str, sizeof (char *) * nelem); 6828 } 6829 } 6830 6831 static void 6832 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6833 { 6834 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6835 kmem_free(cphci, sizeof (*cphci)); 6836 } 6837 6838 static void 6839 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6840 { 6841 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6842 kmem_free(cpi, sizeof (*cpi)); 6843 } 6844 6845 static void 6846 free_vhcache_client(mdi_vhcache_client_t *cct) 6847 { 6848 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6849 kmem_free(cct, sizeof (*cct)); 6850 } 6851 6852 static char * 6853 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6854 { 6855 char *name_addr; 6856 int len; 6857 6858 len = strlen(ct_name) + strlen(ct_addr) + 2; 6859 name_addr = kmem_alloc(len, KM_SLEEP); 6860 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6861 6862 if (ret_len) 6863 *ret_len = len; 6864 return (name_addr); 6865 } 6866 6867 /* 6868 * Copy the contents of paddrnvl to vhci cache. 6869 * paddrnvl nvlist contains path information for a vhci client. 6870 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 
6871 */ 6872 static void 6873 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6874 mdi_vhcache_client_t *cct) 6875 { 6876 nvpair_t *nvp = NULL; 6877 mdi_vhcache_pathinfo_t *cpi; 6878 uint_t nelem; 6879 uint32_t *val; 6880 6881 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6882 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6883 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6884 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6885 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6886 ASSERT(nelem == 2); 6887 cpi->cpi_cphci = cphci_list[val[0]]; 6888 cpi->cpi_flags = val[1]; 6889 enqueue_tail_vhcache_pathinfo(cct, cpi); 6890 } 6891 } 6892 6893 /* 6894 * Copy the contents of caddrmapnvl to vhci cache. 6895 * caddrmapnvl nvlist contains vhci client address to phci client address 6896 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6897 * this nvlist. 6898 */ 6899 static void 6900 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6901 mdi_vhcache_phci_t *cphci_list[]) 6902 { 6903 nvpair_t *nvp = NULL; 6904 nvlist_t *paddrnvl; 6905 mdi_vhcache_client_t *cct; 6906 6907 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6908 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6909 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6910 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6911 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6912 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6913 /* the client must contain at least one path */ 6914 ASSERT(cct->cct_cpi_head != NULL); 6915 6916 enqueue_vhcache_client(vhcache, cct); 6917 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6918 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6919 } 6920 } 6921 6922 /* 6923 * Copy the contents of the main nvlist to vhci cache. 6924 * 6925 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify the PHCIs to which the bus specific
 * addresses belong. These integers are used as an index into the phcis
 * string array in the main nvlist to get the PHCI path.
6957 */ 6958 static int 6959 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 6960 { 6961 char **phcis, **phci_namep; 6962 uint_t nphcis; 6963 mdi_vhcache_phci_t *cphci, **cphci_list; 6964 nvlist_t *caddrmapnvl; 6965 int32_t ver; 6966 int i; 6967 size_t cphci_list_size; 6968 6969 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 6970 6971 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 6972 ver != MDI_VHCI_CACHE_VERSION) 6973 return (MDI_FAILURE); 6974 6975 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 6976 &nphcis) != 0) 6977 return (MDI_SUCCESS); 6978 6979 ASSERT(nphcis > 0); 6980 6981 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 6982 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 6983 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 6984 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 6985 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 6986 enqueue_vhcache_phci(vhcache, cphci); 6987 cphci_list[i] = cphci; 6988 } 6989 6990 ASSERT(vhcache->vhcache_phci_head != NULL); 6991 6992 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 6993 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 6994 6995 kmem_free(cphci_list, cphci_list_size); 6996 return (MDI_SUCCESS); 6997 } 6998 6999 /* 7000 * Build paddrnvl for the specified client using the information in the 7001 * vhci cache and add it to the caddrmapnnvl. 7002 * Returns 0 on success, errno on failure. 
7003 */ 7004 static int 7005 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7006 nvlist_t *caddrmapnvl) 7007 { 7008 mdi_vhcache_pathinfo_t *cpi; 7009 nvlist_t *nvl; 7010 int err; 7011 uint32_t val[2]; 7012 7013 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7014 7015 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7016 return (err); 7017 7018 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7019 val[0] = cpi->cpi_cphci->cphci_id; 7020 val[1] = cpi->cpi_flags; 7021 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7022 != 0) 7023 goto out; 7024 } 7025 7026 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7027 out: 7028 nvlist_free(nvl); 7029 return (err); 7030 } 7031 7032 /* 7033 * Build caddrmapnvl using the information in the vhci cache 7034 * and add it to the mainnvl. 7035 * Returns 0 on success, errno on failure. 7036 */ 7037 static int 7038 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7039 { 7040 mdi_vhcache_client_t *cct; 7041 nvlist_t *nvl; 7042 int err; 7043 7044 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7045 7046 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7047 return (err); 7048 7049 for (cct = vhcache->vhcache_client_head; cct != NULL; 7050 cct = cct->cct_next) { 7051 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7052 goto out; 7053 } 7054 7055 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7056 out: 7057 nvlist_free(nvl); 7058 return (err); 7059 } 7060 7061 /* 7062 * Build nvlist using the information in the vhci cache. 7063 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7064 * Returns nvl on success, NULL on failure. 
7065 */ 7066 static nvlist_t * 7067 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7068 { 7069 mdi_vhcache_phci_t *cphci; 7070 uint_t phci_count; 7071 char **phcis; 7072 nvlist_t *nvl; 7073 int err, i; 7074 7075 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7076 nvl = NULL; 7077 goto out; 7078 } 7079 7080 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7081 MDI_VHCI_CACHE_VERSION)) != 0) 7082 goto out; 7083 7084 rw_enter(&vhcache->vhcache_lock, RW_READER); 7085 if (vhcache->vhcache_phci_head == NULL) { 7086 rw_exit(&vhcache->vhcache_lock); 7087 return (nvl); 7088 } 7089 7090 phci_count = 0; 7091 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7092 cphci = cphci->cphci_next) 7093 cphci->cphci_id = phci_count++; 7094 7095 /* build phci pathname list */ 7096 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7097 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7098 cphci = cphci->cphci_next, i++) 7099 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7100 7101 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7102 phci_count); 7103 free_string_array(phcis, phci_count); 7104 7105 if (err == 0 && 7106 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7107 rw_exit(&vhcache->vhcache_lock); 7108 return (nvl); 7109 } 7110 7111 rw_exit(&vhcache->vhcache_lock); 7112 out: 7113 if (nvl) 7114 nvlist_free(nvl); 7115 return (NULL); 7116 } 7117 7118 /* 7119 * Lookup vhcache phci structure for the specified phci path. 7120 */ 7121 static mdi_vhcache_phci_t * 7122 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7123 { 7124 mdi_vhcache_phci_t *cphci; 7125 7126 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7127 7128 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7129 cphci = cphci->cphci_next) { 7130 if (strcmp(cphci->cphci_path, phci_path) == 0) 7131 return (cphci); 7132 } 7133 7134 return (NULL); 7135 } 7136 7137 /* 7138 * Lookup vhcache phci structure for the specified phci. 
7139 */ 7140 static mdi_vhcache_phci_t * 7141 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7142 { 7143 mdi_vhcache_phci_t *cphci; 7144 7145 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7146 7147 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7148 cphci = cphci->cphci_next) { 7149 if (cphci->cphci_phci == ph) 7150 return (cphci); 7151 } 7152 7153 return (NULL); 7154 } 7155 7156 /* 7157 * Add the specified phci to the vhci cache if not already present. 7158 */ 7159 static void 7160 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7161 { 7162 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7163 mdi_vhcache_phci_t *cphci; 7164 char *pathname; 7165 int cache_updated; 7166 7167 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7168 7169 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7170 (void) ddi_pathname(ph->ph_dip, pathname); 7171 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7172 != NULL) { 7173 cphci->cphci_phci = ph; 7174 cache_updated = 0; 7175 } else { 7176 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7177 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7178 cphci->cphci_phci = ph; 7179 enqueue_vhcache_phci(vhcache, cphci); 7180 cache_updated = 1; 7181 } 7182 7183 rw_exit(&vhcache->vhcache_lock); 7184 7185 /* 7186 * Since a new phci has been added, reset 7187 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7188 * during next vhcache_discover_paths(). 7189 */ 7190 mutex_enter(&vhc->vhc_lock); 7191 vhc->vhc_path_discovery_cutoff_time = 0; 7192 mutex_exit(&vhc->vhc_lock); 7193 7194 kmem_free(pathname, MAXPATHLEN); 7195 if (cache_updated) 7196 vhcache_dirty(vhc); 7197 } 7198 7199 /* 7200 * Remove the reference to the specified phci from the vhci cache. 
7201 */ 7202 static void 7203 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7204 { 7205 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7206 mdi_vhcache_phci_t *cphci; 7207 7208 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7209 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7210 /* do not remove the actual mdi_vhcache_phci structure */ 7211 cphci->cphci_phci = NULL; 7212 } 7213 rw_exit(&vhcache->vhcache_lock); 7214 } 7215 7216 static void 7217 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7218 mdi_vhcache_lookup_token_t *src) 7219 { 7220 if (src == NULL) { 7221 dst->lt_cct = NULL; 7222 dst->lt_cct_lookup_time = 0; 7223 } else { 7224 dst->lt_cct = src->lt_cct; 7225 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7226 } 7227 } 7228 7229 /* 7230 * Look up vhcache client for the specified client. 7231 */ 7232 static mdi_vhcache_client_t * 7233 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7234 mdi_vhcache_lookup_token_t *token) 7235 { 7236 mod_hash_val_t hv; 7237 char *name_addr; 7238 int len; 7239 7240 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7241 7242 /* 7243 * If no vhcache clean occurred since the last lookup, we can 7244 * simply return the cct from the last lookup operation. 7245 * It works because ccts are never freed except during the vhcache 7246 * cleanup operation. 
7247 */ 7248 if (token != NULL && 7249 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7250 return (token->lt_cct); 7251 7252 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7253 if (mod_hash_find(vhcache->vhcache_client_hash, 7254 (mod_hash_key_t)name_addr, &hv) == 0) { 7255 if (token) { 7256 token->lt_cct = (mdi_vhcache_client_t *)hv; 7257 token->lt_cct_lookup_time = lbolt64; 7258 } 7259 } else { 7260 if (token) { 7261 token->lt_cct = NULL; 7262 token->lt_cct_lookup_time = 0; 7263 } 7264 hv = NULL; 7265 } 7266 kmem_free(name_addr, len); 7267 return ((mdi_vhcache_client_t *)hv); 7268 } 7269 7270 /* 7271 * Add the specified path to the vhci cache if not already present. 7272 * Also add the vhcache client for the client corresponding to this path 7273 * if it doesn't already exist. 7274 */ 7275 static void 7276 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7277 { 7278 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7279 mdi_vhcache_client_t *cct; 7280 mdi_vhcache_pathinfo_t *cpi; 7281 mdi_phci_t *ph = pip->pi_phci; 7282 mdi_client_t *ct = pip->pi_client; 7283 int cache_updated = 0; 7284 7285 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7286 7287 /* if vhcache client for this pip doesn't already exist, add it */ 7288 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7289 NULL)) == NULL) { 7290 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7291 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7292 ct->ct_guid, NULL); 7293 enqueue_vhcache_client(vhcache, cct); 7294 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7295 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7296 cache_updated = 1; 7297 } 7298 7299 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7300 if (cpi->cpi_cphci->cphci_phci == ph && 7301 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7302 cpi->cpi_pip = pip; 7303 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7304 cpi->cpi_flags &= 7305 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7306 sort_vhcache_paths(cct); 7307 cache_updated = 1; 7308 } 7309 break; 7310 } 7311 } 7312 7313 if (cpi == NULL) { 7314 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7315 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7316 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7317 ASSERT(cpi->cpi_cphci != NULL); 7318 cpi->cpi_pip = pip; 7319 enqueue_vhcache_pathinfo(cct, cpi); 7320 cache_updated = 1; 7321 } 7322 7323 rw_exit(&vhcache->vhcache_lock); 7324 7325 if (cache_updated) 7326 vhcache_dirty(vhc); 7327 } 7328 7329 /* 7330 * Remove the reference to the specified path from the vhci cache. 7331 */ 7332 static void 7333 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7334 { 7335 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7336 mdi_client_t *ct = pip->pi_client; 7337 mdi_vhcache_client_t *cct; 7338 mdi_vhcache_pathinfo_t *cpi; 7339 7340 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7341 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7342 NULL)) != NULL) { 7343 for (cpi = cct->cct_cpi_head; cpi != NULL; 7344 cpi = cpi->cpi_next) { 7345 if (cpi->cpi_pip == pip) { 7346 cpi->cpi_pip = NULL; 7347 break; 7348 } 7349 } 7350 } 7351 rw_exit(&vhcache->vhcache_lock); 7352 } 7353 7354 /* 7355 * Flush the vhci cache to disk. 7356 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7357 */ 7358 static int 7359 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7360 { 7361 nvlist_t *nvl; 7362 int err; 7363 int rv; 7364 7365 /* 7366 * It is possible that the system may shutdown before 7367 * i_ddi_io_initialized (during stmsboot for example). To allow for 7368 * flushing the cache in this case do not check for 7369 * i_ddi_io_initialized when force flag is set. 
7370 */ 7371 if (force_flag == 0 && !i_ddi_io_initialized()) 7372 return (MDI_FAILURE); 7373 7374 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7375 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7376 nvlist_free(nvl); 7377 } else 7378 err = EFAULT; 7379 7380 rv = MDI_SUCCESS; 7381 mutex_enter(&vhc->vhc_lock); 7382 if (err != 0) { 7383 if (err == EROFS) { 7384 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7385 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7386 MDI_VHC_VHCACHE_DIRTY); 7387 } else { 7388 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7389 cmn_err(CE_CONT, "%s: update failed\n", 7390 vhc->vhc_vhcache_filename); 7391 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7392 } 7393 rv = MDI_FAILURE; 7394 } 7395 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7396 cmn_err(CE_CONT, 7397 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7398 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7399 } 7400 mutex_exit(&vhc->vhc_lock); 7401 7402 return (rv); 7403 } 7404 7405 /* 7406 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7407 * Exits itself if left idle for the idle timeout period. 
7408 */ 7409 static void 7410 vhcache_flush_thread(void *arg) 7411 { 7412 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7413 clock_t idle_time, quit_at_ticks; 7414 callb_cpr_t cprinfo; 7415 7416 /* number of seconds to sleep idle before exiting */ 7417 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7418 7419 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7420 "mdi_vhcache_flush"); 7421 mutex_enter(&vhc->vhc_lock); 7422 for (; ; ) { 7423 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7424 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7425 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7426 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7427 (void) cv_timedwait(&vhc->vhc_cv, 7428 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7429 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7430 } else { 7431 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7432 mutex_exit(&vhc->vhc_lock); 7433 7434 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7435 vhcache_dirty(vhc); 7436 7437 mutex_enter(&vhc->vhc_lock); 7438 } 7439 } 7440 7441 quit_at_ticks = ddi_get_lbolt() + idle_time; 7442 7443 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7444 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7445 ddi_get_lbolt() < quit_at_ticks) { 7446 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7447 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7448 quit_at_ticks); 7449 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7450 } 7451 7452 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7453 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7454 goto out; 7455 } 7456 7457 out: 7458 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7459 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7460 CALLB_CPR_EXIT(&cprinfo); 7461 } 7462 7463 /* 7464 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7465 */ 7466 static void 7467 vhcache_dirty(mdi_vhci_config_t *vhc) 7468 { 7469 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7470 int create_thread; 7471 7472 rw_enter(&vhcache->vhcache_lock, RW_READER); 7473 /* do not flush cache until the cache is fully built */ 7474 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7475 rw_exit(&vhcache->vhcache_lock); 7476 return; 7477 } 7478 rw_exit(&vhcache->vhcache_lock); 7479 7480 mutex_enter(&vhc->vhc_lock); 7481 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7482 mutex_exit(&vhc->vhc_lock); 7483 return; 7484 } 7485 7486 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7487 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7488 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7489 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7490 cv_broadcast(&vhc->vhc_cv); 7491 create_thread = 0; 7492 } else { 7493 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7494 create_thread = 1; 7495 } 7496 mutex_exit(&vhc->vhc_lock); 7497 7498 if (create_thread) 7499 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7500 0, &p0, TS_RUN, minclsyspri); 7501 } 7502 7503 /* 7504 * phci bus config structure - one for for each phci bus config operation that 7505 * we initiate on behalf of a vhci. 
7506 */ 7507 typedef struct mdi_phci_bus_config_s { 7508 char *phbc_phci_path; 7509 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7510 struct mdi_phci_bus_config_s *phbc_next; 7511 } mdi_phci_bus_config_t; 7512 7513 /* vhci bus config structure - one for each vhci bus config operation */ 7514 typedef struct mdi_vhci_bus_config_s { 7515 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7516 major_t vhbc_op_major; /* bus config op major */ 7517 uint_t vhbc_op_flags; /* bus config op flags */ 7518 kmutex_t vhbc_lock; 7519 kcondvar_t vhbc_cv; 7520 int vhbc_thr_count; 7521 } mdi_vhci_bus_config_t; 7522 7523 /* 7524 * bus config the specified phci 7525 */ 7526 static void 7527 bus_config_phci(void *arg) 7528 { 7529 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7530 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7531 dev_info_t *ph_dip; 7532 7533 /* 7534 * first configure all path components upto phci and then configure 7535 * the phci children. 7536 */ 7537 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7538 != NULL) { 7539 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7540 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7541 (void) ndi_devi_config_driver(ph_dip, 7542 vhbc->vhbc_op_flags, 7543 vhbc->vhbc_op_major); 7544 } else 7545 (void) ndi_devi_config(ph_dip, 7546 vhbc->vhbc_op_flags); 7547 7548 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7549 ndi_rele_devi(ph_dip); 7550 } 7551 7552 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7553 kmem_free(phbc, sizeof (*phbc)); 7554 7555 mutex_enter(&vhbc->vhbc_lock); 7556 vhbc->vhbc_thr_count--; 7557 if (vhbc->vhbc_thr_count == 0) 7558 cv_broadcast(&vhbc->vhbc_cv); 7559 mutex_exit(&vhbc->vhbc_lock); 7560 } 7561 7562 /* 7563 * Bus config all phcis associated with the vhci in parallel. 7564 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7565 */ 7566 static void 7567 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7568 ddi_bus_config_op_t op, major_t maj) 7569 { 7570 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7571 mdi_vhci_bus_config_t *vhbc; 7572 mdi_vhcache_phci_t *cphci; 7573 7574 rw_enter(&vhcache->vhcache_lock, RW_READER); 7575 if (vhcache->vhcache_phci_head == NULL) { 7576 rw_exit(&vhcache->vhcache_lock); 7577 return; 7578 } 7579 7580 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7581 7582 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7583 cphci = cphci->cphci_next) { 7584 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7585 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7586 KM_SLEEP); 7587 phbc->phbc_vhbusconfig = vhbc; 7588 phbc->phbc_next = phbc_head; 7589 phbc_head = phbc; 7590 vhbc->vhbc_thr_count++; 7591 } 7592 rw_exit(&vhcache->vhcache_lock); 7593 7594 vhbc->vhbc_op = op; 7595 vhbc->vhbc_op_major = maj; 7596 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7597 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7598 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7599 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7600 7601 /* now create threads to initiate bus config on all phcis in parallel */ 7602 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7603 phbc_next = phbc->phbc_next; 7604 if (mdi_mtc_off) 7605 bus_config_phci((void *)phbc); 7606 else 7607 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7608 0, &p0, TS_RUN, minclsyspri); 7609 } 7610 7611 mutex_enter(&vhbc->vhbc_lock); 7612 /* wait until all threads exit */ 7613 while (vhbc->vhbc_thr_count > 0) 7614 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7615 mutex_exit(&vhbc->vhbc_lock); 7616 7617 mutex_destroy(&vhbc->vhbc_lock); 7618 cv_destroy(&vhbc->vhbc_cv); 7619 kmem_free(vhbc, sizeof (*vhbc)); 7620 } 7621 7622 /* 7623 * Single threaded version of bus_config_all_phcis() 7624 */ 7625 static void 7626 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7627 
ddi_bus_config_op_t op, major_t maj) 7628 { 7629 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7630 7631 single_threaded_vhconfig_enter(vhc); 7632 bus_config_all_phcis(vhcache, flags, op, maj); 7633 single_threaded_vhconfig_exit(vhc); 7634 } 7635 7636 /* 7637 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7638 * The path includes the child component in addition to the phci path. 7639 */ 7640 static int 7641 bus_config_one_phci_child(char *path) 7642 { 7643 dev_info_t *ph_dip, *child; 7644 char *devnm; 7645 int rv = MDI_FAILURE; 7646 7647 /* extract the child component of the phci */ 7648 devnm = strrchr(path, '/'); 7649 *devnm++ = '\0'; 7650 7651 /* 7652 * first configure all path components upto phci and then 7653 * configure the phci child. 7654 */ 7655 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7656 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7657 NDI_SUCCESS) { 7658 /* 7659 * release the hold that ndi_devi_config_one() placed 7660 */ 7661 ndi_rele_devi(child); 7662 rv = MDI_SUCCESS; 7663 } 7664 7665 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7666 ndi_rele_devi(ph_dip); 7667 } 7668 7669 devnm--; 7670 *devnm = '/'; 7671 return (rv); 7672 } 7673 7674 /* 7675 * Build a list of phci client paths for the specified vhci client. 7676 * The list includes only those phci client paths which aren't configured yet. 7677 */ 7678 static mdi_phys_path_t * 7679 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7680 { 7681 mdi_vhcache_pathinfo_t *cpi; 7682 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7683 int config_path, len; 7684 7685 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7686 /* 7687 * include only those paths that aren't configured. 
7688 */ 7689 config_path = 0; 7690 if (cpi->cpi_pip == NULL) 7691 config_path = 1; 7692 else { 7693 MDI_PI_LOCK(cpi->cpi_pip); 7694 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7695 config_path = 1; 7696 MDI_PI_UNLOCK(cpi->cpi_pip); 7697 } 7698 7699 if (config_path) { 7700 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7701 len = strlen(cpi->cpi_cphci->cphci_path) + 7702 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7703 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7704 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7705 cpi->cpi_cphci->cphci_path, ct_name, 7706 cpi->cpi_addr); 7707 pp->phys_path_next = NULL; 7708 7709 if (pp_head == NULL) 7710 pp_head = pp; 7711 else 7712 pp_tail->phys_path_next = pp; 7713 pp_tail = pp; 7714 } 7715 } 7716 7717 return (pp_head); 7718 } 7719 7720 /* 7721 * Free the memory allocated for phci client path list. 7722 */ 7723 static void 7724 free_phclient_path_list(mdi_phys_path_t *pp_head) 7725 { 7726 mdi_phys_path_t *pp, *pp_next; 7727 7728 for (pp = pp_head; pp != NULL; pp = pp_next) { 7729 pp_next = pp->phys_path_next; 7730 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7731 kmem_free(pp, sizeof (*pp)); 7732 } 7733 } 7734 7735 /* 7736 * Allocated async client structure and initialize with the specified values. 7737 */ 7738 static mdi_async_client_config_t * 7739 alloc_async_client_config(char *ct_name, char *ct_addr, 7740 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7741 { 7742 mdi_async_client_config_t *acc; 7743 7744 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7745 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7746 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7747 acc->acc_phclient_path_list_head = pp_head; 7748 init_vhcache_lookup_token(&acc->acc_token, tok); 7749 acc->acc_next = NULL; 7750 return (acc); 7751 } 7752 7753 /* 7754 * Free the memory allocated for the async client structure and their members. 
7755 */ 7756 static void 7757 free_async_client_config(mdi_async_client_config_t *acc) 7758 { 7759 if (acc->acc_phclient_path_list_head) 7760 free_phclient_path_list(acc->acc_phclient_path_list_head); 7761 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7762 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7763 kmem_free(acc, sizeof (*acc)); 7764 } 7765 7766 /* 7767 * Sort vhcache pathinfos (cpis) of the specified client. 7768 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7769 * flag set come at the beginning of the list. All cpis which have this 7770 * flag set come at the end of the list. 7771 */ 7772 static void 7773 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7774 { 7775 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7776 7777 cpi_head = cct->cct_cpi_head; 7778 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7779 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7780 cpi_next = cpi->cpi_next; 7781 enqueue_vhcache_pathinfo(cct, cpi); 7782 } 7783 } 7784 7785 /* 7786 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7787 * every vhcache pathinfo of the specified client. If not adjust the flag 7788 * setting appropriately. 7789 * 7790 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7791 * on-disk vhci cache. So every time this flag is updated the cache must be 7792 * flushed. 7793 */ 7794 static void 7795 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7796 mdi_vhcache_lookup_token_t *tok) 7797 { 7798 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7799 mdi_vhcache_client_t *cct; 7800 mdi_vhcache_pathinfo_t *cpi; 7801 7802 rw_enter(&vhcache->vhcache_lock, RW_READER); 7803 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7804 == NULL) { 7805 rw_exit(&vhcache->vhcache_lock); 7806 return; 7807 } 7808 7809 /* 7810 * to avoid unnecessary on-disk cache updates, first check if an 7811 * update is really needed. 
If no update is needed simply return. 7812 */ 7813 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7814 if ((cpi->cpi_pip != NULL && 7815 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7816 (cpi->cpi_pip == NULL && 7817 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7818 break; 7819 } 7820 } 7821 if (cpi == NULL) { 7822 rw_exit(&vhcache->vhcache_lock); 7823 return; 7824 } 7825 7826 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7827 rw_exit(&vhcache->vhcache_lock); 7828 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7829 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7830 tok)) == NULL) { 7831 rw_exit(&vhcache->vhcache_lock); 7832 return; 7833 } 7834 } 7835 7836 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7837 if (cpi->cpi_pip != NULL) 7838 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7839 else 7840 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7841 } 7842 sort_vhcache_paths(cct); 7843 7844 rw_exit(&vhcache->vhcache_lock); 7845 vhcache_dirty(vhc); 7846 } 7847 7848 /* 7849 * Configure all specified paths of the client. 7850 */ 7851 static void 7852 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7853 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7854 { 7855 mdi_phys_path_t *pp; 7856 7857 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 7858 (void) bus_config_one_phci_child(pp->phys_path); 7859 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 7860 } 7861 7862 /* 7863 * Dequeue elements from vhci async client config list and bus configure 7864 * their corresponding phci clients. 
7865 */ 7866 static void 7867 config_client_paths_thread(void *arg) 7868 { 7869 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7870 mdi_async_client_config_t *acc; 7871 clock_t quit_at_ticks; 7872 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 7873 callb_cpr_t cprinfo; 7874 7875 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7876 "mdi_config_client_paths"); 7877 7878 for (; ; ) { 7879 quit_at_ticks = ddi_get_lbolt() + idle_time; 7880 7881 mutex_enter(&vhc->vhc_lock); 7882 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7883 vhc->vhc_acc_list_head == NULL && 7884 ddi_get_lbolt() < quit_at_ticks) { 7885 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7886 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7887 quit_at_ticks); 7888 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7889 } 7890 7891 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7892 vhc->vhc_acc_list_head == NULL) 7893 goto out; 7894 7895 acc = vhc->vhc_acc_list_head; 7896 vhc->vhc_acc_list_head = acc->acc_next; 7897 if (vhc->vhc_acc_list_head == NULL) 7898 vhc->vhc_acc_list_tail = NULL; 7899 vhc->vhc_acc_count--; 7900 mutex_exit(&vhc->vhc_lock); 7901 7902 config_client_paths_sync(vhc, acc->acc_ct_name, 7903 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 7904 &acc->acc_token); 7905 7906 free_async_client_config(acc); 7907 } 7908 7909 out: 7910 vhc->vhc_acc_thrcount--; 7911 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7912 CALLB_CPR_EXIT(&cprinfo); 7913 } 7914 7915 /* 7916 * Arrange for all the phci client paths (pp_head) for the specified client 7917 * to be bus configured asynchronously by a thread. 
7918 */ 7919 static void 7920 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7921 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7922 { 7923 mdi_async_client_config_t *acc, *newacc; 7924 int create_thread; 7925 7926 if (pp_head == NULL) 7927 return; 7928 7929 if (mdi_mtc_off) { 7930 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7931 free_phclient_path_list(pp_head); 7932 return; 7933 } 7934 7935 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7936 ASSERT(newacc); 7937 7938 mutex_enter(&vhc->vhc_lock); 7939 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7940 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 7941 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 7942 free_async_client_config(newacc); 7943 mutex_exit(&vhc->vhc_lock); 7944 return; 7945 } 7946 } 7947 7948 if (vhc->vhc_acc_list_head == NULL) 7949 vhc->vhc_acc_list_head = newacc; 7950 else 7951 vhc->vhc_acc_list_tail->acc_next = newacc; 7952 vhc->vhc_acc_list_tail = newacc; 7953 vhc->vhc_acc_count++; 7954 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 7955 cv_broadcast(&vhc->vhc_cv); 7956 create_thread = 0; 7957 } else { 7958 vhc->vhc_acc_thrcount++; 7959 create_thread = 1; 7960 } 7961 mutex_exit(&vhc->vhc_lock); 7962 7963 if (create_thread) 7964 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 7965 0, &p0, TS_RUN, minclsyspri); 7966 } 7967 7968 /* 7969 * Return number of online paths for the specified client. 
7970 */ 7971 static int 7972 nonline_paths(mdi_vhcache_client_t *cct) 7973 { 7974 mdi_vhcache_pathinfo_t *cpi; 7975 int online_count = 0; 7976 7977 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7978 if (cpi->cpi_pip != NULL) { 7979 MDI_PI_LOCK(cpi->cpi_pip); 7980 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 7981 online_count++; 7982 MDI_PI_UNLOCK(cpi->cpi_pip); 7983 } 7984 } 7985 7986 return (online_count); 7987 } 7988 7989 /* 7990 * Bus configure all paths for the specified vhci client. 7991 * If at least one path for the client is already online, the remaining paths 7992 * will be configured asynchronously. Otherwise, it synchronously configures 7993 * the paths until at least one path is online and then rest of the paths 7994 * will be configured asynchronously. 7995 */ 7996 static void 7997 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 7998 { 7999 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8000 mdi_phys_path_t *pp_head, *pp; 8001 mdi_vhcache_client_t *cct; 8002 mdi_vhcache_lookup_token_t tok; 8003 8004 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8005 8006 init_vhcache_lookup_token(&tok, NULL); 8007 8008 if (ct_name == NULL || ct_addr == NULL || 8009 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8010 == NULL || 8011 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8012 rw_exit(&vhcache->vhcache_lock); 8013 return; 8014 } 8015 8016 /* if at least one path is online, configure the rest asynchronously */ 8017 if (nonline_paths(cct) > 0) { 8018 rw_exit(&vhcache->vhcache_lock); 8019 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8020 return; 8021 } 8022 8023 rw_exit(&vhcache->vhcache_lock); 8024 8025 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8026 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8027 rw_enter(&vhcache->vhcache_lock, RW_READER); 8028 8029 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8030 ct_addr, &tok)) 
== NULL) { 8031 rw_exit(&vhcache->vhcache_lock); 8032 goto out; 8033 } 8034 8035 if (nonline_paths(cct) > 0 && 8036 pp->phys_path_next != NULL) { 8037 rw_exit(&vhcache->vhcache_lock); 8038 config_client_paths_async(vhc, ct_name, ct_addr, 8039 pp->phys_path_next, &tok); 8040 pp->phys_path_next = NULL; 8041 goto out; 8042 } 8043 8044 rw_exit(&vhcache->vhcache_lock); 8045 } 8046 } 8047 8048 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8049 out: 8050 free_phclient_path_list(pp_head); 8051 } 8052 8053 static void 8054 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8055 { 8056 mutex_enter(&vhc->vhc_lock); 8057 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8058 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8059 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8060 mutex_exit(&vhc->vhc_lock); 8061 } 8062 8063 static void 8064 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8065 { 8066 mutex_enter(&vhc->vhc_lock); 8067 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8068 cv_broadcast(&vhc->vhc_cv); 8069 mutex_exit(&vhc->vhc_lock); 8070 } 8071 8072 /* 8073 * Attach the phci driver instances associated with the vhci: 8074 * If root is mounted attach all phci driver instances. 8075 * If root is not mounted, attach the instances of only those phci 8076 * drivers that have the root support. 8077 */ 8078 static void 8079 attach_phci_drivers(mdi_vhci_config_t *vhc) 8080 { 8081 int i; 8082 major_t m; 8083 8084 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 8085 if (modrootloaded == 0 && 8086 vhc->vhc_phci_driver_list[i].phdriver_root_support == 0) 8087 continue; 8088 8089 m = ddi_name_to_major( 8090 vhc->vhc_phci_driver_list[i].phdriver_name); 8091 if (m != (major_t)-1) { 8092 if (ddi_hold_installed_driver(m) != NULL) 8093 ddi_rele_driver(m); 8094 } 8095 } 8096 } 8097 8098 /* 8099 * Build vhci cache: 8100 * 8101 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8102 * the phci driver instances. During this process the cache gets built. 
8103 * 8104 * Cache is built fully if the root is mounted. 8105 * If the root is not mounted, phci drivers that do not have root support 8106 * are not attached. As a result the cache is built partially. The entries 8107 * in the cache reflect only those phci drivers that have root support. 8108 */ 8109 static int 8110 build_vhci_cache(mdi_vhci_config_t *vhc) 8111 { 8112 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8113 8114 single_threaded_vhconfig_enter(vhc); 8115 8116 rw_enter(&vhcache->vhcache_lock, RW_READER); 8117 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8118 rw_exit(&vhcache->vhcache_lock); 8119 single_threaded_vhconfig_exit(vhc); 8120 return (0); 8121 } 8122 rw_exit(&vhcache->vhcache_lock); 8123 8124 attach_phci_drivers(vhc); 8125 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8126 BUS_CONFIG_ALL, (major_t)-1); 8127 8128 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8129 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8130 rw_exit(&vhcache->vhcache_lock); 8131 8132 single_threaded_vhconfig_exit(vhc); 8133 vhcache_dirty(vhc); 8134 return (1); 8135 } 8136 8137 /* 8138 * Determine if discovery of paths is needed. 8139 */ 8140 static int 8141 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8142 { 8143 int rv = 1; 8144 8145 mutex_enter(&vhc->vhc_lock); 8146 if (i_ddi_io_initialized() == 0) { 8147 if (vhc->vhc_path_discovery_boot > 0) { 8148 vhc->vhc_path_discovery_boot--; 8149 goto out; 8150 } 8151 } else { 8152 if (vhc->vhc_path_discovery_postboot > 0) { 8153 vhc->vhc_path_discovery_postboot--; 8154 goto out; 8155 } 8156 } 8157 8158 /* 8159 * Do full path discovery at most once per mdi_path_discovery_interval. 8160 * This is to avoid a series of full path discoveries when opening 8161 * stale /dev/[r]dsk links. 
8162 */ 8163 if (mdi_path_discovery_interval != -1 && 8164 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8165 goto out; 8166 8167 rv = 0; 8168 out: 8169 mutex_exit(&vhc->vhc_lock); 8170 return (rv); 8171 } 8172 8173 /* 8174 * Discover all paths: 8175 * 8176 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8177 * driver instances. During this process all paths will be discovered. 8178 */ 8179 static int 8180 vhcache_discover_paths(mdi_vhci_config_t *vhc) 8181 { 8182 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8183 int rv = 0; 8184 8185 single_threaded_vhconfig_enter(vhc); 8186 8187 if (vhcache_do_discovery(vhc)) { 8188 attach_phci_drivers(vhc); 8189 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8190 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8191 8192 mutex_enter(&vhc->vhc_lock); 8193 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8194 mdi_path_discovery_interval * TICKS_PER_SECOND; 8195 mutex_exit(&vhc->vhc_lock); 8196 rv = 1; 8197 } 8198 8199 single_threaded_vhconfig_exit(vhc); 8200 return (rv); 8201 } 8202 8203 /* 8204 * Generic vhci bus config implementation: 8205 * 8206 * Parameters 8207 * vdip vhci dip 8208 * flags bus config flags 8209 * op bus config operation 8210 * The remaining parameters are bus config operation specific 8211 * 8212 * for BUS_CONFIG_ONE 8213 * arg pointer to name@addr 8214 * child upon successful return from this function, *child will be 8215 * set to the configured and held devinfo child node of vdip. 8216 * ct_addr pointer to client address (i.e. GUID) 8217 * 8218 * for BUS_CONFIG_DRIVER 8219 * arg major number of the driver 8220 * child and ct_addr parameters are ignored 8221 * 8222 * for BUS_CONFIG_ALL 8223 * arg, child, and ct_addr parameters are ignored 8224 * 8225 * Note that for the rest of the bus config operations, this function simply 8226 * calls the framework provided default bus config routine. 
8227 */ 8228 int 8229 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8230 void *arg, dev_info_t **child, char *ct_addr) 8231 { 8232 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8233 mdi_vhci_config_t *vhc = vh->vh_config; 8234 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8235 int rv = 0; 8236 int params_valid = 0; 8237 char *cp; 8238 8239 /* 8240 * While bus configuring phcis, the phci driver interactions with MDI 8241 * cause child nodes to be enumerated under the vhci node for which 8242 * they need to ndi_devi_enter the vhci node. 8243 * 8244 * Unfortunately, to avoid the deadlock, we ourself can not wait for 8245 * for the bus config operations on phcis to finish while holding the 8246 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on 8247 * phcis and call the default framework provided bus config function 8248 * if we are called with ndi_devi_enter lock held. 8249 */ 8250 if (DEVI_BUSY_OWNED(vdip)) { 8251 MDI_DEBUG(2, (CE_NOTE, vdip, 8252 "!MDI: vhci bus config: vhci dip is busy owned\n")); 8253 goto default_bus_config; 8254 } 8255 8256 rw_enter(&vhcache->vhcache_lock, RW_READER); 8257 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8258 rw_exit(&vhcache->vhcache_lock); 8259 rv = build_vhci_cache(vhc); 8260 rw_enter(&vhcache->vhcache_lock, RW_READER); 8261 } 8262 8263 switch (op) { 8264 case BUS_CONFIG_ONE: 8265 if (arg != NULL && ct_addr != NULL) { 8266 /* extract node name */ 8267 cp = (char *)arg; 8268 while (*cp != '\0' && *cp != '@') 8269 cp++; 8270 if (*cp == '@') { 8271 params_valid = 1; 8272 *cp = '\0'; 8273 config_client_paths(vhc, (char *)arg, ct_addr); 8274 /* config_client_paths() releases cache_lock */ 8275 *cp = '@'; 8276 break; 8277 } 8278 } 8279 8280 rw_exit(&vhcache->vhcache_lock); 8281 break; 8282 8283 case BUS_CONFIG_DRIVER: 8284 rw_exit(&vhcache->vhcache_lock); 8285 if (rv == 0) 8286 st_bus_config_all_phcis(vhc, flags, op, 8287 (major_t)(uintptr_t)arg); 8288 break; 8289 8290 case 
BUS_CONFIG_ALL: 8291 rw_exit(&vhcache->vhcache_lock); 8292 if (rv == 0) 8293 st_bus_config_all_phcis(vhc, flags, op, -1); 8294 break; 8295 8296 default: 8297 rw_exit(&vhcache->vhcache_lock); 8298 break; 8299 } 8300 8301 8302 default_bus_config: 8303 /* 8304 * All requested child nodes are enumerated under the vhci. 8305 * Now configure them. 8306 */ 8307 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8308 NDI_SUCCESS) { 8309 return (MDI_SUCCESS); 8310 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8311 /* discover all paths and try configuring again */ 8312 if (vhcache_discover_paths(vhc) && 8313 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8314 NDI_SUCCESS) 8315 return (MDI_SUCCESS); 8316 } 8317 8318 return (MDI_FAILURE); 8319 } 8320 8321 /* 8322 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8323 */ 8324 static nvlist_t * 8325 read_on_disk_vhci_cache(char *vhci_class) 8326 { 8327 nvlist_t *nvl; 8328 int err; 8329 char *filename; 8330 8331 filename = vhclass2vhcache_filename(vhci_class); 8332 8333 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8334 kmem_free(filename, strlen(filename) + 1); 8335 return (nvl); 8336 } else if (err == EIO) 8337 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8338 else if (err == EINVAL) 8339 cmn_err(CE_WARN, 8340 "%s: data file corrupted, will recreate\n", filename); 8341 8342 kmem_free(filename, strlen(filename) + 1); 8343 return (NULL); 8344 } 8345 8346 /* 8347 * Read on-disk vhci cache into nvlists for all vhci classes. 8348 * Called during booting by i_ddi_read_devices_files(). 8349 */ 8350 void 8351 mdi_read_devices_files(void) 8352 { 8353 int i; 8354 8355 for (i = 0; i < N_VHCI_CLASSES; i++) 8356 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8357 } 8358 8359 /* 8360 * Remove all stale entries from vhci cache. 
8361 */ 8362 static void 8363 clean_vhcache(mdi_vhci_config_t *vhc) 8364 { 8365 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8366 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8367 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8368 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8369 8370 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8371 8372 cct_head = vhcache->vhcache_client_head; 8373 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8374 for (cct = cct_head; cct != NULL; cct = cct_next) { 8375 cct_next = cct->cct_next; 8376 8377 cpi_head = cct->cct_cpi_head; 8378 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8379 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8380 cpi_next = cpi->cpi_next; 8381 if (cpi->cpi_pip != NULL) { 8382 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8383 enqueue_tail_vhcache_pathinfo(cct, cpi); 8384 } else 8385 free_vhcache_pathinfo(cpi); 8386 } 8387 8388 if (cct->cct_cpi_head != NULL) 8389 enqueue_vhcache_client(vhcache, cct); 8390 else { 8391 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8392 (mod_hash_key_t)cct->cct_name_addr); 8393 free_vhcache_client(cct); 8394 } 8395 } 8396 8397 cphci_head = vhcache->vhcache_phci_head; 8398 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8399 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8400 cphci_next = cphci->cphci_next; 8401 if (cphci->cphci_phci != NULL) 8402 enqueue_vhcache_phci(vhcache, cphci); 8403 else 8404 free_vhcache_phci(cphci); 8405 } 8406 8407 vhcache->vhcache_clean_time = lbolt64; 8408 rw_exit(&vhcache->vhcache_lock); 8409 vhcache_dirty(vhc); 8410 } 8411 8412 /* 8413 * Remove all stale entries from vhci cache. 
8414 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8415 */ 8416 void 8417 mdi_clean_vhcache(void) 8418 { 8419 mdi_vhci_t *vh; 8420 8421 mutex_enter(&mdi_mutex); 8422 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8423 vh->vh_refcnt++; 8424 mutex_exit(&mdi_mutex); 8425 clean_vhcache(vh->vh_config); 8426 mutex_enter(&mdi_mutex); 8427 vh->vh_refcnt--; 8428 } 8429 mutex_exit(&mdi_mutex); 8430 } 8431 8432 /* 8433 * mdi_vhci_walk_clients(): 8434 * Walker routine to traverse client dev_info nodes 8435 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8436 * below the client, including nexus devices, which we dont want. 8437 * So we just traverse the immediate siblings, starting from 1st client. 8438 */ 8439 void 8440 mdi_vhci_walk_clients(dev_info_t *vdip, 8441 int (*f)(dev_info_t *, void *), void *arg) 8442 { 8443 dev_info_t *cdip; 8444 mdi_client_t *ct; 8445 8446 mutex_enter(&mdi_mutex); 8447 8448 cdip = ddi_get_child(vdip); 8449 8450 while (cdip) { 8451 ct = i_devi_get_client(cdip); 8452 MDI_CLIENT_LOCK(ct); 8453 8454 switch ((*f)(cdip, arg)) { 8455 case DDI_WALK_CONTINUE: 8456 cdip = ddi_get_next_sibling(cdip); 8457 MDI_CLIENT_UNLOCK(ct); 8458 break; 8459 8460 default: 8461 MDI_CLIENT_UNLOCK(ct); 8462 mutex_exit(&mdi_mutex); 8463 return; 8464 } 8465 } 8466 8467 mutex_exit(&mdi_mutex); 8468 } 8469 8470 /* 8471 * mdi_vhci_walk_phcis(): 8472 * Walker routine to traverse phci dev_info nodes 8473 */ 8474 void 8475 mdi_vhci_walk_phcis(dev_info_t *vdip, 8476 int (*f)(dev_info_t *, void *), void *arg) 8477 { 8478 mdi_vhci_t *vh = NULL; 8479 mdi_phci_t *ph = NULL; 8480 8481 mutex_enter(&mdi_mutex); 8482 8483 vh = i_devi_get_vhci(vdip); 8484 ph = vh->vh_phci_head; 8485 8486 while (ph) { 8487 MDI_PHCI_LOCK(ph); 8488 8489 switch ((*f)(ph->ph_dip, arg)) { 8490 case DDI_WALK_CONTINUE: 8491 MDI_PHCI_UNLOCK(ph); 8492 ph = ph->ph_next; 8493 break; 8494 8495 default: 8496 MDI_PHCI_UNLOCK(ph); 8497 mutex_exit(&mdi_mutex); 8498 
return; 8499 } 8500 } 8501 8502 mutex_exit(&mdi_mutex); 8503 } 8504 8505 8506 /* 8507 * mdi_walk_vhcis(): 8508 * Walker routine to traverse vhci dev_info nodes 8509 */ 8510 void 8511 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8512 { 8513 mdi_vhci_t *vh = NULL; 8514 8515 mutex_enter(&mdi_mutex); 8516 /* 8517 * Scan for already registered vhci 8518 */ 8519 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8520 vh->vh_refcnt++; 8521 mutex_exit(&mdi_mutex); 8522 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8523 mutex_enter(&mdi_mutex); 8524 vh->vh_refcnt--; 8525 break; 8526 } else { 8527 mutex_enter(&mdi_mutex); 8528 vh->vh_refcnt--; 8529 } 8530 } 8531 8532 mutex_exit(&mdi_mutex); 8533 } 8534 8535 /* 8536 * i_mdi_log_sysevent(): 8537 * Logs events for pickup by syseventd 8538 */ 8539 static void 8540 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8541 { 8542 char *path_name; 8543 nvlist_t *attr_list; 8544 8545 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8546 KM_SLEEP) != DDI_SUCCESS) { 8547 goto alloc_failed; 8548 } 8549 8550 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8551 (void) ddi_pathname(dip, path_name); 8552 8553 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8554 ddi_driver_name(dip)) != DDI_SUCCESS) { 8555 goto error; 8556 } 8557 8558 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8559 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8560 goto error; 8561 } 8562 8563 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8564 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8565 goto error; 8566 } 8567 8568 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8569 path_name) != DDI_SUCCESS) { 8570 goto error; 8571 } 8572 8573 if (nvlist_add_string(attr_list, DDI_CLASS, 8574 ph_vh_class) != DDI_SUCCESS) { 8575 goto error; 8576 } 8577 8578 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8579 attr_list, NULL, DDI_SLEEP); 8580 8581 error: 8582 kmem_free(path_name, MAXPATHLEN); 8583 
nvlist_free(attr_list); 8584 return; 8585 8586 alloc_failed: 8587 MDI_DEBUG(1, (CE_WARN, dip, 8588 "!i_mdi_log_sysevent: Unable to send sysevent")); 8589 } 8590