1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 #include <sys/sysmacros.h> 72 73 #ifdef DEBUG 74 #include <sys/debug.h> 75 int mdi_debug = 1; 76 int mdi_debug_logonly = 0; 77 #define MDI_DEBUG(level, stmnt) \ 78 if (mdi_debug >= (level)) i_mdi_log stmnt 79 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 80 #else /* !DEBUG */ 81 #define MDI_DEBUG(level, stmnt) 82 #endif /* DEBUG */ 83 84 extern pri_t minclsyspri; 85 extern int modrootloaded; 86 87 /* 88 * Global mutex: 89 * Protects vHCI list and structure members. 90 */ 91 kmutex_t mdi_mutex; 92 93 /* 94 * Registered vHCI class driver lists 95 */ 96 int mdi_vhci_count; 97 mdi_vhci_t *mdi_vhci_head; 98 mdi_vhci_t *mdi_vhci_tail; 99 100 /* 101 * Client Hash Table size 102 */ 103 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 104 105 /* 106 * taskq interface definitions 107 */ 108 #define MDI_TASKQ_N_THREADS 8 109 #define MDI_TASKQ_PRI minclsyspri 110 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 111 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 112 113 taskq_t *mdi_taskq; 114 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 115 116 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 117 118 /* 119 * The data should be "quiet" for this interval (in seconds) before the 120 * vhci cached data is flushed to the disk. 121 */ 122 static int mdi_vhcache_flush_delay = 10; 123 124 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 125 static int mdi_vhcache_flush_daemon_idle_time = 60; 126 127 /* 128 * MDI falls back to discovery of all paths when a bus_config_one fails. 129 * The following parameters can be used to tune this operation. 130 * 131 * mdi_path_discovery_boot 132 * Number of times path discovery will be attempted during early boot. 133 * Probably there is no reason to ever set this value to greater than one. 134 * 135 * mdi_path_discovery_postboot 136 * Number of times path discovery will be attempted after early boot. 137 * Set it to a minimum of two to allow for discovery of iscsi paths which 138 * may happen very late during booting. 139 * 140 * mdi_path_discovery_interval 141 * Minimum number of seconds MDI will wait between successive discovery 142 * of all paths. Set it to -1 to disable discovery of all paths. 143 */ 144 static int mdi_path_discovery_boot = 1; 145 static int mdi_path_discovery_postboot = 2; 146 static int mdi_path_discovery_interval = 10; 147 148 /* 149 * number of seconds the asynchronous configuration thread will sleep idle 150 * before exiting. 151 */ 152 static int mdi_async_config_idle_time = 600; 153 154 static int mdi_bus_config_cache_hash_size = 256; 155 156 /* turns off multithreaded configuration for certain operations */ 157 static int mdi_mtc_off = 0; 158 159 /* 160 * The "path" to a pathinfo node is identical to the /devices path to a 161 * devinfo node had the device been enumerated under a pHCI instead of 162 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 163 * This association persists across create/delete of the pathinfo nodes, 164 * but not across reboot. 165 */ 166 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 167 static int mdi_pathmap_hash_size = 256; 168 static kmutex_t mdi_pathmap_mutex; 169 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 170 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 171 172 /* 173 * MDI component property name/value string definitions 174 */ 175 const char *mdi_component_prop = "mpxio-component"; 176 const char *mdi_component_prop_vhci = "vhci"; 177 const char *mdi_component_prop_phci = "phci"; 178 const char *mdi_component_prop_client = "client"; 179 180 /* 181 * MDI client global unique identifier property name 182 */ 183 const char *mdi_client_guid_prop = "client-guid"; 184 185 /* 186 * MDI client load balancing property name/value string definitions 187 */ 188 const char *mdi_load_balance = "load-balance"; 189 const char *mdi_load_balance_none = "none"; 190 const char *mdi_load_balance_rr = "round-robin"; 191 const char *mdi_load_balance_lba = "logical-block"; 192 193 /* 194 * Obsolete vHCI class definition; to be removed after Leadville update 195 */ 196 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 197 198 static char vhci_greeting[] = 199 "\tThere already exists one vHCI driver for class %s\n" 200 "\tOnly one vHCI driver for each class is allowed\n"; 201 202 /* 203 * Static function prototypes 204 */ 205 static int i_mdi_phci_offline(dev_info_t *, uint_t); 206 static int i_mdi_client_offline(dev_info_t *, uint_t); 207 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 208 static void i_mdi_phci_post_detach(dev_info_t *, 209 ddi_detach_cmd_t, int); 210 static int i_mdi_client_pre_detach(dev_info_t *, 211 ddi_detach_cmd_t); 212 static void i_mdi_client_post_detach(dev_info_t *, 213 ddi_detach_cmd_t, int); 214 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 215 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 216 static int i_mdi_lba_lb(mdi_client_t *ct, 217 mdi_pathinfo_t **ret_pip, struct buf *buf); 218 static void i_mdi_pm_hold_client(mdi_client_t *, int); 219 static void i_mdi_pm_rele_client(mdi_client_t *, int); 220 static void i_mdi_pm_reset_client(mdi_client_t *); 221 static int i_mdi_power_all_phci(mdi_client_t *); 222 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 223 224 225 /* 226 * Internal mdi_pathinfo node functions 227 */ 228 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 229 230 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 231 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 232 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 233 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 234 static void i_mdi_phci_unlock(mdi_phci_t *); 235 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 236 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 237 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 238 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 239 mdi_client_t *); 240 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 241 static void i_mdi_client_remove_path(mdi_client_t *, 242 mdi_pathinfo_t *); 243 244 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 245 mdi_pathinfo_state_t, int); 246 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 247 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 248 char **, int); 249 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 250 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 251 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 252 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 253 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 254 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 255 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 256 static void i_mdi_client_update_state(mdi_client_t *); 257 static int i_mdi_client_compute_state(mdi_client_t *, 258 mdi_phci_t *); 259 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 260 static void i_mdi_client_unlock(mdi_client_t *); 261 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 262 static mdi_client_t *i_devi_get_client(dev_info_t *); 263 /* 264 * NOTE: this will be removed once the NWS files are changed to use the new 265 * mdi_{enable,disable}_path interfaces 266 */ 267 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 268 int, int); 269 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 270 mdi_vhci_t *vh, int flags, int op); 271 /* 272 * Failover related function prototypes 273 */ 274 static int i_mdi_failover(void *); 275 276 /* 277 * misc internal functions 278 */ 279 static int i_mdi_get_hash_key(char *); 280 static int i_map_nvlist_error_to_mdi(int); 281 static void i_mdi_report_path_state(mdi_client_t *, 282 mdi_pathinfo_t *); 283 284 static void setup_vhci_cache(mdi_vhci_t *); 285 static int destroy_vhci_cache(mdi_vhci_t *); 286 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 287 static boolean_t stop_vhcache_flush_thread(void *, int); 288 static void free_string_array(char **, int); 289 static void free_vhcache_phci(mdi_vhcache_phci_t *); 290 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 291 static void free_vhcache_client(mdi_vhcache_client_t *); 292 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 293 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 294 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 295 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 296 static void vhcache_pi_add(mdi_vhci_config_t *, 297 struct mdi_pathinfo *); 298 static void vhcache_pi_remove(mdi_vhci_config_t *, 299 struct mdi_pathinfo *); 300 static void free_phclient_path_list(mdi_phys_path_t *); 301 static void sort_vhcache_paths(mdi_vhcache_client_t *); 302 static int flush_vhcache(mdi_vhci_config_t *, int); 303 static void vhcache_dirty(mdi_vhci_config_t *); 304 static void free_async_client_config(mdi_async_client_config_t *); 305 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 306 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 307 static nvlist_t *read_on_disk_vhci_cache(char *); 308 extern int fread_nvlist(char *, nvlist_t **); 309 extern int fwrite_nvlist(char *, nvlist_t *); 310 311 /* called once when first vhci registers with mdi */ 312 static void 313 i_mdi_init() 314 { 315 static int initialized = 0; 316 317 if (initialized) 318 return; 319 initialized = 1; 320 321 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 322 323 /* Create our taskq resources */ 324 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 325 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 326 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 327 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 328 329 /* Allocate ['path_instance' <-> "path"] maps */ 330 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 331 mdi_pathmap_bypath = mod_hash_create_strhash( 332 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 333 mod_hash_null_valdtor); 334 mdi_pathmap_byinstance = mod_hash_create_idhash( 335 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 336 mod_hash_null_valdtor); 337 } 338 339 /* 340 * mdi_get_component_type(): 341 * Return mpxio component type 342 * Return Values: 343 * MDI_COMPONENT_NONE 344 * MDI_COMPONENT_VHCI 345 * MDI_COMPONENT_PHCI 346 * MDI_COMPONENT_CLIENT 347 * XXX This doesn't work under multi-level MPxIO and should be 348 * removed when clients migrate mdi_component_is_*() interfaces. 349 */ 350 int 351 mdi_get_component_type(dev_info_t *dip) 352 { 353 return (DEVI(dip)->devi_mdi_component); 354 } 355 356 /* 357 * mdi_vhci_register(): 358 * Register a vHCI module with the mpxio framework 359 * mdi_vhci_register() is called by vHCI drivers to register the 360 * 'class_driver' vHCI driver and its MDI entrypoints with the 361 * mpxio framework. The vHCI driver must call this interface as 362 * part of its attach(9e) handler. 363 * Competing threads may try to attach mdi_vhci_register() as 364 * the vHCI drivers are loaded and attached as a result of pHCI 365 * driver instance registration (mdi_phci_register()) with the 366 * framework. 367 * Return Values: 368 * MDI_SUCCESS 369 * MDI_FAILURE 370 */ 371 /*ARGSUSED*/ 372 int 373 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 374 int flags) 375 { 376 mdi_vhci_t *vh = NULL; 377 378 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 379 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 380 381 i_mdi_init(); 382 383 mutex_enter(&mdi_mutex); 384 /* 385 * Scan for already registered vhci 386 */ 387 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 388 if (strcmp(vh->vh_class, class) == 0) { 389 /* 390 * vHCI has already been created. Check for valid 391 * vHCI ops registration. We only support one vHCI 392 * module per class 393 */ 394 if (vh->vh_ops != NULL) { 395 mutex_exit(&mdi_mutex); 396 cmn_err(CE_NOTE, vhci_greeting, class); 397 return (MDI_FAILURE); 398 } 399 break; 400 } 401 } 402 403 /* 404 * if not yet created, create the vHCI component 405 */ 406 if (vh == NULL) { 407 struct client_hash *hash = NULL; 408 char *load_balance; 409 410 /* 411 * Allocate and initialize the mdi extensions 412 */ 413 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 414 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 415 KM_SLEEP); 416 vh->vh_client_table = hash; 417 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 418 (void) strcpy(vh->vh_class, class); 419 vh->vh_lb = LOAD_BALANCE_RR; 420 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 421 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 422 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 423 vh->vh_lb = LOAD_BALANCE_NONE; 424 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 425 == 0) { 426 vh->vh_lb = LOAD_BALANCE_LBA; 427 } 428 ddi_prop_free(load_balance); 429 } 430 431 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 432 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 433 434 /* 435 * Store the vHCI ops vectors 436 */ 437 vh->vh_dip = vdip; 438 vh->vh_ops = vops; 439 440 setup_vhci_cache(vh); 441 442 if (mdi_vhci_head == NULL) { 443 mdi_vhci_head = vh; 444 } 445 if (mdi_vhci_tail) { 446 mdi_vhci_tail->vh_next = vh; 447 } 448 mdi_vhci_tail = vh; 449 mdi_vhci_count++; 450 } 451 452 /* 453 * Claim the devfs node as a vhci component 454 */ 455 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 456 457 /* 458 * Initialize our back reference from dev_info node 459 */ 460 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 461 mutex_exit(&mdi_mutex); 462 return (MDI_SUCCESS); 463 } 464 465 /* 466 * mdi_vhci_unregister(): 467 * Unregister a vHCI module from mpxio framework 468 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 469 * of a vhci to unregister it from the framework. 470 * Return Values: 471 * MDI_SUCCESS 472 * MDI_FAILURE 473 */ 474 /*ARGSUSED*/ 475 int 476 mdi_vhci_unregister(dev_info_t *vdip, int flags) 477 { 478 mdi_vhci_t *found, *vh, *prev = NULL; 479 480 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 481 482 /* 483 * Check for invalid VHCI 484 */ 485 if ((vh = i_devi_get_vhci(vdip)) == NULL) 486 return (MDI_FAILURE); 487 488 /* 489 * Scan the list of registered vHCIs for a match 490 */ 491 mutex_enter(&mdi_mutex); 492 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 493 if (found == vh) 494 break; 495 prev = found; 496 } 497 498 if (found == NULL) { 499 mutex_exit(&mdi_mutex); 500 return (MDI_FAILURE); 501 } 502 503 /* 504 * Check the vHCI, pHCI and client count. All the pHCIs and clients 505 * should have been unregistered, before a vHCI can be 506 * unregistered. 507 */ 508 MDI_VHCI_PHCI_LOCK(vh); 509 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 510 MDI_VHCI_PHCI_UNLOCK(vh); 511 mutex_exit(&mdi_mutex); 512 return (MDI_FAILURE); 513 } 514 MDI_VHCI_PHCI_UNLOCK(vh); 515 516 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 517 mutex_exit(&mdi_mutex); 518 return (MDI_FAILURE); 519 } 520 521 /* 522 * Remove the vHCI from the global list 523 */ 524 if (vh == mdi_vhci_head) { 525 mdi_vhci_head = vh->vh_next; 526 } else { 527 prev->vh_next = vh->vh_next; 528 } 529 if (vh == mdi_vhci_tail) { 530 mdi_vhci_tail = prev; 531 } 532 mdi_vhci_count--; 533 mutex_exit(&mdi_mutex); 534 535 vh->vh_ops = NULL; 536 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 537 DEVI(vdip)->devi_mdi_xhci = NULL; 538 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 539 kmem_free(vh->vh_client_table, 540 mdi_client_table_size * sizeof (struct client_hash)); 541 mutex_destroy(&vh->vh_phci_mutex); 542 mutex_destroy(&vh->vh_client_mutex); 543 544 kmem_free(vh, sizeof (mdi_vhci_t)); 545 return (MDI_SUCCESS); 546 } 547 548 /* 549 * i_mdi_vhci_class2vhci(): 550 * Look for a matching vHCI module given a vHCI class name 551 * Return Values: 552 * Handle to a vHCI component 553 * NULL 554 */ 555 static mdi_vhci_t * 556 i_mdi_vhci_class2vhci(char *class) 557 { 558 mdi_vhci_t *vh = NULL; 559 560 ASSERT(!MUTEX_HELD(&mdi_mutex)); 561 562 mutex_enter(&mdi_mutex); 563 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 564 if (strcmp(vh->vh_class, class) == 0) { 565 break; 566 } 567 } 568 mutex_exit(&mdi_mutex); 569 return (vh); 570 } 571 572 /* 573 * i_devi_get_vhci(): 574 * Utility function to get the handle to a vHCI component 575 * Return Values: 576 * Handle to a vHCI component 577 * NULL 578 */ 579 mdi_vhci_t * 580 i_devi_get_vhci(dev_info_t *vdip) 581 { 582 mdi_vhci_t *vh = NULL; 583 if (MDI_VHCI(vdip)) { 584 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 585 } 586 return (vh); 587 } 588 589 /* 590 * mdi_phci_register(): 591 * Register a pHCI module with mpxio framework 592 * mdi_phci_register() is called by pHCI drivers to register with 593 * the mpxio framework and a specific 'class_driver' vHCI. The 594 * pHCI driver must call this interface as part of its attach(9e) 595 * handler. 596 * Return Values: 597 * MDI_SUCCESS 598 * MDI_FAILURE 599 */ 600 /*ARGSUSED*/ 601 int 602 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 603 { 604 mdi_phci_t *ph; 605 mdi_vhci_t *vh; 606 char *data; 607 char *pathname; 608 609 /* 610 * Some subsystems, like fcp, perform pHCI registration from a 611 * different thread than the one doing the pHCI attach(9E) - the 612 * driver attach code is waiting for this other thread to complete. 613 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 614 * (indicating that some thread has done an ndi_devi_enter of parent) 615 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 616 */ 617 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 618 619 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 620 (void) ddi_pathname(pdip, pathname); 621 622 /* 623 * Check for mpxio-disable property. Enable mpxio if the property is 624 * missing or not set to "yes". 625 * If the property is set to "yes" then emit a brief message. 626 */ 627 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 628 &data) == DDI_SUCCESS)) { 629 if (strcmp(data, "yes") == 0) { 630 MDI_DEBUG(1, (CE_CONT, pdip, 631 "?%s (%s%d) multipath capabilities " 632 "disabled via %s.conf.\n", pathname, 633 ddi_driver_name(pdip), ddi_get_instance(pdip), 634 ddi_driver_name(pdip))); 635 ddi_prop_free(data); 636 kmem_free(pathname, MAXPATHLEN); 637 return (MDI_FAILURE); 638 } 639 ddi_prop_free(data); 640 } 641 642 kmem_free(pathname, MAXPATHLEN); 643 644 /* 645 * Search for a matching vHCI 646 */ 647 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 648 if (vh == NULL) { 649 return (MDI_FAILURE); 650 } 651 652 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 653 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 654 ph->ph_dip = pdip; 655 ph->ph_vhci = vh; 656 ph->ph_next = NULL; 657 ph->ph_unstable = 0; 658 ph->ph_vprivate = 0; 659 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 660 661 MDI_PHCI_LOCK(ph); 662 MDI_PHCI_SET_POWER_UP(ph); 663 MDI_PHCI_UNLOCK(ph); 664 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 665 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 666 667 vhcache_phci_add(vh->vh_config, ph); 668 669 MDI_VHCI_PHCI_LOCK(vh); 670 if (vh->vh_phci_head == NULL) { 671 vh->vh_phci_head = ph; 672 } 673 if (vh->vh_phci_tail) { 674 vh->vh_phci_tail->ph_next = ph; 675 } 676 vh->vh_phci_tail = ph; 677 vh->vh_phci_count++; 678 MDI_VHCI_PHCI_UNLOCK(vh); 679 680 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 681 return (MDI_SUCCESS); 682 } 683 684 /* 685 * mdi_phci_unregister(): 686 * Unregister a pHCI module from mpxio framework 687 * mdi_phci_unregister() is called by the pHCI drivers from their 688 * detach(9E) handler to unregister their instances from the 689 * framework. 690 * Return Values: 691 * MDI_SUCCESS 692 * MDI_FAILURE 693 */ 694 /*ARGSUSED*/ 695 int 696 mdi_phci_unregister(dev_info_t *pdip, int flags) 697 { 698 mdi_vhci_t *vh; 699 mdi_phci_t *ph; 700 mdi_phci_t *tmp; 701 mdi_phci_t *prev = NULL; 702 703 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 704 705 ph = i_devi_get_phci(pdip); 706 if (ph == NULL) { 707 MDI_DEBUG(1, (CE_WARN, pdip, 708 "!pHCI unregister: Not a valid pHCI")); 709 return (MDI_FAILURE); 710 } 711 712 vh = ph->ph_vhci; 713 ASSERT(vh != NULL); 714 if (vh == NULL) { 715 MDI_DEBUG(1, (CE_WARN, pdip, 716 "!pHCI unregister: Not a valid vHCI")); 717 return (MDI_FAILURE); 718 } 719 720 MDI_VHCI_PHCI_LOCK(vh); 721 tmp = vh->vh_phci_head; 722 while (tmp) { 723 if (tmp == ph) { 724 break; 725 } 726 prev = tmp; 727 tmp = tmp->ph_next; 728 } 729 730 if (ph == vh->vh_phci_head) { 731 vh->vh_phci_head = ph->ph_next; 732 } else { 733 prev->ph_next = ph->ph_next; 734 } 735 736 if (ph == vh->vh_phci_tail) { 737 vh->vh_phci_tail = prev; 738 } 739 740 vh->vh_phci_count--; 741 MDI_VHCI_PHCI_UNLOCK(vh); 742 743 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 744 ESC_DDI_INITIATOR_UNREGISTER); 745 vhcache_phci_remove(vh->vh_config, ph); 746 cv_destroy(&ph->ph_unstable_cv); 747 mutex_destroy(&ph->ph_mutex); 748 kmem_free(ph, sizeof (mdi_phci_t)); 749 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 750 DEVI(pdip)->devi_mdi_xhci = NULL; 751 return (MDI_SUCCESS); 752 } 753 754 /* 755 * i_devi_get_phci(): 756 * Utility function to return the phci extensions. 757 */ 758 static mdi_phci_t * 759 i_devi_get_phci(dev_info_t *pdip) 760 { 761 mdi_phci_t *ph = NULL; 762 if (MDI_PHCI(pdip)) { 763 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 764 } 765 return (ph); 766 } 767 768 /* 769 * Single thread mdi entry into devinfo node for modifying its children. 770 * If necessary we perform an ndi_devi_enter of the vHCI before doing 771 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 772 * for the vHCI and one for the pHCI. 773 */ 774 void 775 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 776 { 777 dev_info_t *vdip; 778 int vcircular, pcircular; 779 780 /* Verify calling context */ 781 ASSERT(MDI_PHCI(phci_dip)); 782 vdip = mdi_devi_get_vdip(phci_dip); 783 ASSERT(vdip); /* A pHCI always has a vHCI */ 784 785 /* 786 * If pHCI is detaching then the framework has already entered the 787 * vHCI on a threads that went down the code path leading to 788 * detach_node(). This framework enter of the vHCI during pHCI 789 * detach is done to avoid deadlock with vHCI power management 790 * operations which enter the vHCI and the enter down the path 791 * to the pHCI. If pHCI is detaching then we piggyback this calls 792 * enter of the vHCI on frameworks vHCI enter that has already 793 * occurred - this is OK because we know that the framework thread 794 * doing detach is waiting for our completion. 795 * 796 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 797 * race with detach - but we can't do that because the framework has 798 * already entered the parent, so we have some complexity instead. 799 */ 800 for (;;) { 801 if (ndi_devi_tryenter(vdip, &vcircular)) { 802 ASSERT(vcircular != -1); 803 if (DEVI_IS_DETACHING(phci_dip)) { 804 ndi_devi_exit(vdip, vcircular); 805 vcircular = -1; 806 } 807 break; 808 } else if (DEVI_IS_DETACHING(phci_dip)) { 809 vcircular = -1; 810 break; 811 } else { 812 delay(1); 813 } 814 } 815 816 ndi_devi_enter(phci_dip, &pcircular); 817 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 818 } 819 820 /* 821 * Release mdi_devi_enter or successful mdi_devi_tryenter. 822 */ 823 void 824 mdi_devi_exit(dev_info_t *phci_dip, int circular) 825 { 826 dev_info_t *vdip; 827 int vcircular, pcircular; 828 829 /* Verify calling context */ 830 ASSERT(MDI_PHCI(phci_dip)); 831 vdip = mdi_devi_get_vdip(phci_dip); 832 ASSERT(vdip); /* A pHCI always has a vHCI */ 833 834 /* extract two circular recursion values from single int */ 835 pcircular = (short)(circular & 0xFFFF); 836 vcircular = (short)((circular >> 16) & 0xFFFF); 837 838 ndi_devi_exit(phci_dip, pcircular); 839 if (vcircular != -1) 840 ndi_devi_exit(vdip, vcircular); 841 } 842 843 /* 844 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 845 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 846 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 847 * with vHCI power management code during path online/offline. Each 848 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 849 * occur within the scope of an active mdi_devi_enter that establishes the 850 * circular value. 851 */ 852 void 853 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 854 { 855 int pcircular; 856 857 /* Verify calling context */ 858 ASSERT(MDI_PHCI(phci_dip)); 859 860 pcircular = (short)(circular & 0xFFFF); 861 ndi_devi_exit(phci_dip, pcircular); 862 } 863 864 void 865 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 866 { 867 int pcircular; 868 869 /* Verify calling context */ 870 ASSERT(MDI_PHCI(phci_dip)); 871 872 ndi_devi_enter(phci_dip, &pcircular); 873 874 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 875 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 876 } 877 878 /* 879 * mdi_devi_get_vdip(): 880 * given a pHCI dip return vHCI dip 881 */ 882 dev_info_t * 883 mdi_devi_get_vdip(dev_info_t *pdip) 884 { 885 mdi_phci_t *ph; 886 887 ph = i_devi_get_phci(pdip); 888 if (ph && ph->ph_vhci) 889 return (ph->ph_vhci->vh_dip); 890 return (NULL); 891 } 892 893 /* 894 * mdi_devi_pdip_entered(): 895 * Return 1 if we are vHCI and have done an ndi_devi_enter 896 * of a pHCI 897 */ 898 int 899 mdi_devi_pdip_entered(dev_info_t *vdip) 900 { 901 mdi_vhci_t *vh; 902 mdi_phci_t *ph; 903 904 vh = i_devi_get_vhci(vdip); 905 if (vh == NULL) 906 return (0); 907 908 MDI_VHCI_PHCI_LOCK(vh); 909 ph = vh->vh_phci_head; 910 while (ph) { 911 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 912 MDI_VHCI_PHCI_UNLOCK(vh); 913 return (1); 914 } 915 ph = ph->ph_next; 916 } 917 MDI_VHCI_PHCI_UNLOCK(vh); 918 return (0); 919 } 920 921 /* 922 * mdi_phci_path2devinfo(): 923 * Utility function to search for a valid phci device given 924 * the devfs pathname. 925 */ 926 dev_info_t * 927 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 928 { 929 char *temp_pathname; 930 mdi_vhci_t *vh; 931 mdi_phci_t *ph; 932 dev_info_t *pdip = NULL; 933 934 vh = i_devi_get_vhci(vdip); 935 ASSERT(vh != NULL); 936 937 if (vh == NULL) { 938 /* 939 * Invalid vHCI component, return failure 940 */ 941 return (NULL); 942 } 943 944 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 945 MDI_VHCI_PHCI_LOCK(vh); 946 ph = vh->vh_phci_head; 947 while (ph != NULL) { 948 pdip = ph->ph_dip; 949 ASSERT(pdip != NULL); 950 *temp_pathname = '\0'; 951 (void) ddi_pathname(pdip, temp_pathname); 952 if (strcmp(temp_pathname, pathname) == 0) { 953 break; 954 } 955 ph = ph->ph_next; 956 } 957 if (ph == NULL) { 958 pdip = NULL; 959 } 960 MDI_VHCI_PHCI_UNLOCK(vh); 961 kmem_free(temp_pathname, MAXPATHLEN); 962 return (pdip); 963 } 964 965 /* 966 * mdi_phci_get_path_count(): 967 * get number of path information nodes associated with a given 968 * pHCI device. 969 */ 970 int 971 mdi_phci_get_path_count(dev_info_t *pdip) 972 { 973 mdi_phci_t *ph; 974 int count = 0; 975 976 ph = i_devi_get_phci(pdip); 977 if (ph != NULL) { 978 count = ph->ph_path_count; 979 } 980 return (count); 981 } 982 983 /* 984 * i_mdi_phci_lock(): 985 * Lock a pHCI device 986 * Return Values: 987 * None 988 * Note: 989 * The default locking order is: 990 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 991 * But there are number of situations where locks need to be 992 * grabbed in reverse order. This routine implements try and lock 993 * mechanism depending on the requested parameter option. 994 */ 995 static void 996 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 997 { 998 if (pip) { 999 /* Reverse locking is requested. */ 1000 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1001 /* 1002 * tryenter failed. Try to grab again 1003 * after a small delay 1004 */ 1005 MDI_PI_HOLD(pip); 1006 MDI_PI_UNLOCK(pip); 1007 delay(1); 1008 MDI_PI_LOCK(pip); 1009 MDI_PI_RELE(pip); 1010 } 1011 } else { 1012 MDI_PHCI_LOCK(ph); 1013 } 1014 } 1015 1016 /* 1017 * i_mdi_phci_unlock(): 1018 * Unlock the pHCI component 1019 */ 1020 static void 1021 i_mdi_phci_unlock(mdi_phci_t *ph) 1022 { 1023 MDI_PHCI_UNLOCK(ph); 1024 } 1025 1026 /* 1027 * i_mdi_devinfo_create(): 1028 * create client device's devinfo node 1029 * Return Values: 1030 * dev_info 1031 * NULL 1032 * Notes: 1033 */ 1034 static dev_info_t * 1035 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1036 char **compatible, int ncompatible) 1037 { 1038 dev_info_t *cdip = NULL; 1039 1040 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1041 1042 /* Verify for duplicate entry */ 1043 cdip = i_mdi_devinfo_find(vh, name, guid); 1044 ASSERT(cdip == NULL); 1045 if (cdip) { 1046 cmn_err(CE_WARN, 1047 "i_mdi_devinfo_create: client dip %p already exists", 1048 (void *)cdip); 1049 } 1050 1051 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1052 if (cdip == NULL) 1053 goto fail; 1054 1055 /* 1056 * Create component type and Global unique identifier 1057 * properties 1058 */ 1059 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1060 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1061 goto fail; 1062 } 1063 1064 /* Decorate the node with compatible property */ 1065 if (compatible && 1066 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1067 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1068 goto fail; 1069 } 1070 1071 return (cdip); 1072 1073 fail: 1074 if (cdip) { 1075 (void) ndi_prop_remove_all(cdip); 1076 (void) ndi_devi_free(cdip); 1077 } 1078 return (NULL); 1079 } 1080 1081 /* 1082 * i_mdi_devinfo_find(): 1083 * Find a matching devinfo node for given client node name 1084 * and its guid. 1085 * Return Values: 1086 * Handle to a dev_info node or NULL 1087 */ 1088 static dev_info_t * 1089 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1090 { 1091 char *data; 1092 dev_info_t *cdip = NULL; 1093 dev_info_t *ndip = NULL; 1094 int circular; 1095 1096 ndi_devi_enter(vh->vh_dip, &circular); 1097 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1098 while ((cdip = ndip) != NULL) { 1099 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1100 1101 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1102 continue; 1103 } 1104 1105 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1106 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1107 &data) != DDI_PROP_SUCCESS) { 1108 continue; 1109 } 1110 1111 if (strcmp(data, guid) != 0) { 1112 ddi_prop_free(data); 1113 continue; 1114 } 1115 ddi_prop_free(data); 1116 break; 1117 } 1118 ndi_devi_exit(vh->vh_dip, circular); 1119 return (cdip); 1120 } 1121 1122 /* 1123 * i_mdi_devinfo_remove(): 1124 * Remove a client device node 1125 */ 1126 static int 1127 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1128 { 1129 int rv = MDI_SUCCESS; 1130 1131 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1132 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1133 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1134 if (rv != NDI_SUCCESS) { 1135 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1136 " failed. cdip = %p\n", (void *)cdip)); 1137 } 1138 /* 1139 * Convert to MDI error code 1140 */ 1141 switch (rv) { 1142 case NDI_SUCCESS: 1143 rv = MDI_SUCCESS; 1144 break; 1145 case NDI_BUSY: 1146 rv = MDI_BUSY; 1147 break; 1148 default: 1149 rv = MDI_FAILURE; 1150 break; 1151 } 1152 } 1153 return (rv); 1154 } 1155 1156 /* 1157 * i_devi_get_client() 1158 * Utility function to get mpxio component extensions 1159 */ 1160 static mdi_client_t * 1161 i_devi_get_client(dev_info_t *cdip) 1162 { 1163 mdi_client_t *ct = NULL; 1164 1165 if (MDI_CLIENT(cdip)) { 1166 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1167 } 1168 return (ct); 1169 } 1170 1171 /* 1172 * i_mdi_is_child_present(): 1173 * Search for the presence of client device dev_info node 1174 */ 1175 static int 1176 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1177 { 1178 int rv = MDI_FAILURE; 1179 struct dev_info *dip; 1180 int circular; 1181 1182 ndi_devi_enter(vdip, &circular); 1183 dip = DEVI(vdip)->devi_child; 1184 while (dip) { 1185 if (dip == DEVI(cdip)) { 1186 rv = MDI_SUCCESS; 1187 break; 1188 } 1189 dip = dip->devi_sibling; 1190 } 1191 ndi_devi_exit(vdip, circular); 1192 return (rv); 1193 } 1194 1195 1196 /* 1197 * i_mdi_client_lock(): 1198 * Grab client component lock 1199 * Return Values: 1200 * None 1201 * Note: 1202 * The default locking order is: 1203 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1204 * But there are number of situations where locks need to be 1205 * grabbed in reverse order. This routine implements try and lock 1206 * mechanism depending on the requested parameter option. 1207 */ 1208 static void 1209 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1210 { 1211 if (pip) { 1212 /* 1213 * Reverse locking is requested. 1214 */ 1215 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1216 /* 1217 * tryenter failed. Try to grab again 1218 * after a small delay 1219 */ 1220 MDI_PI_HOLD(pip); 1221 MDI_PI_UNLOCK(pip); 1222 delay(1); 1223 MDI_PI_LOCK(pip); 1224 MDI_PI_RELE(pip); 1225 } 1226 } else { 1227 MDI_CLIENT_LOCK(ct); 1228 } 1229 } 1230 1231 /* 1232 * i_mdi_client_unlock(): 1233 * Unlock a client component 1234 */ 1235 static void 1236 i_mdi_client_unlock(mdi_client_t *ct) 1237 { 1238 MDI_CLIENT_UNLOCK(ct); 1239 } 1240 1241 /* 1242 * i_mdi_client_alloc(): 1243 * Allocate and initialize a client structure. Caller should 1244 * hold the vhci client lock. 1245 * Return Values: 1246 * Handle to a client component 1247 */ 1248 /*ARGSUSED*/ 1249 static mdi_client_t * 1250 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1251 { 1252 mdi_client_t *ct; 1253 1254 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1255 1256 /* 1257 * Allocate and initialize a component structure. 1258 */ 1259 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1260 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1261 ct->ct_hnext = NULL; 1262 ct->ct_hprev = NULL; 1263 ct->ct_dip = NULL; 1264 ct->ct_vhci = vh; 1265 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1266 (void) strcpy(ct->ct_drvname, name); 1267 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1268 (void) strcpy(ct->ct_guid, lguid); 1269 ct->ct_cprivate = NULL; 1270 ct->ct_vprivate = NULL; 1271 ct->ct_flags = 0; 1272 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1273 MDI_CLIENT_LOCK(ct); 1274 MDI_CLIENT_SET_OFFLINE(ct); 1275 MDI_CLIENT_SET_DETACH(ct); 1276 MDI_CLIENT_SET_POWER_UP(ct); 1277 MDI_CLIENT_UNLOCK(ct); 1278 ct->ct_failover_flags = 0; 1279 ct->ct_failover_status = 0; 1280 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1281 ct->ct_unstable = 0; 1282 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1283 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1284 ct->ct_lb = vh->vh_lb; 1285 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1286 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1287 ct->ct_path_count = 0; 1288 ct->ct_path_head = NULL; 1289 ct->ct_path_tail = NULL; 1290 ct->ct_path_last = NULL; 1291 1292 /* 1293 * Add this client component to our client hash queue 1294 */ 1295 i_mdi_client_enlist_table(vh, ct); 1296 return (ct); 1297 } 1298 1299 /* 1300 * i_mdi_client_enlist_table(): 1301 * Attach the client device to the client hash table. Caller 1302 * should hold the vhci client lock. 1303 */ 1304 static void 1305 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1306 { 1307 int index; 1308 struct client_hash *head; 1309 1310 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1311 1312 index = i_mdi_get_hash_key(ct->ct_guid); 1313 head = &vh->vh_client_table[index]; 1314 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1315 head->ct_hash_head = ct; 1316 head->ct_hash_count++; 1317 vh->vh_client_count++; 1318 } 1319 1320 /* 1321 * i_mdi_client_delist_table(): 1322 * Attach the client device to the client hash table. 1323 * Caller should hold the vhci client lock. 1324 */ 1325 static void 1326 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1327 { 1328 int index; 1329 char *guid; 1330 struct client_hash *head; 1331 mdi_client_t *next; 1332 mdi_client_t *last; 1333 1334 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1335 1336 guid = ct->ct_guid; 1337 index = i_mdi_get_hash_key(guid); 1338 head = &vh->vh_client_table[index]; 1339 1340 last = NULL; 1341 next = (mdi_client_t *)head->ct_hash_head; 1342 while (next != NULL) { 1343 if (next == ct) { 1344 break; 1345 } 1346 last = next; 1347 next = next->ct_hnext; 1348 } 1349 1350 if (next) { 1351 head->ct_hash_count--; 1352 if (last == NULL) { 1353 head->ct_hash_head = ct->ct_hnext; 1354 } else { 1355 last->ct_hnext = ct->ct_hnext; 1356 } 1357 ct->ct_hnext = NULL; 1358 vh->vh_client_count--; 1359 } 1360 } 1361 1362 1363 /* 1364 * i_mdi_client_free(): 1365 * Free a client component 1366 */ 1367 static int 1368 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1369 { 1370 int rv = MDI_SUCCESS; 1371 int flags = ct->ct_flags; 1372 dev_info_t *cdip; 1373 dev_info_t *vdip; 1374 1375 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1376 1377 vdip = vh->vh_dip; 1378 cdip = ct->ct_dip; 1379 1380 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1381 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1382 DEVI(cdip)->devi_mdi_client = NULL; 1383 1384 /* 1385 * Clear out back ref. to dev_info_t node 1386 */ 1387 ct->ct_dip = NULL; 1388 1389 /* 1390 * Remove this client from our hash queue 1391 */ 1392 i_mdi_client_delist_table(vh, ct); 1393 1394 /* 1395 * Uninitialize and free the component 1396 */ 1397 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1398 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1399 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1400 cv_destroy(&ct->ct_failover_cv); 1401 cv_destroy(&ct->ct_unstable_cv); 1402 cv_destroy(&ct->ct_powerchange_cv); 1403 mutex_destroy(&ct->ct_mutex); 1404 kmem_free(ct, sizeof (*ct)); 1405 1406 if (cdip != NULL) { 1407 MDI_VHCI_CLIENT_UNLOCK(vh); 1408 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1409 MDI_VHCI_CLIENT_LOCK(vh); 1410 } 1411 return (rv); 1412 } 1413 1414 /* 1415 * i_mdi_client_find(): 1416 * Find the client structure corresponding to a given guid 1417 * Caller should hold the vhci client lock. 1418 */ 1419 static mdi_client_t * 1420 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1421 { 1422 int index; 1423 struct client_hash *head; 1424 mdi_client_t *ct; 1425 1426 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1427 1428 index = i_mdi_get_hash_key(guid); 1429 head = &vh->vh_client_table[index]; 1430 1431 ct = head->ct_hash_head; 1432 while (ct != NULL) { 1433 if (strcmp(ct->ct_guid, guid) == 0 && 1434 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1435 break; 1436 } 1437 ct = ct->ct_hnext; 1438 } 1439 return (ct); 1440 } 1441 1442 /* 1443 * i_mdi_client_update_state(): 1444 * Compute and update client device state 1445 * Notes: 1446 * A client device can be in any of three possible states: 1447 * 1448 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1449 * one online/standby paths. Can tolerate failures. 1450 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1451 * no alternate paths available as standby. A failure on the online 1452 * would result in loss of access to device data. 1453 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1454 * no paths available to access the device. 1455 */ 1456 static void 1457 i_mdi_client_update_state(mdi_client_t *ct) 1458 { 1459 int state; 1460 1461 ASSERT(MDI_CLIENT_LOCKED(ct)); 1462 state = i_mdi_client_compute_state(ct, NULL); 1463 MDI_CLIENT_SET_STATE(ct, state); 1464 } 1465 1466 /* 1467 * i_mdi_client_compute_state(): 1468 * Compute client device state 1469 * 1470 * mdi_phci_t * Pointer to pHCI structure which should 1471 * while computing the new value. Used by 1472 * i_mdi_phci_offline() to find the new 1473 * client state after DR of a pHCI. 1474 */ 1475 static int 1476 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1477 { 1478 int state; 1479 int online_count = 0; 1480 int standby_count = 0; 1481 mdi_pathinfo_t *pip, *next; 1482 1483 ASSERT(MDI_CLIENT_LOCKED(ct)); 1484 pip = ct->ct_path_head; 1485 while (pip != NULL) { 1486 MDI_PI_LOCK(pip); 1487 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1488 if (MDI_PI(pip)->pi_phci == ph) { 1489 MDI_PI_UNLOCK(pip); 1490 pip = next; 1491 continue; 1492 } 1493 1494 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1495 == MDI_PATHINFO_STATE_ONLINE) 1496 online_count++; 1497 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1498 == MDI_PATHINFO_STATE_STANDBY) 1499 standby_count++; 1500 MDI_PI_UNLOCK(pip); 1501 pip = next; 1502 } 1503 1504 if (online_count == 0) { 1505 if (standby_count == 0) { 1506 state = MDI_CLIENT_STATE_FAILED; 1507 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1508 " ct = %p\n", (void *)ct)); 1509 } else if (standby_count == 1) { 1510 state = MDI_CLIENT_STATE_DEGRADED; 1511 } else { 1512 state = MDI_CLIENT_STATE_OPTIMAL; 1513 } 1514 } else if (online_count == 1) { 1515 if (standby_count == 0) { 1516 state = MDI_CLIENT_STATE_DEGRADED; 1517 } else { 1518 state = MDI_CLIENT_STATE_OPTIMAL; 1519 } 1520 } else { 1521 state = MDI_CLIENT_STATE_OPTIMAL; 1522 } 1523 return (state); 1524 } 1525 1526 /* 1527 * i_mdi_client2devinfo(): 1528 * Utility function 1529 */ 1530 dev_info_t * 1531 i_mdi_client2devinfo(mdi_client_t *ct) 1532 { 1533 return (ct->ct_dip); 1534 } 1535 1536 /* 1537 * mdi_client_path2_devinfo(): 1538 * Given the parent devinfo and child devfs pathname, search for 1539 * a valid devfs node handle. 1540 */ 1541 dev_info_t * 1542 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1543 { 1544 dev_info_t *cdip = NULL; 1545 dev_info_t *ndip = NULL; 1546 char *temp_pathname; 1547 int circular; 1548 1549 /* 1550 * Allocate temp buffer 1551 */ 1552 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1553 1554 /* 1555 * Lock parent against changes 1556 */ 1557 ndi_devi_enter(vdip, &circular); 1558 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1559 while ((cdip = ndip) != NULL) { 1560 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1561 1562 *temp_pathname = '\0'; 1563 (void) ddi_pathname(cdip, temp_pathname); 1564 if (strcmp(temp_pathname, pathname) == 0) { 1565 break; 1566 } 1567 } 1568 /* 1569 * Release devinfo lock 1570 */ 1571 ndi_devi_exit(vdip, circular); 1572 1573 /* 1574 * Free the temp buffer 1575 */ 1576 kmem_free(temp_pathname, MAXPATHLEN); 1577 return (cdip); 1578 } 1579 1580 /* 1581 * mdi_client_get_path_count(): 1582 * Utility function to get number of path information nodes 1583 * associated with a given client device. 1584 */ 1585 int 1586 mdi_client_get_path_count(dev_info_t *cdip) 1587 { 1588 mdi_client_t *ct; 1589 int count = 0; 1590 1591 ct = i_devi_get_client(cdip); 1592 if (ct != NULL) { 1593 count = ct->ct_path_count; 1594 } 1595 return (count); 1596 } 1597 1598 1599 /* 1600 * i_mdi_get_hash_key(): 1601 * Create a hash using strings as keys 1602 * 1603 */ 1604 static int 1605 i_mdi_get_hash_key(char *str) 1606 { 1607 uint32_t g, hash = 0; 1608 char *p; 1609 1610 for (p = str; *p != '\0'; p++) { 1611 g = *p; 1612 hash += g; 1613 } 1614 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1615 } 1616 1617 /* 1618 * mdi_get_lb_policy(): 1619 * Get current load balancing policy for a given client device 1620 */ 1621 client_lb_t 1622 mdi_get_lb_policy(dev_info_t *cdip) 1623 { 1624 client_lb_t lb = LOAD_BALANCE_NONE; 1625 mdi_client_t *ct; 1626 1627 ct = i_devi_get_client(cdip); 1628 if (ct != NULL) { 1629 lb = ct->ct_lb; 1630 } 1631 return (lb); 1632 } 1633 1634 /* 1635 * mdi_set_lb_region_size(): 1636 * Set current region size for the load-balance 1637 */ 1638 int 1639 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1640 { 1641 mdi_client_t *ct; 1642 int rv = MDI_FAILURE; 1643 1644 ct = i_devi_get_client(cdip); 1645 if (ct != NULL && ct->ct_lb_args != NULL) { 1646 ct->ct_lb_args->region_size = region_size; 1647 rv = MDI_SUCCESS; 1648 } 1649 return (rv); 1650 } 1651 1652 /* 1653 * mdi_Set_lb_policy(): 1654 * Set current load balancing policy for a given client device 1655 */ 1656 int 1657 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1658 { 1659 mdi_client_t *ct; 1660 int rv = MDI_FAILURE; 1661 1662 ct = i_devi_get_client(cdip); 1663 if (ct != NULL) { 1664 ct->ct_lb = lb; 1665 rv = MDI_SUCCESS; 1666 } 1667 return (rv); 1668 } 1669 1670 /* 1671 * mdi_failover(): 1672 * failover function called by the vHCI drivers to initiate 1673 * a failover operation. This is typically due to non-availability 1674 * of online paths to route I/O requests. Failover can be 1675 * triggered through user application also. 1676 * 1677 * The vHCI driver calls mdi_failover() to initiate a failover 1678 * operation. mdi_failover() calls back into the vHCI driver's 1679 * vo_failover() entry point to perform the actual failover 1680 * operation. The reason for requiring the vHCI driver to 1681 * initiate failover by calling mdi_failover(), instead of directly 1682 * executing vo_failover() itself, is to ensure that the mdi 1683 * framework can keep track of the client state properly. 1684 * Additionally, mdi_failover() provides as a convenience the 1685 * option of performing the failover operation synchronously or 1686 * asynchronously 1687 * 1688 * Upon successful completion of the failover operation, the 1689 * paths that were previously ONLINE will be in the STANDBY state, 1690 * and the newly activated paths will be in the ONLINE state. 1691 * 1692 * The flags modifier determines whether the activation is done 1693 * synchronously: MDI_FAILOVER_SYNC 1694 * Return Values: 1695 * MDI_SUCCESS 1696 * MDI_FAILURE 1697 * MDI_BUSY 1698 */ 1699 /*ARGSUSED*/ 1700 int 1701 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1702 { 1703 int rv; 1704 mdi_client_t *ct; 1705 1706 ct = i_devi_get_client(cdip); 1707 ASSERT(ct != NULL); 1708 if (ct == NULL) { 1709 /* cdip is not a valid client device. Nothing more to do. */ 1710 return (MDI_FAILURE); 1711 } 1712 1713 MDI_CLIENT_LOCK(ct); 1714 1715 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1716 /* A path to the client is being freed */ 1717 MDI_CLIENT_UNLOCK(ct); 1718 return (MDI_BUSY); 1719 } 1720 1721 1722 if (MDI_CLIENT_IS_FAILED(ct)) { 1723 /* 1724 * Client is in failed state. Nothing more to do. 1725 */ 1726 MDI_CLIENT_UNLOCK(ct); 1727 return (MDI_FAILURE); 1728 } 1729 1730 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1731 /* 1732 * Failover is already in progress; return BUSY 1733 */ 1734 MDI_CLIENT_UNLOCK(ct); 1735 return (MDI_BUSY); 1736 } 1737 /* 1738 * Make sure that mdi_pathinfo node state changes are processed. 1739 * We do not allow failovers to progress while client path state 1740 * changes are in progress 1741 */ 1742 if (ct->ct_unstable) { 1743 if (flags == MDI_FAILOVER_ASYNC) { 1744 MDI_CLIENT_UNLOCK(ct); 1745 return (MDI_BUSY); 1746 } else { 1747 while (ct->ct_unstable) 1748 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1749 } 1750 } 1751 1752 /* 1753 * Client device is in stable state. Before proceeding, perform sanity 1754 * checks again. 1755 */ 1756 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1757 (!i_ddi_devi_attached(ct->ct_dip))) { 1758 /* 1759 * Client is in failed state. Nothing more to do. 1760 */ 1761 MDI_CLIENT_UNLOCK(ct); 1762 return (MDI_FAILURE); 1763 } 1764 1765 /* 1766 * Set the client state as failover in progress. 1767 */ 1768 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1769 ct->ct_failover_flags = flags; 1770 MDI_CLIENT_UNLOCK(ct); 1771 1772 if (flags == MDI_FAILOVER_ASYNC) { 1773 /* 1774 * Submit the initiate failover request via CPR safe 1775 * taskq threads. 1776 */ 1777 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1778 ct, KM_SLEEP); 1779 return (MDI_ACCEPT); 1780 } else { 1781 /* 1782 * Synchronous failover mode. Typically invoked from the user 1783 * land. 1784 */ 1785 rv = i_mdi_failover(ct); 1786 } 1787 return (rv); 1788 } 1789 1790 /* 1791 * i_mdi_failover(): 1792 * internal failover function. Invokes vHCI drivers failover 1793 * callback function and process the failover status 1794 * Return Values: 1795 * None 1796 * 1797 * Note: A client device in failover state can not be detached or freed. 1798 */ 1799 static int 1800 i_mdi_failover(void *arg) 1801 { 1802 int rv = MDI_SUCCESS; 1803 mdi_client_t *ct = (mdi_client_t *)arg; 1804 mdi_vhci_t *vh = ct->ct_vhci; 1805 1806 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1807 1808 if (vh->vh_ops->vo_failover != NULL) { 1809 /* 1810 * Call vHCI drivers callback routine 1811 */ 1812 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1813 ct->ct_failover_flags); 1814 } 1815 1816 MDI_CLIENT_LOCK(ct); 1817 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1818 1819 /* 1820 * Save the failover return status 1821 */ 1822 ct->ct_failover_status = rv; 1823 1824 /* 1825 * As a result of failover, client status would have been changed. 1826 * Update the client state and wake up anyone waiting on this client 1827 * device. 1828 */ 1829 i_mdi_client_update_state(ct); 1830 1831 cv_broadcast(&ct->ct_failover_cv); 1832 MDI_CLIENT_UNLOCK(ct); 1833 return (rv); 1834 } 1835 1836 /* 1837 * Load balancing is logical block. 1838 * IOs within the range described by region_size 1839 * would go on the same path. This would improve the 1840 * performance by cache-hit on some of the RAID devices. 1841 * Search only for online paths(At some point we 1842 * may want to balance across target ports). 1843 * If no paths are found then default to round-robin. 1844 */ 1845 static int 1846 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1847 { 1848 int path_index = -1; 1849 int online_path_count = 0; 1850 int online_nonpref_path_count = 0; 1851 int region_size = ct->ct_lb_args->region_size; 1852 mdi_pathinfo_t *pip; 1853 mdi_pathinfo_t *next; 1854 int preferred, path_cnt; 1855 1856 pip = ct->ct_path_head; 1857 while (pip) { 1858 MDI_PI_LOCK(pip); 1859 if (MDI_PI(pip)->pi_state == 1860 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1861 online_path_count++; 1862 } else if (MDI_PI(pip)->pi_state == 1863 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1864 online_nonpref_path_count++; 1865 } 1866 next = (mdi_pathinfo_t *) 1867 MDI_PI(pip)->pi_client_link; 1868 MDI_PI_UNLOCK(pip); 1869 pip = next; 1870 } 1871 /* if found any online/preferred then use this type */ 1872 if (online_path_count > 0) { 1873 path_cnt = online_path_count; 1874 preferred = 1; 1875 } else if (online_nonpref_path_count > 0) { 1876 path_cnt = online_nonpref_path_count; 1877 preferred = 0; 1878 } else { 1879 path_cnt = 0; 1880 } 1881 if (path_cnt) { 1882 path_index = (bp->b_blkno >> region_size) % path_cnt; 1883 pip = ct->ct_path_head; 1884 while (pip && path_index != -1) { 1885 MDI_PI_LOCK(pip); 1886 if (path_index == 0 && 1887 (MDI_PI(pip)->pi_state == 1888 MDI_PATHINFO_STATE_ONLINE) && 1889 MDI_PI(pip)->pi_preferred == preferred) { 1890 MDI_PI_HOLD(pip); 1891 MDI_PI_UNLOCK(pip); 1892 *ret_pip = pip; 1893 return (MDI_SUCCESS); 1894 } 1895 path_index --; 1896 next = (mdi_pathinfo_t *) 1897 MDI_PI(pip)->pi_client_link; 1898 MDI_PI_UNLOCK(pip); 1899 pip = next; 1900 } 1901 if (pip == NULL) { 1902 MDI_DEBUG(4, (CE_NOTE, NULL, 1903 "!lba %llx, no pip !!\n", 1904 bp->b_lblkno)); 1905 } else { 1906 MDI_DEBUG(4, (CE_NOTE, NULL, 1907 "!lba %llx, no pip for path_index, " 1908 "pip %p\n", bp->b_lblkno, (void *)pip)); 1909 } 1910 } 1911 return (MDI_FAILURE); 1912 } 1913 1914 /* 1915 * mdi_select_path(): 1916 * select a path to access a client device. 1917 * 1918 * mdi_select_path() function is called by the vHCI drivers to 1919 * select a path to route the I/O request to. The caller passes 1920 * the block I/O data transfer structure ("buf") as one of the 1921 * parameters. The mpxio framework uses the buf structure 1922 * contents to maintain per path statistics (total I/O size / 1923 * count pending). If more than one online paths are available to 1924 * select, the framework automatically selects a suitable path 1925 * for routing I/O request. If a failover operation is active for 1926 * this client device the call shall be failed with MDI_BUSY error 1927 * code. 1928 * 1929 * By default this function returns a suitable path in online 1930 * state based on the current load balancing policy. Currently 1931 * we support LOAD_BALANCE_NONE (Previously selected online path 1932 * will continue to be used till the path is usable) and 1933 * LOAD_BALANCE_RR (Online paths will be selected in a round 1934 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 1935 * based on the logical block). The load balancing 1936 * through vHCI drivers configuration file (driver.conf). 1937 * 1938 * vHCI drivers may override this default behavior by specifying 1939 * appropriate flags. The meaning of the thrid argument depends 1940 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 1941 * then the argument is the "path instance" of the path to select. 1942 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 1943 * "start_pip". A non NULL "start_pip" is the starting point to 1944 * walk and find the next appropriate path. The following values 1945 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 1946 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 1947 * STANDBY path). 1948 * 1949 * The non-standard behavior is used by the scsi_vhci driver, 1950 * whenever it has to use a STANDBY/FAULTED path. Eg. during 1951 * attach of client devices (to avoid an unnecessary failover 1952 * when the STANDBY path comes up first), during failover 1953 * (to activate a STANDBY path as ONLINE). 1954 * 1955 * The selected path is returned in a a mdi_hold_path() state 1956 * (pi_ref_cnt). Caller should release the hold by calling 1957 * mdi_rele_path(). 1958 * 1959 * Return Values: 1960 * MDI_SUCCESS - Completed successfully 1961 * MDI_BUSY - Client device is busy failing over 1962 * MDI_NOPATH - Client device is online, but no valid path are 1963 * available to access this client device 1964 * MDI_FAILURE - Invalid client device or state 1965 * MDI_DEVI_ONLINING 1966 * - Client device (struct dev_info state) is in 1967 * onlining state. 1968 */ 1969 1970 /*ARGSUSED*/ 1971 int 1972 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 1973 void *arg, mdi_pathinfo_t **ret_pip) 1974 { 1975 mdi_client_t *ct; 1976 mdi_pathinfo_t *pip; 1977 mdi_pathinfo_t *next; 1978 mdi_pathinfo_t *head; 1979 mdi_pathinfo_t *start; 1980 client_lb_t lbp; /* load balancing policy */ 1981 int sb = 1; /* standard behavior */ 1982 int preferred = 1; /* preferred path */ 1983 int cond, cont = 1; 1984 int retry = 0; 1985 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 1986 int path_instance; /* request specific path instance */ 1987 1988 /* determine type of arg based on flags */ 1989 if (flags & MDI_SELECT_PATH_INSTANCE) { 1990 flags &= ~MDI_SELECT_PATH_INSTANCE; 1991 path_instance = (int)(intptr_t)arg; 1992 start_pip = NULL; 1993 } else { 1994 path_instance = 0; 1995 start_pip = (mdi_pathinfo_t *)arg; 1996 } 1997 1998 if (flags != 0) { 1999 /* 2000 * disable default behavior 2001 */ 2002 sb = 0; 2003 } 2004 2005 *ret_pip = NULL; 2006 ct = i_devi_get_client(cdip); 2007 if (ct == NULL) { 2008 /* mdi extensions are NULL, Nothing more to do */ 2009 return (MDI_FAILURE); 2010 } 2011 2012 MDI_CLIENT_LOCK(ct); 2013 2014 if (sb) { 2015 if (MDI_CLIENT_IS_FAILED(ct)) { 2016 /* 2017 * Client is not ready to accept any I/O requests. 2018 * Fail this request. 2019 */ 2020 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 2021 "client state offline ct = %p\n", (void *)ct)); 2022 MDI_CLIENT_UNLOCK(ct); 2023 return (MDI_FAILURE); 2024 } 2025 2026 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2027 /* 2028 * Check for Failover is in progress. If so tell the 2029 * caller that this device is busy. 2030 */ 2031 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 2032 "client failover in progress ct = %p\n", 2033 (void *)ct)); 2034 MDI_CLIENT_UNLOCK(ct); 2035 return (MDI_BUSY); 2036 } 2037 2038 /* 2039 * Check to see whether the client device is attached. 2040 * If not so, let the vHCI driver manually select a path 2041 * (standby) and let the probe/attach process to continue. 2042 */ 2043 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2044 MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining " 2045 "ct = %p\n", (void *)ct)); 2046 MDI_CLIENT_UNLOCK(ct); 2047 return (MDI_DEVI_ONLINING); 2048 } 2049 } 2050 2051 /* 2052 * Cache in the client list head. If head of the list is NULL 2053 * return MDI_NOPATH 2054 */ 2055 head = ct->ct_path_head; 2056 if (head == NULL) { 2057 MDI_CLIENT_UNLOCK(ct); 2058 return (MDI_NOPATH); 2059 } 2060 2061 /* Caller is specifying a specific pathinfo path by path_instance */ 2062 if (path_instance) { 2063 /* search for pathinfo with correct path_instance */ 2064 for (pip = head; 2065 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2066 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2067 ; 2068 2069 /* If path can't be selected then MDI_FAILURE is returned. */ 2070 if (pip == NULL) { 2071 MDI_CLIENT_UNLOCK(ct); 2072 return (MDI_FAILURE); 2073 } 2074 2075 /* verify state of path */ 2076 MDI_PI_LOCK(pip); 2077 if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) { 2078 MDI_PI_UNLOCK(pip); 2079 MDI_CLIENT_UNLOCK(ct); 2080 return (MDI_FAILURE); 2081 } 2082 2083 /* 2084 * Return the path in hold state. Caller should release the 2085 * lock by calling mdi_rele_path() 2086 */ 2087 MDI_PI_HOLD(pip); 2088 MDI_PI_UNLOCK(pip); 2089 ct->ct_path_last = pip; 2090 *ret_pip = pip; 2091 MDI_CLIENT_UNLOCK(ct); 2092 return (MDI_SUCCESS); 2093 } 2094 2095 /* 2096 * for non default behavior, bypass current 2097 * load balancing policy and always use LOAD_BALANCE_RR 2098 * except that the start point will be adjusted based 2099 * on the provided start_pip 2100 */ 2101 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2102 2103 switch (lbp) { 2104 case LOAD_BALANCE_NONE: 2105 /* 2106 * Load balancing is None or Alternate path mode 2107 * Start looking for a online mdi_pathinfo node starting from 2108 * last known selected path 2109 */ 2110 preferred = 1; 2111 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2112 if (pip == NULL) { 2113 pip = head; 2114 } 2115 start = pip; 2116 do { 2117 MDI_PI_LOCK(pip); 2118 /* 2119 * No need to explicitly check if the path is disabled. 2120 * Since we are checking for state == ONLINE and the 2121 * same veriable is used for DISABLE/ENABLE information. 2122 */ 2123 if ((MDI_PI(pip)->pi_state == 2124 MDI_PATHINFO_STATE_ONLINE) && 2125 preferred == MDI_PI(pip)->pi_preferred) { 2126 /* 2127 * Return the path in hold state. Caller should 2128 * release the lock by calling mdi_rele_path() 2129 */ 2130 MDI_PI_HOLD(pip); 2131 MDI_PI_UNLOCK(pip); 2132 ct->ct_path_last = pip; 2133 *ret_pip = pip; 2134 MDI_CLIENT_UNLOCK(ct); 2135 return (MDI_SUCCESS); 2136 } 2137 2138 /* 2139 * Path is busy. 2140 */ 2141 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2142 MDI_PI_IS_TRANSIENT(pip)) 2143 retry = 1; 2144 /* 2145 * Keep looking for a next available online path 2146 */ 2147 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2148 if (next == NULL) { 2149 next = head; 2150 } 2151 MDI_PI_UNLOCK(pip); 2152 pip = next; 2153 if (start == pip && preferred) { 2154 preferred = 0; 2155 } else if (start == pip && !preferred) { 2156 cont = 0; 2157 } 2158 } while (cont); 2159 break; 2160 2161 case LOAD_BALANCE_LBA: 2162 /* 2163 * Make sure we are looking 2164 * for an online path. Otherwise, if it is for a STANDBY 2165 * path request, it will go through and fetch an ONLINE 2166 * path which is not desirable. 2167 */ 2168 if ((ct->ct_lb_args != NULL) && 2169 (ct->ct_lb_args->region_size) && bp && 2170 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2171 if (i_mdi_lba_lb(ct, ret_pip, bp) 2172 == MDI_SUCCESS) { 2173 MDI_CLIENT_UNLOCK(ct); 2174 return (MDI_SUCCESS); 2175 } 2176 } 2177 /* FALLTHROUGH */ 2178 case LOAD_BALANCE_RR: 2179 /* 2180 * Load balancing is Round Robin. Start looking for a online 2181 * mdi_pathinfo node starting from last known selected path 2182 * as the start point. If override flags are specified, 2183 * process accordingly. 2184 * If the search is already in effect(start_pip not null), 2185 * then lets just use the same path preference to continue the 2186 * traversal. 2187 */ 2188 2189 if (start_pip != NULL) { 2190 preferred = MDI_PI(start_pip)->pi_preferred; 2191 } else { 2192 preferred = 1; 2193 } 2194 2195 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2196 if (start == NULL) { 2197 pip = head; 2198 } else { 2199 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2200 if (pip == NULL) { 2201 if (!sb) { 2202 if (preferred == 0) { 2203 /* 2204 * Looks like we have completed 2205 * the traversal as preferred 2206 * value is 0. Time to bail out. 2207 */ 2208 *ret_pip = NULL; 2209 MDI_CLIENT_UNLOCK(ct); 2210 return (MDI_NOPATH); 2211 } else { 2212 /* 2213 * Looks like we reached the 2214 * end of the list. Lets enable 2215 * traversal of non preferred 2216 * paths. 2217 */ 2218 preferred = 0; 2219 } 2220 } 2221 pip = head; 2222 } 2223 } 2224 start = pip; 2225 do { 2226 MDI_PI_LOCK(pip); 2227 if (sb) { 2228 cond = ((MDI_PI(pip)->pi_state == 2229 MDI_PATHINFO_STATE_ONLINE && 2230 MDI_PI(pip)->pi_preferred == 2231 preferred) ? 1 : 0); 2232 } else { 2233 if (flags == MDI_SELECT_ONLINE_PATH) { 2234 cond = ((MDI_PI(pip)->pi_state == 2235 MDI_PATHINFO_STATE_ONLINE && 2236 MDI_PI(pip)->pi_preferred == 2237 preferred) ? 1 : 0); 2238 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2239 cond = ((MDI_PI(pip)->pi_state == 2240 MDI_PATHINFO_STATE_STANDBY && 2241 MDI_PI(pip)->pi_preferred == 2242 preferred) ? 1 : 0); 2243 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2244 MDI_SELECT_STANDBY_PATH)) { 2245 cond = (((MDI_PI(pip)->pi_state == 2246 MDI_PATHINFO_STATE_ONLINE || 2247 (MDI_PI(pip)->pi_state == 2248 MDI_PATHINFO_STATE_STANDBY)) && 2249 MDI_PI(pip)->pi_preferred == 2250 preferred) ? 1 : 0); 2251 } else if (flags == 2252 (MDI_SELECT_STANDBY_PATH | 2253 MDI_SELECT_ONLINE_PATH | 2254 MDI_SELECT_USER_DISABLE_PATH)) { 2255 cond = (((MDI_PI(pip)->pi_state == 2256 MDI_PATHINFO_STATE_ONLINE || 2257 (MDI_PI(pip)->pi_state == 2258 MDI_PATHINFO_STATE_STANDBY) || 2259 (MDI_PI(pip)->pi_state == 2260 (MDI_PATHINFO_STATE_ONLINE| 2261 MDI_PATHINFO_STATE_USER_DISABLE)) || 2262 (MDI_PI(pip)->pi_state == 2263 (MDI_PATHINFO_STATE_STANDBY | 2264 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2265 MDI_PI(pip)->pi_preferred == 2266 preferred) ? 1 : 0); 2267 } else { 2268 cond = 0; 2269 } 2270 } 2271 /* 2272 * No need to explicitly check if the path is disabled. 2273 * Since we are checking for state == ONLINE and the 2274 * same veriable is used for DISABLE/ENABLE information. 2275 */ 2276 if (cond) { 2277 /* 2278 * Return the path in hold state. Caller should 2279 * release the lock by calling mdi_rele_path() 2280 */ 2281 MDI_PI_HOLD(pip); 2282 MDI_PI_UNLOCK(pip); 2283 if (sb) 2284 ct->ct_path_last = pip; 2285 *ret_pip = pip; 2286 MDI_CLIENT_UNLOCK(ct); 2287 return (MDI_SUCCESS); 2288 } 2289 /* 2290 * Path is busy. 2291 */ 2292 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2293 MDI_PI_IS_TRANSIENT(pip)) 2294 retry = 1; 2295 2296 /* 2297 * Keep looking for a next available online path 2298 */ 2299 do_again: 2300 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2301 if (next == NULL) { 2302 if (!sb) { 2303 if (preferred == 1) { 2304 /* 2305 * Looks like we reached the 2306 * end of the list. Lets enable 2307 * traversal of non preferred 2308 * paths. 2309 */ 2310 preferred = 0; 2311 next = head; 2312 } else { 2313 /* 2314 * We have done both the passes 2315 * Preferred as well as for 2316 * Non-preferred. Bail out now. 2317 */ 2318 cont = 0; 2319 } 2320 } else { 2321 /* 2322 * Standard behavior case. 2323 */ 2324 next = head; 2325 } 2326 } 2327 MDI_PI_UNLOCK(pip); 2328 if (cont == 0) { 2329 break; 2330 } 2331 pip = next; 2332 2333 if (!sb) { 2334 /* 2335 * We need to handle the selection of 2336 * non-preferred path in the following 2337 * case: 2338 * 2339 * +------+ +------+ +------+ +-----+ 2340 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2341 * +------+ +------+ +------+ +-----+ 2342 * 2343 * If we start the search with B, we need to 2344 * skip beyond B to pick C which is non - 2345 * preferred in the second pass. The following 2346 * test, if true, will allow us to skip over 2347 * the 'start'(B in the example) to select 2348 * other non preferred elements. 2349 */ 2350 if ((start_pip != NULL) && (start_pip == pip) && 2351 (MDI_PI(start_pip)->pi_preferred 2352 != preferred)) { 2353 /* 2354 * try again after going past the start 2355 * pip 2356 */ 2357 MDI_PI_LOCK(pip); 2358 goto do_again; 2359 } 2360 } else { 2361 /* 2362 * Standard behavior case 2363 */ 2364 if (start == pip && preferred) { 2365 /* look for nonpreferred paths */ 2366 preferred = 0; 2367 } else if (start == pip && !preferred) { 2368 /* 2369 * Exit condition 2370 */ 2371 cont = 0; 2372 } 2373 } 2374 } while (cont); 2375 break; 2376 } 2377 2378 MDI_CLIENT_UNLOCK(ct); 2379 if (retry == 1) { 2380 return (MDI_BUSY); 2381 } else { 2382 return (MDI_NOPATH); 2383 } 2384 } 2385 2386 /* 2387 * For a client, return the next available path to any phci 2388 * 2389 * Note: 2390 * Caller should hold the branch's devinfo node to get a consistent 2391 * snap shot of the mdi_pathinfo nodes. 2392 * 2393 * Please note that even the list is stable the mdi_pathinfo 2394 * node state and properties are volatile. The caller should lock 2395 * and unlock the nodes by calling mdi_pi_lock() and 2396 * mdi_pi_unlock() functions to get a stable properties. 2397 * 2398 * If there is a need to use the nodes beyond the hold of the 2399 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2400 * need to be held against unexpected removal by calling 2401 * mdi_hold_path() and should be released by calling 2402 * mdi_rele_path() on completion. 2403 */ 2404 mdi_pathinfo_t * 2405 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2406 { 2407 mdi_client_t *ct; 2408 2409 if (!MDI_CLIENT(ct_dip)) 2410 return (NULL); 2411 2412 /* 2413 * Walk through client link 2414 */ 2415 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2416 ASSERT(ct != NULL); 2417 2418 if (pip == NULL) 2419 return ((mdi_pathinfo_t *)ct->ct_path_head); 2420 2421 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2422 } 2423 2424 /* 2425 * For a phci, return the next available path to any client 2426 * Note: ditto mdi_get_next_phci_path() 2427 */ 2428 mdi_pathinfo_t * 2429 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2430 { 2431 mdi_phci_t *ph; 2432 2433 if (!MDI_PHCI(ph_dip)) 2434 return (NULL); 2435 2436 /* 2437 * Walk through pHCI link 2438 */ 2439 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2440 ASSERT(ph != NULL); 2441 2442 if (pip == NULL) 2443 return ((mdi_pathinfo_t *)ph->ph_path_head); 2444 2445 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2446 } 2447 2448 /* 2449 * mdi_hold_path(): 2450 * Hold the mdi_pathinfo node against unwanted unexpected free. 2451 * Return Values: 2452 * None 2453 */ 2454 void 2455 mdi_hold_path(mdi_pathinfo_t *pip) 2456 { 2457 if (pip) { 2458 MDI_PI_LOCK(pip); 2459 MDI_PI_HOLD(pip); 2460 MDI_PI_UNLOCK(pip); 2461 } 2462 } 2463 2464 2465 /* 2466 * mdi_rele_path(): 2467 * Release the mdi_pathinfo node which was selected 2468 * through mdi_select_path() mechanism or manually held by 2469 * calling mdi_hold_path(). 2470 * Return Values: 2471 * None 2472 */ 2473 void 2474 mdi_rele_path(mdi_pathinfo_t *pip) 2475 { 2476 if (pip) { 2477 MDI_PI_LOCK(pip); 2478 MDI_PI_RELE(pip); 2479 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2480 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2481 } 2482 MDI_PI_UNLOCK(pip); 2483 } 2484 } 2485 2486 /* 2487 * mdi_pi_lock(): 2488 * Lock the mdi_pathinfo node. 2489 * Note: 2490 * The caller should release the lock by calling mdi_pi_unlock() 2491 */ 2492 void 2493 mdi_pi_lock(mdi_pathinfo_t *pip) 2494 { 2495 ASSERT(pip != NULL); 2496 if (pip) { 2497 MDI_PI_LOCK(pip); 2498 } 2499 } 2500 2501 2502 /* 2503 * mdi_pi_unlock(): 2504 * Unlock the mdi_pathinfo node. 2505 * Note: 2506 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2507 */ 2508 void 2509 mdi_pi_unlock(mdi_pathinfo_t *pip) 2510 { 2511 ASSERT(pip != NULL); 2512 if (pip) { 2513 MDI_PI_UNLOCK(pip); 2514 } 2515 } 2516 2517 /* 2518 * mdi_pi_find(): 2519 * Search the list of mdi_pathinfo nodes attached to the 2520 * pHCI/Client device node whose path address matches "paddr". 2521 * Returns a pointer to the mdi_pathinfo node if a matching node is 2522 * found. 2523 * Return Values: 2524 * mdi_pathinfo node handle 2525 * NULL 2526 * Notes: 2527 * Caller need not hold any locks to call this function. 2528 */ 2529 mdi_pathinfo_t * 2530 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2531 { 2532 mdi_phci_t *ph; 2533 mdi_vhci_t *vh; 2534 mdi_client_t *ct; 2535 mdi_pathinfo_t *pip = NULL; 2536 2537 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s", 2538 caddr ? caddr : "NULL", paddr ? paddr : "NULL")); 2539 if ((pdip == NULL) || (paddr == NULL)) { 2540 return (NULL); 2541 } 2542 ph = i_devi_get_phci(pdip); 2543 if (ph == NULL) { 2544 /* 2545 * Invalid pHCI device, Nothing more to do. 2546 */ 2547 MDI_DEBUG(2, (CE_WARN, pdip, 2548 "!mdi_pi_find: invalid phci")); 2549 return (NULL); 2550 } 2551 2552 vh = ph->ph_vhci; 2553 if (vh == NULL) { 2554 /* 2555 * Invalid vHCI device, Nothing more to do. 2556 */ 2557 MDI_DEBUG(2, (CE_WARN, pdip, 2558 "!mdi_pi_find: invalid vhci")); 2559 return (NULL); 2560 } 2561 2562 /* 2563 * Look for pathinfo node identified by paddr. 2564 */ 2565 if (caddr == NULL) { 2566 /* 2567 * Find a mdi_pathinfo node under pHCI list for a matching 2568 * unit address. 2569 */ 2570 MDI_PHCI_LOCK(ph); 2571 if (MDI_PHCI_IS_OFFLINE(ph)) { 2572 MDI_DEBUG(2, (CE_WARN, pdip, 2573 "!mdi_pi_find: offline phci %p", (void *)ph)); 2574 MDI_PHCI_UNLOCK(ph); 2575 return (NULL); 2576 } 2577 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2578 2579 while (pip != NULL) { 2580 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2581 break; 2582 } 2583 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2584 } 2585 MDI_PHCI_UNLOCK(ph); 2586 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p", 2587 (void *)pip)); 2588 return (pip); 2589 } 2590 2591 /* 2592 * XXX - Is the rest of the code in this function really necessary? 2593 * The consumers of mdi_pi_find() can search for the desired pathinfo 2594 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2595 * whether the search is based on the pathinfo nodes attached to 2596 * the pHCI or the client node, the result will be the same. 2597 */ 2598 2599 /* 2600 * Find the client device corresponding to 'caddr' 2601 */ 2602 MDI_VHCI_CLIENT_LOCK(vh); 2603 2604 /* 2605 * XXX - Passing NULL to the following function works as long as the 2606 * the client addresses (caddr) are unique per vhci basis. 2607 */ 2608 ct = i_mdi_client_find(vh, NULL, caddr); 2609 if (ct == NULL) { 2610 /* 2611 * Client not found, Obviously mdi_pathinfo node has not been 2612 * created yet. 2613 */ 2614 MDI_VHCI_CLIENT_UNLOCK(vh); 2615 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not " 2616 "found for caddr %s", caddr ? caddr : "NULL")); 2617 return (NULL); 2618 } 2619 2620 /* 2621 * Hold the client lock and look for a mdi_pathinfo node with matching 2622 * pHCI and paddr 2623 */ 2624 MDI_CLIENT_LOCK(ct); 2625 2626 /* 2627 * Release the global mutex as it is no more needed. Note: We always 2628 * respect the locking order while acquiring. 2629 */ 2630 MDI_VHCI_CLIENT_UNLOCK(vh); 2631 2632 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2633 while (pip != NULL) { 2634 /* 2635 * Compare the unit address 2636 */ 2637 if ((MDI_PI(pip)->pi_phci == ph) && 2638 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2639 break; 2640 } 2641 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2642 } 2643 MDI_CLIENT_UNLOCK(ct); 2644 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip)); 2645 return (pip); 2646 } 2647 2648 /* 2649 * mdi_pi_alloc(): 2650 * Allocate and initialize a new instance of a mdi_pathinfo node. 2651 * The mdi_pathinfo node returned by this function identifies a 2652 * unique device path is capable of having properties attached 2653 * and passed to mdi_pi_online() to fully attach and online the 2654 * path and client device node. 2655 * The mdi_pathinfo node returned by this function must be 2656 * destroyed using mdi_pi_free() if the path is no longer 2657 * operational or if the caller fails to attach a client device 2658 * node when calling mdi_pi_online(). The framework will not free 2659 * the resources allocated. 2660 * This function can be called from both interrupt and kernel 2661 * contexts. DDI_NOSLEEP flag should be used while calling 2662 * from interrupt contexts. 2663 * Return Values: 2664 * MDI_SUCCESS 2665 * MDI_FAILURE 2666 * MDI_NOMEM 2667 */ 2668 /*ARGSUSED*/ 2669 int 2670 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2671 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2672 { 2673 mdi_vhci_t *vh; 2674 mdi_phci_t *ph; 2675 mdi_client_t *ct; 2676 mdi_pathinfo_t *pip = NULL; 2677 dev_info_t *cdip; 2678 int rv = MDI_NOMEM; 2679 int path_allocated = 0; 2680 2681 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s", 2682 cname ? cname : "NULL", caddr ? caddr : "NULL", 2683 paddr ? paddr : "NULL")); 2684 2685 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2686 ret_pip == NULL) { 2687 /* Nothing more to do */ 2688 return (MDI_FAILURE); 2689 } 2690 2691 *ret_pip = NULL; 2692 2693 /* No allocations on detaching pHCI */ 2694 if (DEVI_IS_DETACHING(pdip)) { 2695 /* Invalid pHCI device, return failure */ 2696 MDI_DEBUG(1, (CE_WARN, pdip, 2697 "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip)); 2698 return (MDI_FAILURE); 2699 } 2700 2701 ph = i_devi_get_phci(pdip); 2702 ASSERT(ph != NULL); 2703 if (ph == NULL) { 2704 /* Invalid pHCI device, return failure */ 2705 MDI_DEBUG(1, (CE_WARN, pdip, 2706 "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip)); 2707 return (MDI_FAILURE); 2708 } 2709 2710 MDI_PHCI_LOCK(ph); 2711 vh = ph->ph_vhci; 2712 if (vh == NULL) { 2713 /* Invalid vHCI device, return failure */ 2714 MDI_DEBUG(1, (CE_WARN, pdip, 2715 "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip)); 2716 MDI_PHCI_UNLOCK(ph); 2717 return (MDI_FAILURE); 2718 } 2719 2720 if (MDI_PHCI_IS_READY(ph) == 0) { 2721 /* 2722 * Do not allow new node creation when pHCI is in 2723 * offline/suspended states 2724 */ 2725 MDI_DEBUG(1, (CE_WARN, pdip, 2726 "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph)); 2727 MDI_PHCI_UNLOCK(ph); 2728 return (MDI_BUSY); 2729 } 2730 MDI_PHCI_UNSTABLE(ph); 2731 MDI_PHCI_UNLOCK(ph); 2732 2733 /* look for a matching client, create one if not found */ 2734 MDI_VHCI_CLIENT_LOCK(vh); 2735 ct = i_mdi_client_find(vh, cname, caddr); 2736 if (ct == NULL) { 2737 ct = i_mdi_client_alloc(vh, cname, caddr); 2738 ASSERT(ct != NULL); 2739 } 2740 2741 if (ct->ct_dip == NULL) { 2742 /* 2743 * Allocate a devinfo node 2744 */ 2745 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2746 compatible, ncompatible); 2747 if (ct->ct_dip == NULL) { 2748 (void) i_mdi_client_free(vh, ct); 2749 goto fail; 2750 } 2751 } 2752 cdip = ct->ct_dip; 2753 2754 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2755 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2756 2757 MDI_CLIENT_LOCK(ct); 2758 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2759 while (pip != NULL) { 2760 /* 2761 * Compare the unit address 2762 */ 2763 if ((MDI_PI(pip)->pi_phci == ph) && 2764 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2765 break; 2766 } 2767 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2768 } 2769 MDI_CLIENT_UNLOCK(ct); 2770 2771 if (pip == NULL) { 2772 /* 2773 * This is a new path for this client device. Allocate and 2774 * initialize a new pathinfo node 2775 */ 2776 pip = i_mdi_pi_alloc(ph, paddr, ct); 2777 ASSERT(pip != NULL); 2778 path_allocated = 1; 2779 } 2780 rv = MDI_SUCCESS; 2781 2782 fail: 2783 /* 2784 * Release the global mutex. 2785 */ 2786 MDI_VHCI_CLIENT_UNLOCK(vh); 2787 2788 /* 2789 * Mark the pHCI as stable 2790 */ 2791 MDI_PHCI_LOCK(ph); 2792 MDI_PHCI_STABLE(ph); 2793 MDI_PHCI_UNLOCK(ph); 2794 *ret_pip = pip; 2795 2796 MDI_DEBUG(2, (CE_NOTE, pdip, 2797 "!mdi_pi_alloc_compatible: alloc %p", (void *)pip)); 2798 2799 if (path_allocated) 2800 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2801 2802 return (rv); 2803 } 2804 2805 /*ARGSUSED*/ 2806 int 2807 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2808 int flags, mdi_pathinfo_t **ret_pip) 2809 { 2810 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2811 flags, ret_pip)); 2812 } 2813 2814 /* 2815 * i_mdi_pi_alloc(): 2816 * Allocate a mdi_pathinfo node and add to the pHCI path list 2817 * Return Values: 2818 * mdi_pathinfo 2819 */ 2820 /*ARGSUSED*/ 2821 static mdi_pathinfo_t * 2822 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2823 { 2824 mdi_pathinfo_t *pip; 2825 int ct_circular; 2826 int ph_circular; 2827 static char path[MAXPATHLEN]; 2828 char *path_persistent; 2829 int path_instance; 2830 mod_hash_val_t hv; 2831 2832 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2833 2834 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2835 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2836 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2837 MDI_PATHINFO_STATE_TRANSIENT; 2838 2839 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2840 MDI_PI_SET_USER_DISABLE(pip); 2841 2842 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2843 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2844 2845 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2846 MDI_PI_SET_DRV_DISABLE(pip); 2847 2848 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2849 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2850 MDI_PI(pip)->pi_client = ct; 2851 MDI_PI(pip)->pi_phci = ph; 2852 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2853 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2854 2855 /* 2856 * We form the "path" to the pathinfo node, and see if we have 2857 * already allocated a 'path_instance' for that "path". If so, 2858 * we use the already allocated 'path_instance'. If not, we 2859 * allocate a new 'path_instance' and associate it with a copy of 2860 * the "path" string (which is never freed). The association 2861 * between a 'path_instance' this "path" string persists until 2862 * reboot. 2863 */ 2864 mutex_enter(&mdi_pathmap_mutex); 2865 (void) ddi_pathname(ph->ph_dip, path); 2866 (void) sprintf(path + strlen(path), "/%s@%s", 2867 ddi_node_name(ct->ct_dip), MDI_PI(pip)->pi_addr); 2868 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2869 path_instance = (uint_t)(intptr_t)hv; 2870 } else { 2871 /* allocate a new 'path_instance' and persistent "path" */ 2872 path_instance = mdi_pathmap_instance++; 2873 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2874 (void) mod_hash_insert(mdi_pathmap_bypath, 2875 (mod_hash_key_t)path_persistent, 2876 (mod_hash_val_t)(intptr_t)path_instance); 2877 (void) mod_hash_insert(mdi_pathmap_byinstance, 2878 (mod_hash_key_t)(intptr_t)path_instance, 2879 (mod_hash_val_t)path_persistent); 2880 } 2881 mutex_exit(&mdi_pathmap_mutex); 2882 MDI_PI(pip)->pi_path_instance = path_instance; 2883 2884 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2885 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2886 MDI_PI(pip)->pi_pprivate = NULL; 2887 MDI_PI(pip)->pi_cprivate = NULL; 2888 MDI_PI(pip)->pi_vprivate = NULL; 2889 MDI_PI(pip)->pi_client_link = NULL; 2890 MDI_PI(pip)->pi_phci_link = NULL; 2891 MDI_PI(pip)->pi_ref_cnt = 0; 2892 MDI_PI(pip)->pi_kstats = NULL; 2893 MDI_PI(pip)->pi_preferred = 1; 2894 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2895 2896 /* 2897 * Lock both dev_info nodes against changes in parallel. 2898 * 2899 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 2900 * This atypical operation is done to synchronize pathinfo nodes 2901 * during devinfo snapshot (see di_register_pip) by 'pretending' that 2902 * the pathinfo nodes are children of the Client. 2903 */ 2904 ndi_devi_enter(ct->ct_dip, &ct_circular); 2905 ndi_devi_enter(ph->ph_dip, &ph_circular); 2906 2907 i_mdi_phci_add_path(ph, pip); 2908 i_mdi_client_add_path(ct, pip); 2909 2910 ndi_devi_exit(ph->ph_dip, ph_circular); 2911 ndi_devi_exit(ct->ct_dip, ct_circular); 2912 2913 return (pip); 2914 } 2915 2916 /* 2917 * mdi_pi_pathname_by_instance(): 2918 * Lookup of "path" by 'path_instance'. Return "path". 2919 * NOTE: returned "path" remains valid forever (until reboot). 2920 */ 2921 char * 2922 mdi_pi_pathname_by_instance(int path_instance) 2923 { 2924 char *path; 2925 mod_hash_val_t hv; 2926 2927 /* mdi_pathmap lookup of "path" by 'path_instance' */ 2928 mutex_enter(&mdi_pathmap_mutex); 2929 if (mod_hash_find(mdi_pathmap_byinstance, 2930 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 2931 path = (char *)hv; 2932 else 2933 path = NULL; 2934 mutex_exit(&mdi_pathmap_mutex); 2935 return (path); 2936 } 2937 2938 /* 2939 * i_mdi_phci_add_path(): 2940 * Add a mdi_pathinfo node to pHCI list. 2941 * Notes: 2942 * Caller should per-pHCI mutex 2943 */ 2944 static void 2945 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2946 { 2947 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2948 2949 MDI_PHCI_LOCK(ph); 2950 if (ph->ph_path_head == NULL) { 2951 ph->ph_path_head = pip; 2952 } else { 2953 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 2954 } 2955 ph->ph_path_tail = pip; 2956 ph->ph_path_count++; 2957 MDI_PHCI_UNLOCK(ph); 2958 } 2959 2960 /* 2961 * i_mdi_client_add_path(): 2962 * Add mdi_pathinfo node to client list 2963 */ 2964 static void 2965 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2966 { 2967 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2968 2969 MDI_CLIENT_LOCK(ct); 2970 if (ct->ct_path_head == NULL) { 2971 ct->ct_path_head = pip; 2972 } else { 2973 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 2974 } 2975 ct->ct_path_tail = pip; 2976 ct->ct_path_count++; 2977 MDI_CLIENT_UNLOCK(ct); 2978 } 2979 2980 /* 2981 * mdi_pi_free(): 2982 * Free the mdi_pathinfo node and also client device node if this 2983 * is the last path to the device 2984 * Return Values: 2985 * MDI_SUCCESS 2986 * MDI_FAILURE 2987 * MDI_BUSY 2988 */ 2989 /*ARGSUSED*/ 2990 int 2991 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 2992 { 2993 int rv = MDI_FAILURE; 2994 mdi_vhci_t *vh; 2995 mdi_phci_t *ph; 2996 mdi_client_t *ct; 2997 int (*f)(); 2998 int client_held = 0; 2999 3000 MDI_PI_LOCK(pip); 3001 ph = MDI_PI(pip)->pi_phci; 3002 ASSERT(ph != NULL); 3003 if (ph == NULL) { 3004 /* 3005 * Invalid pHCI device, return failure 3006 */ 3007 MDI_DEBUG(1, (CE_WARN, NULL, 3008 "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip)); 3009 MDI_PI_UNLOCK(pip); 3010 return (MDI_FAILURE); 3011 } 3012 3013 vh = ph->ph_vhci; 3014 ASSERT(vh != NULL); 3015 if (vh == NULL) { 3016 /* Invalid pHCI device, return failure */ 3017 MDI_DEBUG(1, (CE_WARN, NULL, 3018 "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip)); 3019 MDI_PI_UNLOCK(pip); 3020 return (MDI_FAILURE); 3021 } 3022 3023 ct = MDI_PI(pip)->pi_client; 3024 ASSERT(ct != NULL); 3025 if (ct == NULL) { 3026 /* 3027 * Invalid Client device, return failure 3028 */ 3029 MDI_DEBUG(1, (CE_WARN, NULL, 3030 "!mdi_pi_free: invalid client pip=%p", (void *)pip)); 3031 MDI_PI_UNLOCK(pip); 3032 return (MDI_FAILURE); 3033 } 3034 3035 /* 3036 * Check to see for busy condition. A mdi_pathinfo can only be freed 3037 * if the node state is either offline or init and the reference count 3038 * is zero. 3039 */ 3040 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3041 MDI_PI_IS_INITING(pip))) { 3042 /* 3043 * Node is busy 3044 */ 3045 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3046 "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip)); 3047 MDI_PI_UNLOCK(pip); 3048 return (MDI_BUSY); 3049 } 3050 3051 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3052 /* 3053 * Give a chance for pending I/Os to complete. 3054 */ 3055 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: " 3056 "%d cmds still pending on path: %p\n", 3057 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3058 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3059 &MDI_PI(pip)->pi_mutex, 3060 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3061 /* 3062 * The timeout time reached without ref_cnt being zero 3063 * being signaled. 3064 */ 3065 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3066 "!mdi_pi_free: " 3067 "Timeout reached on path %p without the cond\n", 3068 (void *)pip)); 3069 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3070 "!mdi_pi_free: " 3071 "%d cmds still pending on path: %p\n", 3072 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3073 MDI_PI_UNLOCK(pip); 3074 return (MDI_BUSY); 3075 } 3076 } 3077 if (MDI_PI(pip)->pi_pm_held) { 3078 client_held = 1; 3079 } 3080 MDI_PI_UNLOCK(pip); 3081 3082 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3083 3084 MDI_CLIENT_LOCK(ct); 3085 3086 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3087 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3088 3089 /* 3090 * Wait till failover is complete before removing this node. 3091 */ 3092 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3093 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3094 3095 MDI_CLIENT_UNLOCK(ct); 3096 MDI_VHCI_CLIENT_LOCK(vh); 3097 MDI_CLIENT_LOCK(ct); 3098 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3099 3100 if (!MDI_PI_IS_INITING(pip)) { 3101 f = vh->vh_ops->vo_pi_uninit; 3102 if (f != NULL) { 3103 rv = (*f)(vh->vh_dip, pip, 0); 3104 } 3105 } 3106 /* 3107 * If vo_pi_uninit() completed successfully. 3108 */ 3109 if (rv == MDI_SUCCESS) { 3110 if (client_held) { 3111 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 3112 "i_mdi_pm_rele_client\n")); 3113 i_mdi_pm_rele_client(ct, 1); 3114 } 3115 i_mdi_pi_free(ph, pip, ct); 3116 if (ct->ct_path_count == 0) { 3117 /* 3118 * Client lost its last path. 3119 * Clean up the client device 3120 */ 3121 MDI_CLIENT_UNLOCK(ct); 3122 (void) i_mdi_client_free(ct->ct_vhci, ct); 3123 MDI_VHCI_CLIENT_UNLOCK(vh); 3124 return (rv); 3125 } 3126 } 3127 MDI_CLIENT_UNLOCK(ct); 3128 MDI_VHCI_CLIENT_UNLOCK(vh); 3129 3130 if (rv == MDI_FAILURE) 3131 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3132 3133 return (rv); 3134 } 3135 3136 /* 3137 * i_mdi_pi_free(): 3138 * Free the mdi_pathinfo node 3139 */ 3140 static void 3141 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3142 { 3143 int ct_circular; 3144 int ph_circular; 3145 3146 ASSERT(MDI_CLIENT_LOCKED(ct)); 3147 3148 /* 3149 * remove any per-path kstats 3150 */ 3151 i_mdi_pi_kstat_destroy(pip); 3152 3153 /* See comments in i_mdi_pi_alloc() */ 3154 ndi_devi_enter(ct->ct_dip, &ct_circular); 3155 ndi_devi_enter(ph->ph_dip, &ph_circular); 3156 3157 i_mdi_client_remove_path(ct, pip); 3158 i_mdi_phci_remove_path(ph, pip); 3159 3160 ndi_devi_exit(ph->ph_dip, ph_circular); 3161 ndi_devi_exit(ct->ct_dip, ct_circular); 3162 3163 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3164 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3165 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3166 if (MDI_PI(pip)->pi_addr) { 3167 kmem_free(MDI_PI(pip)->pi_addr, 3168 strlen(MDI_PI(pip)->pi_addr) + 1); 3169 MDI_PI(pip)->pi_addr = NULL; 3170 } 3171 3172 if (MDI_PI(pip)->pi_prop) { 3173 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3174 MDI_PI(pip)->pi_prop = NULL; 3175 } 3176 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3177 } 3178 3179 3180 /* 3181 * i_mdi_phci_remove_path(): 3182 * Remove a mdi_pathinfo node from pHCI list. 3183 * Notes: 3184 * Caller should hold per-pHCI mutex 3185 */ 3186 static void 3187 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3188 { 3189 mdi_pathinfo_t *prev = NULL; 3190 mdi_pathinfo_t *path = NULL; 3191 3192 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3193 3194 MDI_PHCI_LOCK(ph); 3195 path = ph->ph_path_head; 3196 while (path != NULL) { 3197 if (path == pip) { 3198 break; 3199 } 3200 prev = path; 3201 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3202 } 3203 3204 if (path) { 3205 ph->ph_path_count--; 3206 if (prev) { 3207 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3208 } else { 3209 ph->ph_path_head = 3210 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3211 } 3212 if (ph->ph_path_tail == path) { 3213 ph->ph_path_tail = prev; 3214 } 3215 } 3216 3217 /* 3218 * Clear the pHCI link 3219 */ 3220 MDI_PI(pip)->pi_phci_link = NULL; 3221 MDI_PI(pip)->pi_phci = NULL; 3222 MDI_PHCI_UNLOCK(ph); 3223 } 3224 3225 /* 3226 * i_mdi_client_remove_path(): 3227 * Remove a mdi_pathinfo node from client path list. 3228 */ 3229 static void 3230 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3231 { 3232 mdi_pathinfo_t *prev = NULL; 3233 mdi_pathinfo_t *path; 3234 3235 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3236 3237 ASSERT(MDI_CLIENT_LOCKED(ct)); 3238 path = ct->ct_path_head; 3239 while (path != NULL) { 3240 if (path == pip) { 3241 break; 3242 } 3243 prev = path; 3244 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3245 } 3246 3247 if (path) { 3248 ct->ct_path_count--; 3249 if (prev) { 3250 MDI_PI(prev)->pi_client_link = 3251 MDI_PI(path)->pi_client_link; 3252 } else { 3253 ct->ct_path_head = 3254 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3255 } 3256 if (ct->ct_path_tail == path) { 3257 ct->ct_path_tail = prev; 3258 } 3259 if (ct->ct_path_last == path) { 3260 ct->ct_path_last = ct->ct_path_head; 3261 } 3262 } 3263 MDI_PI(pip)->pi_client_link = NULL; 3264 MDI_PI(pip)->pi_client = NULL; 3265 } 3266 3267 /* 3268 * i_mdi_pi_state_change(): 3269 * online a mdi_pathinfo node 3270 * 3271 * Return Values: 3272 * MDI_SUCCESS 3273 * MDI_FAILURE 3274 */ 3275 /*ARGSUSED*/ 3276 static int 3277 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3278 { 3279 int rv = MDI_SUCCESS; 3280 mdi_vhci_t *vh; 3281 mdi_phci_t *ph; 3282 mdi_client_t *ct; 3283 int (*f)(); 3284 dev_info_t *cdip; 3285 3286 MDI_PI_LOCK(pip); 3287 3288 ph = MDI_PI(pip)->pi_phci; 3289 ASSERT(ph); 3290 if (ph == NULL) { 3291 /* 3292 * Invalid pHCI device, fail the request 3293 */ 3294 MDI_PI_UNLOCK(pip); 3295 MDI_DEBUG(1, (CE_WARN, NULL, 3296 "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip)); 3297 return (MDI_FAILURE); 3298 } 3299 3300 vh = ph->ph_vhci; 3301 ASSERT(vh); 3302 if (vh == NULL) { 3303 /* 3304 * Invalid vHCI device, fail the request 3305 */ 3306 MDI_PI_UNLOCK(pip); 3307 MDI_DEBUG(1, (CE_WARN, NULL, 3308 "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip)); 3309 return (MDI_FAILURE); 3310 } 3311 3312 ct = MDI_PI(pip)->pi_client; 3313 ASSERT(ct != NULL); 3314 if (ct == NULL) { 3315 /* 3316 * Invalid client device, fail the request 3317 */ 3318 MDI_PI_UNLOCK(pip); 3319 MDI_DEBUG(1, (CE_WARN, NULL, 3320 "!mdi_pi_state_change: invalid client pip=%p", 3321 (void *)pip)); 3322 return (MDI_FAILURE); 3323 } 3324 3325 /* 3326 * If this path has not been initialized yet, Callback vHCI driver's 3327 * pathinfo node initialize entry point 3328 */ 3329 3330 if (MDI_PI_IS_INITING(pip)) { 3331 MDI_PI_UNLOCK(pip); 3332 f = vh->vh_ops->vo_pi_init; 3333 if (f != NULL) { 3334 rv = (*f)(vh->vh_dip, pip, 0); 3335 if (rv != MDI_SUCCESS) { 3336 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3337 "!vo_pi_init: failed vHCI=0x%p, pip=0x%p", 3338 (void *)vh, (void *)pip)); 3339 return (MDI_FAILURE); 3340 } 3341 } 3342 MDI_PI_LOCK(pip); 3343 MDI_PI_CLEAR_TRANSIENT(pip); 3344 } 3345 3346 /* 3347 * Do not allow state transition when pHCI is in offline/suspended 3348 * states 3349 */ 3350 i_mdi_phci_lock(ph, pip); 3351 if (MDI_PHCI_IS_READY(ph) == 0) { 3352 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3353 "!mdi_pi_state_change: pHCI not ready, pHCI=%p", 3354 (void *)ph)); 3355 MDI_PI_UNLOCK(pip); 3356 i_mdi_phci_unlock(ph); 3357 return (MDI_BUSY); 3358 } 3359 MDI_PHCI_UNSTABLE(ph); 3360 i_mdi_phci_unlock(ph); 3361 3362 /* 3363 * Check if mdi_pathinfo state is in transient state. 3364 * If yes, offlining is in progress and wait till transient state is 3365 * cleared. 3366 */ 3367 if (MDI_PI_IS_TRANSIENT(pip)) { 3368 while (MDI_PI_IS_TRANSIENT(pip)) { 3369 cv_wait(&MDI_PI(pip)->pi_state_cv, 3370 &MDI_PI(pip)->pi_mutex); 3371 } 3372 } 3373 3374 /* 3375 * Grab the client lock in reverse order sequence and release the 3376 * mdi_pathinfo mutex. 3377 */ 3378 i_mdi_client_lock(ct, pip); 3379 MDI_PI_UNLOCK(pip); 3380 3381 /* 3382 * Wait till failover state is cleared 3383 */ 3384 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3385 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3386 3387 /* 3388 * Mark the mdi_pathinfo node state as transient 3389 */ 3390 MDI_PI_LOCK(pip); 3391 switch (state) { 3392 case MDI_PATHINFO_STATE_ONLINE: 3393 MDI_PI_SET_ONLINING(pip); 3394 break; 3395 3396 case MDI_PATHINFO_STATE_STANDBY: 3397 MDI_PI_SET_STANDBYING(pip); 3398 break; 3399 3400 case MDI_PATHINFO_STATE_FAULT: 3401 /* 3402 * Mark the pathinfo state as FAULTED 3403 */ 3404 MDI_PI_SET_FAULTING(pip); 3405 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3406 break; 3407 3408 case MDI_PATHINFO_STATE_OFFLINE: 3409 /* 3410 * ndi_devi_offline() cannot hold pip or ct locks. 3411 */ 3412 MDI_PI_UNLOCK(pip); 3413 /* 3414 * Don't offline the client dev_info node unless we have 3415 * no available paths left at all. 3416 */ 3417 cdip = ct->ct_dip; 3418 if ((flag & NDI_DEVI_REMOVE) && 3419 (ct->ct_path_count == 1)) { 3420 i_mdi_client_unlock(ct); 3421 rv = ndi_devi_offline(cdip, 0); 3422 if (rv != NDI_SUCCESS) { 3423 /* 3424 * Convert to MDI error code 3425 */ 3426 switch (rv) { 3427 case NDI_BUSY: 3428 rv = MDI_BUSY; 3429 break; 3430 default: 3431 rv = MDI_FAILURE; 3432 break; 3433 } 3434 goto state_change_exit; 3435 } else { 3436 i_mdi_client_lock(ct, NULL); 3437 } 3438 } 3439 /* 3440 * Mark the mdi_pathinfo node state as transient 3441 */ 3442 MDI_PI_LOCK(pip); 3443 MDI_PI_SET_OFFLINING(pip); 3444 break; 3445 } 3446 MDI_PI_UNLOCK(pip); 3447 MDI_CLIENT_UNSTABLE(ct); 3448 i_mdi_client_unlock(ct); 3449 3450 f = vh->vh_ops->vo_pi_state_change; 3451 if (f != NULL) 3452 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3453 3454 MDI_CLIENT_LOCK(ct); 3455 MDI_PI_LOCK(pip); 3456 if (rv == MDI_NOT_SUPPORTED) { 3457 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3458 } 3459 if (rv != MDI_SUCCESS) { 3460 MDI_DEBUG(2, (CE_WARN, ct->ct_dip, 3461 "!vo_pi_state_change: failed rv = %x", rv)); 3462 } 3463 if (MDI_PI_IS_TRANSIENT(pip)) { 3464 if (rv == MDI_SUCCESS) { 3465 MDI_PI_CLEAR_TRANSIENT(pip); 3466 } else { 3467 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3468 } 3469 } 3470 3471 /* 3472 * Wake anyone waiting for this mdi_pathinfo node 3473 */ 3474 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3475 MDI_PI_UNLOCK(pip); 3476 3477 /* 3478 * Mark the client device as stable 3479 */ 3480 MDI_CLIENT_STABLE(ct); 3481 if (rv == MDI_SUCCESS) { 3482 if (ct->ct_unstable == 0) { 3483 cdip = ct->ct_dip; 3484 3485 /* 3486 * Onlining the mdi_pathinfo node will impact the 3487 * client state Update the client and dev_info node 3488 * state accordingly 3489 */ 3490 rv = NDI_SUCCESS; 3491 i_mdi_client_update_state(ct); 3492 switch (MDI_CLIENT_STATE(ct)) { 3493 case MDI_CLIENT_STATE_OPTIMAL: 3494 case MDI_CLIENT_STATE_DEGRADED: 3495 if (cdip && !i_ddi_devi_attached(cdip) && 3496 ((state == MDI_PATHINFO_STATE_ONLINE) || 3497 (state == MDI_PATHINFO_STATE_STANDBY))) { 3498 3499 /* 3500 * Must do ndi_devi_online() through 3501 * hotplug thread for deferred 3502 * attach mechanism to work 3503 */ 3504 MDI_CLIENT_UNLOCK(ct); 3505 rv = ndi_devi_online(cdip, 0); 3506 MDI_CLIENT_LOCK(ct); 3507 if ((rv != NDI_SUCCESS) && 3508 (MDI_CLIENT_STATE(ct) == 3509 MDI_CLIENT_STATE_DEGRADED)) { 3510 /* 3511 * ndi_devi_online failed. 3512 * Reset client flags to 3513 * offline. 3514 */ 3515 MDI_DEBUG(1, (CE_WARN, cdip, 3516 "!ndi_devi_online: failed " 3517 " Error: %x", rv)); 3518 MDI_CLIENT_SET_OFFLINE(ct); 3519 } 3520 if (rv != NDI_SUCCESS) { 3521 /* Reset the path state */ 3522 MDI_PI_LOCK(pip); 3523 MDI_PI(pip)->pi_state = 3524 MDI_PI_OLD_STATE(pip); 3525 MDI_PI_UNLOCK(pip); 3526 } 3527 } 3528 break; 3529 3530 case MDI_CLIENT_STATE_FAILED: 3531 /* 3532 * This is the last path case for 3533 * non-user initiated events. 3534 */ 3535 if (((flag & NDI_DEVI_REMOVE) == 0) && 3536 cdip && (i_ddi_node_state(cdip) >= 3537 DS_INITIALIZED)) { 3538 MDI_CLIENT_UNLOCK(ct); 3539 rv = ndi_devi_offline(cdip, 0); 3540 MDI_CLIENT_LOCK(ct); 3541 3542 if (rv != NDI_SUCCESS) { 3543 /* 3544 * ndi_devi_offline failed. 3545 * Reset client flags to 3546 * online as the path could not 3547 * be offlined. 3548 */ 3549 MDI_DEBUG(1, (CE_WARN, cdip, 3550 "!ndi_devi_offline: failed " 3551 " Error: %x", rv)); 3552 MDI_CLIENT_SET_ONLINE(ct); 3553 } 3554 } 3555 break; 3556 } 3557 /* 3558 * Convert to MDI error code 3559 */ 3560 switch (rv) { 3561 case NDI_SUCCESS: 3562 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3563 i_mdi_report_path_state(ct, pip); 3564 rv = MDI_SUCCESS; 3565 break; 3566 case NDI_BUSY: 3567 rv = MDI_BUSY; 3568 break; 3569 default: 3570 rv = MDI_FAILURE; 3571 break; 3572 } 3573 } 3574 } 3575 MDI_CLIENT_UNLOCK(ct); 3576 3577 state_change_exit: 3578 /* 3579 * Mark the pHCI as stable again. 3580 */ 3581 MDI_PHCI_LOCK(ph); 3582 MDI_PHCI_STABLE(ph); 3583 MDI_PHCI_UNLOCK(ph); 3584 return (rv); 3585 } 3586 3587 /* 3588 * mdi_pi_online(): 3589 * Place the path_info node in the online state. The path is 3590 * now available to be selected by mdi_select_path() for 3591 * transporting I/O requests to client devices. 3592 * Return Values: 3593 * MDI_SUCCESS 3594 * MDI_FAILURE 3595 */ 3596 int 3597 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3598 { 3599 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3600 int client_held = 0; 3601 int rv; 3602 int se_flag; 3603 int kmem_flag; 3604 3605 ASSERT(ct != NULL); 3606 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3607 if (rv != MDI_SUCCESS) 3608 return (rv); 3609 3610 MDI_PI_LOCK(pip); 3611 if (MDI_PI(pip)->pi_pm_held == 0) { 3612 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3613 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3614 i_mdi_pm_hold_pip(pip); 3615 client_held = 1; 3616 } 3617 MDI_PI_UNLOCK(pip); 3618 3619 if (client_held) { 3620 MDI_CLIENT_LOCK(ct); 3621 if (ct->ct_power_cnt == 0) { 3622 rv = i_mdi_power_all_phci(ct); 3623 } 3624 3625 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3626 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3627 i_mdi_pm_hold_client(ct, 1); 3628 MDI_CLIENT_UNLOCK(ct); 3629 } 3630 3631 /* determine interrupt context */ 3632 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3633 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3634 3635 /* A new path is online. Invalidate DINFOCACHE snap shot. */ 3636 i_ddi_di_cache_invalidate(kmem_flag); 3637 3638 return (rv); 3639 } 3640 3641 /* 3642 * mdi_pi_standby(): 3643 * Place the mdi_pathinfo node in standby state 3644 * 3645 * Return Values: 3646 * MDI_SUCCESS 3647 * MDI_FAILURE 3648 */ 3649 int 3650 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3651 { 3652 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3653 } 3654 3655 /* 3656 * mdi_pi_fault(): 3657 * Place the mdi_pathinfo node in fault'ed state 3658 * Return Values: 3659 * MDI_SUCCESS 3660 * MDI_FAILURE 3661 */ 3662 int 3663 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3664 { 3665 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3666 } 3667 3668 /* 3669 * mdi_pi_offline(): 3670 * Offline a mdi_pathinfo node. 3671 * Return Values: 3672 * MDI_SUCCESS 3673 * MDI_FAILURE 3674 */ 3675 int 3676 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3677 { 3678 int ret, client_held = 0; 3679 mdi_client_t *ct; 3680 int se_flag; 3681 int kmem_flag; 3682 3683 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3684 3685 if (ret == MDI_SUCCESS) { 3686 MDI_PI_LOCK(pip); 3687 if (MDI_PI(pip)->pi_pm_held) { 3688 client_held = 1; 3689 } 3690 MDI_PI_UNLOCK(pip); 3691 3692 if (client_held) { 3693 ct = MDI_PI(pip)->pi_client; 3694 MDI_CLIENT_LOCK(ct); 3695 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3696 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3697 i_mdi_pm_rele_client(ct, 1); 3698 MDI_CLIENT_UNLOCK(ct); 3699 } 3700 3701 /* determine interrupt context */ 3702 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3703 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3704 3705 /* pathinfo is offlined. update DINFOCACHE. */ 3706 i_ddi_di_cache_invalidate(kmem_flag); 3707 } 3708 3709 return (ret); 3710 } 3711 3712 /* 3713 * i_mdi_pi_offline(): 3714 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3715 */ 3716 static int 3717 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3718 { 3719 dev_info_t *vdip = NULL; 3720 mdi_vhci_t *vh = NULL; 3721 mdi_client_t *ct = NULL; 3722 int (*f)(); 3723 int rv; 3724 3725 MDI_PI_LOCK(pip); 3726 ct = MDI_PI(pip)->pi_client; 3727 ASSERT(ct != NULL); 3728 3729 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3730 /* 3731 * Give a chance for pending I/Os to complete. 3732 */ 3733 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3734 "%d cmds still pending on path: %p\n", 3735 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3736 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3737 &MDI_PI(pip)->pi_mutex, 3738 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3739 /* 3740 * The timeout time reached without ref_cnt being zero 3741 * being signaled. 3742 */ 3743 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3744 "Timeout reached on path %p without the cond\n", 3745 (void *)pip)); 3746 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3747 "%d cmds still pending on path: %p\n", 3748 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3749 } 3750 } 3751 vh = ct->ct_vhci; 3752 vdip = vh->vh_dip; 3753 3754 /* 3755 * Notify vHCI that has registered this event 3756 */ 3757 ASSERT(vh->vh_ops); 3758 f = vh->vh_ops->vo_pi_state_change; 3759 3760 if (f != NULL) { 3761 MDI_PI_UNLOCK(pip); 3762 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3763 flags)) != MDI_SUCCESS) { 3764 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3765 "!vo_path_offline failed " 3766 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3767 } 3768 MDI_PI_LOCK(pip); 3769 } 3770 3771 /* 3772 * Set the mdi_pathinfo node state and clear the transient condition 3773 */ 3774 MDI_PI_SET_OFFLINE(pip); 3775 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3776 MDI_PI_UNLOCK(pip); 3777 3778 MDI_CLIENT_LOCK(ct); 3779 if (rv == MDI_SUCCESS) { 3780 if (ct->ct_unstable == 0) { 3781 dev_info_t *cdip = ct->ct_dip; 3782 3783 /* 3784 * Onlining the mdi_pathinfo node will impact the 3785 * client state Update the client and dev_info node 3786 * state accordingly 3787 */ 3788 i_mdi_client_update_state(ct); 3789 rv = NDI_SUCCESS; 3790 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3791 if (cdip && 3792 (i_ddi_node_state(cdip) >= 3793 DS_INITIALIZED)) { 3794 MDI_CLIENT_UNLOCK(ct); 3795 rv = ndi_devi_offline(cdip, 0); 3796 MDI_CLIENT_LOCK(ct); 3797 if (rv != NDI_SUCCESS) { 3798 /* 3799 * ndi_devi_offline failed. 3800 * Reset client flags to 3801 * online. 3802 */ 3803 MDI_DEBUG(4, (CE_WARN, cdip, 3804 "!ndi_devi_offline: failed " 3805 " Error: %x", rv)); 3806 MDI_CLIENT_SET_ONLINE(ct); 3807 } 3808 } 3809 } 3810 /* 3811 * Convert to MDI error code 3812 */ 3813 switch (rv) { 3814 case NDI_SUCCESS: 3815 rv = MDI_SUCCESS; 3816 break; 3817 case NDI_BUSY: 3818 rv = MDI_BUSY; 3819 break; 3820 default: 3821 rv = MDI_FAILURE; 3822 break; 3823 } 3824 } 3825 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3826 i_mdi_report_path_state(ct, pip); 3827 } 3828 3829 MDI_CLIENT_UNLOCK(ct); 3830 3831 /* 3832 * Change in the mdi_pathinfo node state will impact the client state 3833 */ 3834 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3835 (void *)ct, (void *)pip)); 3836 return (rv); 3837 } 3838 3839 3840 /* 3841 * mdi_pi_get_addr(): 3842 * Get the unit address associated with a mdi_pathinfo node 3843 * 3844 * Return Values: 3845 * char * 3846 */ 3847 char * 3848 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3849 { 3850 if (pip == NULL) 3851 return (NULL); 3852 3853 return (MDI_PI(pip)->pi_addr); 3854 } 3855 3856 /* 3857 * mdi_pi_get_path_instance(): 3858 * Get the 'path_instance' of a mdi_pathinfo node 3859 * 3860 * Return Values: 3861 * path_instance 3862 */ 3863 int 3864 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 3865 { 3866 if (pip == NULL) 3867 return (0); 3868 3869 return (MDI_PI(pip)->pi_path_instance); 3870 } 3871 3872 /* 3873 * mdi_pi_pathname(): 3874 * Return pointer to path to pathinfo node. 3875 */ 3876 char * 3877 mdi_pi_pathname(mdi_pathinfo_t *pip) 3878 { 3879 if (pip == NULL) 3880 return (NULL); 3881 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 3882 } 3883 3884 /* 3885 * mdi_pi_get_client(): 3886 * Get the client devinfo associated with a mdi_pathinfo node 3887 * 3888 * Return Values: 3889 * Handle to client device dev_info node 3890 */ 3891 dev_info_t * 3892 mdi_pi_get_client(mdi_pathinfo_t *pip) 3893 { 3894 dev_info_t *dip = NULL; 3895 if (pip) { 3896 dip = MDI_PI(pip)->pi_client->ct_dip; 3897 } 3898 return (dip); 3899 } 3900 3901 /* 3902 * mdi_pi_get_phci(): 3903 * Get the pHCI devinfo associated with the mdi_pathinfo node 3904 * Return Values: 3905 * Handle to dev_info node 3906 */ 3907 dev_info_t * 3908 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3909 { 3910 dev_info_t *dip = NULL; 3911 if (pip) { 3912 dip = MDI_PI(pip)->pi_phci->ph_dip; 3913 } 3914 return (dip); 3915 } 3916 3917 /* 3918 * mdi_pi_get_client_private(): 3919 * Get the client private information associated with the 3920 * mdi_pathinfo node 3921 */ 3922 void * 3923 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3924 { 3925 void *cprivate = NULL; 3926 if (pip) { 3927 cprivate = MDI_PI(pip)->pi_cprivate; 3928 } 3929 return (cprivate); 3930 } 3931 3932 /* 3933 * mdi_pi_set_client_private(): 3934 * Set the client private information in the mdi_pathinfo node 3935 */ 3936 void 3937 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3938 { 3939 if (pip) { 3940 MDI_PI(pip)->pi_cprivate = priv; 3941 } 3942 } 3943 3944 /* 3945 * mdi_pi_get_phci_private(): 3946 * Get the pHCI private information associated with the 3947 * mdi_pathinfo node 3948 */ 3949 caddr_t 3950 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3951 { 3952 caddr_t pprivate = NULL; 3953 if (pip) { 3954 pprivate = MDI_PI(pip)->pi_pprivate; 3955 } 3956 return (pprivate); 3957 } 3958 3959 /* 3960 * mdi_pi_set_phci_private(): 3961 * Set the pHCI private information in the mdi_pathinfo node 3962 */ 3963 void 3964 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3965 { 3966 if (pip) { 3967 MDI_PI(pip)->pi_pprivate = priv; 3968 } 3969 } 3970 3971 /* 3972 * mdi_pi_get_state(): 3973 * Get the mdi_pathinfo node state. Transient states are internal 3974 * and not provided to the users 3975 */ 3976 mdi_pathinfo_state_t 3977 mdi_pi_get_state(mdi_pathinfo_t *pip) 3978 { 3979 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3980 3981 if (pip) { 3982 if (MDI_PI_IS_TRANSIENT(pip)) { 3983 /* 3984 * mdi_pathinfo is in state transition. Return the 3985 * last good state. 3986 */ 3987 state = MDI_PI_OLD_STATE(pip); 3988 } else { 3989 state = MDI_PI_STATE(pip); 3990 } 3991 } 3992 return (state); 3993 } 3994 3995 /* 3996 * Note that the following function needs to be the new interface for 3997 * mdi_pi_get_state when mpxio gets integrated to ON. 3998 */ 3999 int 4000 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4001 uint32_t *ext_state) 4002 { 4003 *state = MDI_PATHINFO_STATE_INIT; 4004 4005 if (pip) { 4006 if (MDI_PI_IS_TRANSIENT(pip)) { 4007 /* 4008 * mdi_pathinfo is in state transition. Return the 4009 * last good state. 4010 */ 4011 *state = MDI_PI_OLD_STATE(pip); 4012 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4013 } else { 4014 *state = MDI_PI_STATE(pip); 4015 *ext_state = MDI_PI_EXT_STATE(pip); 4016 } 4017 } 4018 return (MDI_SUCCESS); 4019 } 4020 4021 /* 4022 * mdi_pi_get_preferred: 4023 * Get the preferred path flag 4024 */ 4025 int 4026 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4027 { 4028 if (pip) { 4029 return (MDI_PI(pip)->pi_preferred); 4030 } 4031 return (0); 4032 } 4033 4034 /* 4035 * mdi_pi_set_preferred: 4036 * Set the preferred path flag 4037 */ 4038 void 4039 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4040 { 4041 if (pip) { 4042 MDI_PI(pip)->pi_preferred = preferred; 4043 } 4044 } 4045 4046 /* 4047 * mdi_pi_set_state(): 4048 * Set the mdi_pathinfo node state 4049 */ 4050 void 4051 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4052 { 4053 uint32_t ext_state; 4054 4055 if (pip) { 4056 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4057 MDI_PI(pip)->pi_state = state; 4058 MDI_PI(pip)->pi_state |= ext_state; 4059 } 4060 } 4061 4062 /* 4063 * Property functions: 4064 */ 4065 int 4066 i_map_nvlist_error_to_mdi(int val) 4067 { 4068 int rv; 4069 4070 switch (val) { 4071 case 0: 4072 rv = DDI_PROP_SUCCESS; 4073 break; 4074 case EINVAL: 4075 case ENOTSUP: 4076 rv = DDI_PROP_INVAL_ARG; 4077 break; 4078 case ENOMEM: 4079 rv = DDI_PROP_NO_MEMORY; 4080 break; 4081 default: 4082 rv = DDI_PROP_NOT_FOUND; 4083 break; 4084 } 4085 return (rv); 4086 } 4087 4088 /* 4089 * mdi_pi_get_next_prop(): 4090 * Property walk function. The caller should hold mdi_pi_lock() 4091 * and release by calling mdi_pi_unlock() at the end of walk to 4092 * get a consistent value. 4093 */ 4094 nvpair_t * 4095 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4096 { 4097 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4098 return (NULL); 4099 } 4100 ASSERT(MDI_PI_LOCKED(pip)); 4101 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4102 } 4103 4104 /* 4105 * mdi_prop_remove(): 4106 * Remove the named property from the named list. 4107 */ 4108 int 4109 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4110 { 4111 if (pip == NULL) { 4112 return (DDI_PROP_NOT_FOUND); 4113 } 4114 ASSERT(!MDI_PI_LOCKED(pip)); 4115 MDI_PI_LOCK(pip); 4116 if (MDI_PI(pip)->pi_prop == NULL) { 4117 MDI_PI_UNLOCK(pip); 4118 return (DDI_PROP_NOT_FOUND); 4119 } 4120 if (name) { 4121 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4122 } else { 4123 char nvp_name[MAXNAMELEN]; 4124 nvpair_t *nvp; 4125 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4126 while (nvp) { 4127 nvpair_t *next; 4128 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4129 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 4130 nvpair_name(nvp)); 4131 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4132 nvp_name); 4133 nvp = next; 4134 } 4135 } 4136 MDI_PI_UNLOCK(pip); 4137 return (DDI_PROP_SUCCESS); 4138 } 4139 4140 /* 4141 * mdi_prop_size(): 4142 * Get buffer size needed to pack the property data. 4143 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4144 * buffer size. 4145 */ 4146 int 4147 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4148 { 4149 int rv; 4150 size_t bufsize; 4151 4152 *buflenp = 0; 4153 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4154 return (DDI_PROP_NOT_FOUND); 4155 } 4156 ASSERT(MDI_PI_LOCKED(pip)); 4157 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4158 &bufsize, NV_ENCODE_NATIVE); 4159 *buflenp = bufsize; 4160 return (i_map_nvlist_error_to_mdi(rv)); 4161 } 4162 4163 /* 4164 * mdi_prop_pack(): 4165 * pack the property list. The caller should hold the 4166 * mdi_pathinfo_t node to get a consistent data 4167 */ 4168 int 4169 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4170 { 4171 int rv; 4172 size_t bufsize; 4173 4174 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4175 return (DDI_PROP_NOT_FOUND); 4176 } 4177 4178 ASSERT(MDI_PI_LOCKED(pip)); 4179 4180 bufsize = buflen; 4181 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4182 NV_ENCODE_NATIVE, KM_SLEEP); 4183 4184 return (i_map_nvlist_error_to_mdi(rv)); 4185 } 4186 4187 /* 4188 * mdi_prop_update_byte(): 4189 * Create/Update a byte property 4190 */ 4191 int 4192 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4193 { 4194 int rv; 4195 4196 if (pip == NULL) { 4197 return (DDI_PROP_INVAL_ARG); 4198 } 4199 ASSERT(!MDI_PI_LOCKED(pip)); 4200 MDI_PI_LOCK(pip); 4201 if (MDI_PI(pip)->pi_prop == NULL) { 4202 MDI_PI_UNLOCK(pip); 4203 return (DDI_PROP_NOT_FOUND); 4204 } 4205 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4206 MDI_PI_UNLOCK(pip); 4207 return (i_map_nvlist_error_to_mdi(rv)); 4208 } 4209 4210 /* 4211 * mdi_prop_update_byte_array(): 4212 * Create/Update a byte array property 4213 */ 4214 int 4215 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4216 uint_t nelements) 4217 { 4218 int rv; 4219 4220 if (pip == NULL) { 4221 return (DDI_PROP_INVAL_ARG); 4222 } 4223 ASSERT(!MDI_PI_LOCKED(pip)); 4224 MDI_PI_LOCK(pip); 4225 if (MDI_PI(pip)->pi_prop == NULL) { 4226 MDI_PI_UNLOCK(pip); 4227 return (DDI_PROP_NOT_FOUND); 4228 } 4229 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4230 MDI_PI_UNLOCK(pip); 4231 return (i_map_nvlist_error_to_mdi(rv)); 4232 } 4233 4234 /* 4235 * mdi_prop_update_int(): 4236 * Create/Update a 32 bit integer property 4237 */ 4238 int 4239 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4240 { 4241 int rv; 4242 4243 if (pip == NULL) { 4244 return (DDI_PROP_INVAL_ARG); 4245 } 4246 ASSERT(!MDI_PI_LOCKED(pip)); 4247 MDI_PI_LOCK(pip); 4248 if (MDI_PI(pip)->pi_prop == NULL) { 4249 MDI_PI_UNLOCK(pip); 4250 return (DDI_PROP_NOT_FOUND); 4251 } 4252 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4253 MDI_PI_UNLOCK(pip); 4254 return (i_map_nvlist_error_to_mdi(rv)); 4255 } 4256 4257 /* 4258 * mdi_prop_update_int64(): 4259 * Create/Update a 64 bit integer property 4260 */ 4261 int 4262 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4263 { 4264 int rv; 4265 4266 if (pip == NULL) { 4267 return (DDI_PROP_INVAL_ARG); 4268 } 4269 ASSERT(!MDI_PI_LOCKED(pip)); 4270 MDI_PI_LOCK(pip); 4271 if (MDI_PI(pip)->pi_prop == NULL) { 4272 MDI_PI_UNLOCK(pip); 4273 return (DDI_PROP_NOT_FOUND); 4274 } 4275 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4276 MDI_PI_UNLOCK(pip); 4277 return (i_map_nvlist_error_to_mdi(rv)); 4278 } 4279 4280 /* 4281 * mdi_prop_update_int_array(): 4282 * Create/Update a int array property 4283 */ 4284 int 4285 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4286 uint_t nelements) 4287 { 4288 int rv; 4289 4290 if (pip == NULL) { 4291 return (DDI_PROP_INVAL_ARG); 4292 } 4293 ASSERT(!MDI_PI_LOCKED(pip)); 4294 MDI_PI_LOCK(pip); 4295 if (MDI_PI(pip)->pi_prop == NULL) { 4296 MDI_PI_UNLOCK(pip); 4297 return (DDI_PROP_NOT_FOUND); 4298 } 4299 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4300 nelements); 4301 MDI_PI_UNLOCK(pip); 4302 return (i_map_nvlist_error_to_mdi(rv)); 4303 } 4304 4305 /* 4306 * mdi_prop_update_string(): 4307 * Create/Update a string property 4308 */ 4309 int 4310 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4311 { 4312 int rv; 4313 4314 if (pip == NULL) { 4315 return (DDI_PROP_INVAL_ARG); 4316 } 4317 ASSERT(!MDI_PI_LOCKED(pip)); 4318 MDI_PI_LOCK(pip); 4319 if (MDI_PI(pip)->pi_prop == NULL) { 4320 MDI_PI_UNLOCK(pip); 4321 return (DDI_PROP_NOT_FOUND); 4322 } 4323 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4324 MDI_PI_UNLOCK(pip); 4325 return (i_map_nvlist_error_to_mdi(rv)); 4326 } 4327 4328 /* 4329 * mdi_prop_update_string_array(): 4330 * Create/Update a string array property 4331 */ 4332 int 4333 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4334 uint_t nelements) 4335 { 4336 int rv; 4337 4338 if (pip == NULL) { 4339 return (DDI_PROP_INVAL_ARG); 4340 } 4341 ASSERT(!MDI_PI_LOCKED(pip)); 4342 MDI_PI_LOCK(pip); 4343 if (MDI_PI(pip)->pi_prop == NULL) { 4344 MDI_PI_UNLOCK(pip); 4345 return (DDI_PROP_NOT_FOUND); 4346 } 4347 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4348 nelements); 4349 MDI_PI_UNLOCK(pip); 4350 return (i_map_nvlist_error_to_mdi(rv)); 4351 } 4352 4353 /* 4354 * mdi_prop_lookup_byte(): 4355 * Look for byte property identified by name. The data returned 4356 * is the actual property and valid as long as mdi_pathinfo_t node 4357 * is alive. 4358 */ 4359 int 4360 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4361 { 4362 int rv; 4363 4364 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4365 return (DDI_PROP_NOT_FOUND); 4366 } 4367 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4368 return (i_map_nvlist_error_to_mdi(rv)); 4369 } 4370 4371 4372 /* 4373 * mdi_prop_lookup_byte_array(): 4374 * Look for byte array property identified by name. The data 4375 * returned is the actual property and valid as long as 4376 * mdi_pathinfo_t node is alive. 4377 */ 4378 int 4379 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4380 uint_t *nelements) 4381 { 4382 int rv; 4383 4384 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4385 return (DDI_PROP_NOT_FOUND); 4386 } 4387 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4388 nelements); 4389 return (i_map_nvlist_error_to_mdi(rv)); 4390 } 4391 4392 /* 4393 * mdi_prop_lookup_int(): 4394 * Look for int property identified by name. The data returned 4395 * is the actual property and valid as long as mdi_pathinfo_t 4396 * node is alive. 4397 */ 4398 int 4399 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4400 { 4401 int rv; 4402 4403 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4404 return (DDI_PROP_NOT_FOUND); 4405 } 4406 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4407 return (i_map_nvlist_error_to_mdi(rv)); 4408 } 4409 4410 /* 4411 * mdi_prop_lookup_int64(): 4412 * Look for int64 property identified by name. The data returned 4413 * is the actual property and valid as long as mdi_pathinfo_t node 4414 * is alive. 4415 */ 4416 int 4417 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4418 { 4419 int rv; 4420 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4421 return (DDI_PROP_NOT_FOUND); 4422 } 4423 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4424 return (i_map_nvlist_error_to_mdi(rv)); 4425 } 4426 4427 /* 4428 * mdi_prop_lookup_int_array(): 4429 * Look for int array property identified by name. The data 4430 * returned is the actual property and valid as long as 4431 * mdi_pathinfo_t node is alive. 4432 */ 4433 int 4434 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4435 uint_t *nelements) 4436 { 4437 int rv; 4438 4439 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4440 return (DDI_PROP_NOT_FOUND); 4441 } 4442 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4443 (int32_t **)data, nelements); 4444 return (i_map_nvlist_error_to_mdi(rv)); 4445 } 4446 4447 /* 4448 * mdi_prop_lookup_string(): 4449 * Look for string property identified by name. The data 4450 * returned is the actual property and valid as long as 4451 * mdi_pathinfo_t node is alive. 4452 */ 4453 int 4454 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4455 { 4456 int rv; 4457 4458 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4459 return (DDI_PROP_NOT_FOUND); 4460 } 4461 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4462 return (i_map_nvlist_error_to_mdi(rv)); 4463 } 4464 4465 /* 4466 * mdi_prop_lookup_string_array(): 4467 * Look for string array property identified by name. The data 4468 * returned is the actual property and valid as long as 4469 * mdi_pathinfo_t node is alive. 4470 */ 4471 int 4472 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4473 uint_t *nelements) 4474 { 4475 int rv; 4476 4477 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4478 return (DDI_PROP_NOT_FOUND); 4479 } 4480 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4481 nelements); 4482 return (i_map_nvlist_error_to_mdi(rv)); 4483 } 4484 4485 /* 4486 * mdi_prop_free(): 4487 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4488 * functions return the pointer to actual property data and not a 4489 * copy of it. So the data returned is valid as long as 4490 * mdi_pathinfo_t node is valid. 4491 */ 4492 /*ARGSUSED*/ 4493 int 4494 mdi_prop_free(void *data) 4495 { 4496 return (DDI_PROP_SUCCESS); 4497 } 4498 4499 /*ARGSUSED*/ 4500 static void 4501 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4502 { 4503 char *phci_path, *ct_path; 4504 char *ct_status; 4505 char *status; 4506 dev_info_t *dip = ct->ct_dip; 4507 char lb_buf[64]; 4508 4509 ASSERT(MDI_CLIENT_LOCKED(ct)); 4510 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4511 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4512 return; 4513 } 4514 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4515 ct_status = "optimal"; 4516 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4517 ct_status = "degraded"; 4518 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4519 ct_status = "failed"; 4520 } else { 4521 ct_status = "unknown"; 4522 } 4523 4524 if (MDI_PI_IS_OFFLINE(pip)) { 4525 status = "offline"; 4526 } else if (MDI_PI_IS_ONLINE(pip)) { 4527 status = "online"; 4528 } else if (MDI_PI_IS_STANDBY(pip)) { 4529 status = "standby"; 4530 } else if (MDI_PI_IS_FAULT(pip)) { 4531 status = "faulted"; 4532 } else { 4533 status = "unknown"; 4534 } 4535 4536 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4537 (void) snprintf(lb_buf, sizeof (lb_buf), 4538 "%s, region-size: %d", mdi_load_balance_lba, 4539 ct->ct_lb_args->region_size); 4540 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4541 (void) snprintf(lb_buf, sizeof (lb_buf), 4542 "%s", mdi_load_balance_none); 4543 } else { 4544 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4545 mdi_load_balance_rr); 4546 } 4547 4548 if (dip) { 4549 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4550 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4551 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4552 "path %s (%s%d) to target address: %s is %s" 4553 " Load balancing: %s\n", 4554 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4555 ddi_get_instance(dip), ct_status, 4556 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4557 ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4558 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4559 MDI_PI(pip)->pi_addr, status, lb_buf); 4560 kmem_free(phci_path, MAXPATHLEN); 4561 kmem_free(ct_path, MAXPATHLEN); 4562 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4563 } 4564 } 4565 4566 #ifdef DEBUG 4567 /* 4568 * i_mdi_log(): 4569 * Utility function for error message management 4570 * 4571 */ 4572 /*PRINTFLIKE3*/ 4573 static void 4574 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4575 { 4576 char name[MAXNAMELEN]; 4577 char buf[MAXNAMELEN]; 4578 char *bp; 4579 va_list ap; 4580 int log_only = 0; 4581 int boot_only = 0; 4582 int console_only = 0; 4583 4584 if (dip) { 4585 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4586 ddi_node_name(dip), ddi_get_instance(dip)); 4587 } else { 4588 name[0] = 0; 4589 } 4590 4591 va_start(ap, fmt); 4592 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4593 va_end(ap); 4594 4595 switch (buf[0]) { 4596 case '!': 4597 bp = &buf[1]; 4598 log_only = 1; 4599 break; 4600 case '?': 4601 bp = &buf[1]; 4602 boot_only = 1; 4603 break; 4604 case '^': 4605 bp = &buf[1]; 4606 console_only = 1; 4607 break; 4608 default: 4609 bp = buf; 4610 break; 4611 } 4612 if (mdi_debug_logonly) { 4613 log_only = 1; 4614 boot_only = 0; 4615 console_only = 0; 4616 } 4617 4618 switch (level) { 4619 case CE_NOTE: 4620 level = CE_CONT; 4621 /* FALLTHROUGH */ 4622 case CE_CONT: 4623 case CE_WARN: 4624 case CE_PANIC: 4625 if (boot_only) { 4626 cmn_err(level, "?mdi: %s%s", name, bp); 4627 } else if (console_only) { 4628 cmn_err(level, "^mdi: %s%s", name, bp); 4629 } else if (log_only) { 4630 cmn_err(level, "!mdi: %s%s", name, bp); 4631 } else { 4632 cmn_err(level, "mdi: %s%s", name, bp); 4633 } 4634 break; 4635 default: 4636 cmn_err(level, "mdi: %s%s", name, bp); 4637 break; 4638 } 4639 } 4640 #endif /* DEBUG */ 4641 4642 void 4643 i_mdi_client_online(dev_info_t *ct_dip) 4644 { 4645 mdi_client_t *ct; 4646 4647 /* 4648 * Client online notification. Mark client state as online 4649 * restore our binding with dev_info node 4650 */ 4651 ct = i_devi_get_client(ct_dip); 4652 ASSERT(ct != NULL); 4653 MDI_CLIENT_LOCK(ct); 4654 MDI_CLIENT_SET_ONLINE(ct); 4655 /* catch for any memory leaks */ 4656 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4657 ct->ct_dip = ct_dip; 4658 4659 if (ct->ct_power_cnt == 0) 4660 (void) i_mdi_power_all_phci(ct); 4661 4662 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4663 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4664 i_mdi_pm_hold_client(ct, 1); 4665 4666 MDI_CLIENT_UNLOCK(ct); 4667 } 4668 4669 void 4670 i_mdi_phci_online(dev_info_t *ph_dip) 4671 { 4672 mdi_phci_t *ph; 4673 4674 /* pHCI online notification. Mark state accordingly */ 4675 ph = i_devi_get_phci(ph_dip); 4676 ASSERT(ph != NULL); 4677 MDI_PHCI_LOCK(ph); 4678 MDI_PHCI_SET_ONLINE(ph); 4679 MDI_PHCI_UNLOCK(ph); 4680 } 4681 4682 /* 4683 * mdi_devi_online(): 4684 * Online notification from NDI framework on pHCI/client 4685 * device online. 4686 * Return Values: 4687 * NDI_SUCCESS 4688 * MDI_FAILURE 4689 */ 4690 /*ARGSUSED*/ 4691 int 4692 mdi_devi_online(dev_info_t *dip, uint_t flags) 4693 { 4694 if (MDI_PHCI(dip)) { 4695 i_mdi_phci_online(dip); 4696 } 4697 4698 if (MDI_CLIENT(dip)) { 4699 i_mdi_client_online(dip); 4700 } 4701 return (NDI_SUCCESS); 4702 } 4703 4704 /* 4705 * mdi_devi_offline(): 4706 * Offline notification from NDI framework on pHCI/Client device 4707 * offline. 4708 * 4709 * Return Values: 4710 * NDI_SUCCESS 4711 * NDI_FAILURE 4712 */ 4713 /*ARGSUSED*/ 4714 int 4715 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4716 { 4717 int rv = NDI_SUCCESS; 4718 4719 if (MDI_CLIENT(dip)) { 4720 rv = i_mdi_client_offline(dip, flags); 4721 if (rv != NDI_SUCCESS) 4722 return (rv); 4723 } 4724 4725 if (MDI_PHCI(dip)) { 4726 rv = i_mdi_phci_offline(dip, flags); 4727 4728 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4729 /* set client back online */ 4730 i_mdi_client_online(dip); 4731 } 4732 } 4733 4734 return (rv); 4735 } 4736 4737 /*ARGSUSED*/ 4738 static int 4739 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4740 { 4741 int rv = NDI_SUCCESS; 4742 mdi_phci_t *ph; 4743 mdi_client_t *ct; 4744 mdi_pathinfo_t *pip; 4745 mdi_pathinfo_t *next; 4746 mdi_pathinfo_t *failed_pip = NULL; 4747 dev_info_t *cdip; 4748 4749 /* 4750 * pHCI component offline notification 4751 * Make sure that this pHCI instance is free to be offlined. 4752 * If it is OK to proceed, Offline and remove all the child 4753 * mdi_pathinfo nodes. This process automatically offlines 4754 * corresponding client devices, for which this pHCI provides 4755 * critical services. 4756 */ 4757 ph = i_devi_get_phci(dip); 4758 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n", 4759 (void *)dip, (void *)ph)); 4760 if (ph == NULL) { 4761 return (rv); 4762 } 4763 4764 MDI_PHCI_LOCK(ph); 4765 4766 if (MDI_PHCI_IS_OFFLINE(ph)) { 4767 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", 4768 (void *)ph)); 4769 MDI_PHCI_UNLOCK(ph); 4770 return (NDI_SUCCESS); 4771 } 4772 4773 /* 4774 * Check to see if the pHCI can be offlined 4775 */ 4776 if (ph->ph_unstable) { 4777 MDI_DEBUG(1, (CE_WARN, dip, 4778 "!One or more target devices are in transient " 4779 "state. This device can not be removed at " 4780 "this moment. Please try again later.")); 4781 MDI_PHCI_UNLOCK(ph); 4782 return (NDI_BUSY); 4783 } 4784 4785 pip = ph->ph_path_head; 4786 while (pip != NULL) { 4787 MDI_PI_LOCK(pip); 4788 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4789 4790 /* 4791 * The mdi_pathinfo state is OK. Check the client state. 4792 * If failover in progress fail the pHCI from offlining 4793 */ 4794 ct = MDI_PI(pip)->pi_client; 4795 i_mdi_client_lock(ct, pip); 4796 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4797 (ct->ct_unstable)) { 4798 /* 4799 * Failover is in progress, Fail the DR 4800 */ 4801 MDI_DEBUG(1, (CE_WARN, dip, 4802 "!pHCI device (%s%d) is Busy. %s", 4803 ddi_driver_name(dip), ddi_get_instance(dip), 4804 "This device can not be removed at " 4805 "this moment. Please try again later.")); 4806 MDI_PI_UNLOCK(pip); 4807 i_mdi_client_unlock(ct); 4808 MDI_PHCI_UNLOCK(ph); 4809 return (NDI_BUSY); 4810 } 4811 MDI_PI_UNLOCK(pip); 4812 4813 /* 4814 * Check to see of we are removing the last path of this 4815 * client device... 4816 */ 4817 cdip = ct->ct_dip; 4818 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4819 (i_mdi_client_compute_state(ct, ph) == 4820 MDI_CLIENT_STATE_FAILED)) { 4821 i_mdi_client_unlock(ct); 4822 MDI_PHCI_UNLOCK(ph); 4823 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4824 /* 4825 * ndi_devi_offline() failed. 4826 * This pHCI provides the critical path 4827 * to one or more client devices. 4828 * Return busy. 4829 */ 4830 MDI_PHCI_LOCK(ph); 4831 MDI_DEBUG(1, (CE_WARN, dip, 4832 "!pHCI device (%s%d) is Busy. %s", 4833 ddi_driver_name(dip), ddi_get_instance(dip), 4834 "This device can not be removed at " 4835 "this moment. Please try again later.")); 4836 failed_pip = pip; 4837 break; 4838 } else { 4839 MDI_PHCI_LOCK(ph); 4840 pip = next; 4841 } 4842 } else { 4843 i_mdi_client_unlock(ct); 4844 pip = next; 4845 } 4846 } 4847 4848 if (failed_pip) { 4849 pip = ph->ph_path_head; 4850 while (pip != failed_pip) { 4851 MDI_PI_LOCK(pip); 4852 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4853 ct = MDI_PI(pip)->pi_client; 4854 i_mdi_client_lock(ct, pip); 4855 cdip = ct->ct_dip; 4856 switch (MDI_CLIENT_STATE(ct)) { 4857 case MDI_CLIENT_STATE_OPTIMAL: 4858 case MDI_CLIENT_STATE_DEGRADED: 4859 if (cdip) { 4860 MDI_PI_UNLOCK(pip); 4861 i_mdi_client_unlock(ct); 4862 MDI_PHCI_UNLOCK(ph); 4863 (void) ndi_devi_online(cdip, 0); 4864 MDI_PHCI_LOCK(ph); 4865 pip = next; 4866 continue; 4867 } 4868 break; 4869 4870 case MDI_CLIENT_STATE_FAILED: 4871 if (cdip) { 4872 MDI_PI_UNLOCK(pip); 4873 i_mdi_client_unlock(ct); 4874 MDI_PHCI_UNLOCK(ph); 4875 (void) ndi_devi_offline(cdip, 0); 4876 MDI_PHCI_LOCK(ph); 4877 pip = next; 4878 continue; 4879 } 4880 break; 4881 } 4882 MDI_PI_UNLOCK(pip); 4883 i_mdi_client_unlock(ct); 4884 pip = next; 4885 } 4886 MDI_PHCI_UNLOCK(ph); 4887 return (NDI_BUSY); 4888 } 4889 4890 /* 4891 * Mark the pHCI as offline 4892 */ 4893 MDI_PHCI_SET_OFFLINE(ph); 4894 4895 /* 4896 * Mark the child mdi_pathinfo nodes as transient 4897 */ 4898 pip = ph->ph_path_head; 4899 while (pip != NULL) { 4900 MDI_PI_LOCK(pip); 4901 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4902 MDI_PI_SET_OFFLINING(pip); 4903 MDI_PI_UNLOCK(pip); 4904 pip = next; 4905 } 4906 MDI_PHCI_UNLOCK(ph); 4907 /* 4908 * Give a chance for any pending commands to execute 4909 */ 4910 delay(1); 4911 MDI_PHCI_LOCK(ph); 4912 pip = ph->ph_path_head; 4913 while (pip != NULL) { 4914 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4915 (void) i_mdi_pi_offline(pip, flags); 4916 MDI_PI_LOCK(pip); 4917 ct = MDI_PI(pip)->pi_client; 4918 if (!MDI_PI_IS_OFFLINE(pip)) { 4919 MDI_DEBUG(1, (CE_WARN, dip, 4920 "!pHCI device (%s%d) is Busy. %s", 4921 ddi_driver_name(dip), ddi_get_instance(dip), 4922 "This device can not be removed at " 4923 "this moment. Please try again later.")); 4924 MDI_PI_UNLOCK(pip); 4925 MDI_PHCI_SET_ONLINE(ph); 4926 MDI_PHCI_UNLOCK(ph); 4927 return (NDI_BUSY); 4928 } 4929 MDI_PI_UNLOCK(pip); 4930 pip = next; 4931 } 4932 MDI_PHCI_UNLOCK(ph); 4933 4934 return (rv); 4935 } 4936 4937 void 4938 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 4939 { 4940 mdi_phci_t *ph; 4941 mdi_client_t *ct; 4942 mdi_pathinfo_t *pip; 4943 mdi_pathinfo_t *next; 4944 dev_info_t *cdip; 4945 4946 if (!MDI_PHCI(dip)) 4947 return; 4948 4949 ph = i_devi_get_phci(dip); 4950 if (ph == NULL) { 4951 return; 4952 } 4953 4954 MDI_PHCI_LOCK(ph); 4955 4956 if (MDI_PHCI_IS_OFFLINE(ph)) { 4957 /* has no last path */ 4958 MDI_PHCI_UNLOCK(ph); 4959 return; 4960 } 4961 4962 pip = ph->ph_path_head; 4963 while (pip != NULL) { 4964 MDI_PI_LOCK(pip); 4965 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4966 4967 ct = MDI_PI(pip)->pi_client; 4968 i_mdi_client_lock(ct, pip); 4969 MDI_PI_UNLOCK(pip); 4970 4971 cdip = ct->ct_dip; 4972 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4973 (i_mdi_client_compute_state(ct, ph) == 4974 MDI_CLIENT_STATE_FAILED)) { 4975 /* Last path. Mark client dip as retiring */ 4976 i_mdi_client_unlock(ct); 4977 MDI_PHCI_UNLOCK(ph); 4978 (void) e_ddi_mark_retiring(cdip, cons_array); 4979 MDI_PHCI_LOCK(ph); 4980 pip = next; 4981 } else { 4982 i_mdi_client_unlock(ct); 4983 pip = next; 4984 } 4985 } 4986 4987 MDI_PHCI_UNLOCK(ph); 4988 4989 return; 4990 } 4991 4992 void 4993 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 4994 { 4995 mdi_phci_t *ph; 4996 mdi_client_t *ct; 4997 mdi_pathinfo_t *pip; 4998 mdi_pathinfo_t *next; 4999 dev_info_t *cdip; 5000 5001 if (!MDI_PHCI(dip)) 5002 return; 5003 5004 ph = i_devi_get_phci(dip); 5005 if (ph == NULL) 5006 return; 5007 5008 MDI_PHCI_LOCK(ph); 5009 5010 if (MDI_PHCI_IS_OFFLINE(ph)) { 5011 MDI_PHCI_UNLOCK(ph); 5012 /* not last path */ 5013 return; 5014 } 5015 5016 if (ph->ph_unstable) { 5017 MDI_PHCI_UNLOCK(ph); 5018 /* can't check for constraints */ 5019 *constraint = 0; 5020 return; 5021 } 5022 5023 pip = ph->ph_path_head; 5024 while (pip != NULL) { 5025 MDI_PI_LOCK(pip); 5026 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5027 5028 /* 5029 * The mdi_pathinfo state is OK. Check the client state. 5030 * If failover in progress fail the pHCI from offlining 5031 */ 5032 ct = MDI_PI(pip)->pi_client; 5033 i_mdi_client_lock(ct, pip); 5034 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5035 (ct->ct_unstable)) { 5036 /* 5037 * Failover is in progress, can't check for constraints 5038 */ 5039 MDI_PI_UNLOCK(pip); 5040 i_mdi_client_unlock(ct); 5041 MDI_PHCI_UNLOCK(ph); 5042 *constraint = 0; 5043 return; 5044 } 5045 MDI_PI_UNLOCK(pip); 5046 5047 /* 5048 * Check to see of we are retiring the last path of this 5049 * client device... 5050 */ 5051 cdip = ct->ct_dip; 5052 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5053 (i_mdi_client_compute_state(ct, ph) == 5054 MDI_CLIENT_STATE_FAILED)) { 5055 i_mdi_client_unlock(ct); 5056 MDI_PHCI_UNLOCK(ph); 5057 (void) e_ddi_retire_notify(cdip, constraint); 5058 MDI_PHCI_LOCK(ph); 5059 pip = next; 5060 } else { 5061 i_mdi_client_unlock(ct); 5062 pip = next; 5063 } 5064 } 5065 5066 MDI_PHCI_UNLOCK(ph); 5067 5068 return; 5069 } 5070 5071 /* 5072 * offline the path(s) hanging off the PHCI. If the 5073 * last path to any client, check that constraints 5074 * have been applied. 5075 */ 5076 void 5077 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) 5078 { 5079 mdi_phci_t *ph; 5080 mdi_client_t *ct; 5081 mdi_pathinfo_t *pip; 5082 mdi_pathinfo_t *next; 5083 dev_info_t *cdip; 5084 int unstable = 0; 5085 int constraint; 5086 5087 if (!MDI_PHCI(dip)) 5088 return; 5089 5090 ph = i_devi_get_phci(dip); 5091 if (ph == NULL) { 5092 /* no last path and no pips */ 5093 return; 5094 } 5095 5096 MDI_PHCI_LOCK(ph); 5097 5098 if (MDI_PHCI_IS_OFFLINE(ph)) { 5099 MDI_PHCI_UNLOCK(ph); 5100 /* no last path and no pips */ 5101 return; 5102 } 5103 5104 /* 5105 * Check to see if the pHCI can be offlined 5106 */ 5107 if (ph->ph_unstable) { 5108 unstable = 1; 5109 } 5110 5111 pip = ph->ph_path_head; 5112 while (pip != NULL) { 5113 MDI_PI_LOCK(pip); 5114 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5115 5116 /* 5117 * if failover in progress fail the pHCI from offlining 5118 */ 5119 ct = MDI_PI(pip)->pi_client; 5120 i_mdi_client_lock(ct, pip); 5121 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5122 (ct->ct_unstable)) { 5123 unstable = 1; 5124 } 5125 MDI_PI_UNLOCK(pip); 5126 5127 /* 5128 * Check to see of we are removing the last path of this 5129 * client device... 5130 */ 5131 cdip = ct->ct_dip; 5132 if (!phci_only && cdip && 5133 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5134 (i_mdi_client_compute_state(ct, ph) == 5135 MDI_CLIENT_STATE_FAILED)) { 5136 i_mdi_client_unlock(ct); 5137 MDI_PHCI_UNLOCK(ph); 5138 /* 5139 * We don't retire clients we just retire the 5140 * path to a client. If it is the last path 5141 * to a client, constraints are checked and 5142 * if we pass the last path is offlined. MPXIO will 5143 * then fail all I/Os to the client. Since we don't 5144 * want to retire the client on a path error 5145 * set constraint = 0 so that the client dip 5146 * is not retired. 5147 */ 5148 constraint = 0; 5149 (void) e_ddi_retire_finalize(cdip, &constraint); 5150 MDI_PHCI_LOCK(ph); 5151 pip = next; 5152 } else { 5153 i_mdi_client_unlock(ct); 5154 pip = next; 5155 } 5156 } 5157 5158 /* 5159 * Cannot offline pip(s) 5160 */ 5161 if (unstable) { 5162 cmn_err(CE_WARN, "PHCI in transient state, cannot " 5163 "retire, dip = %p", (void *)dip); 5164 MDI_PHCI_UNLOCK(ph); 5165 return; 5166 } 5167 5168 /* 5169 * Mark the pHCI as offline 5170 */ 5171 MDI_PHCI_SET_OFFLINE(ph); 5172 5173 /* 5174 * Mark the child mdi_pathinfo nodes as transient 5175 */ 5176 pip = ph->ph_path_head; 5177 while (pip != NULL) { 5178 MDI_PI_LOCK(pip); 5179 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5180 MDI_PI_SET_OFFLINING(pip); 5181 MDI_PI_UNLOCK(pip); 5182 pip = next; 5183 } 5184 MDI_PHCI_UNLOCK(ph); 5185 /* 5186 * Give a chance for any pending commands to execute 5187 */ 5188 delay(1); 5189 MDI_PHCI_LOCK(ph); 5190 pip = ph->ph_path_head; 5191 while (pip != NULL) { 5192 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5193 (void) i_mdi_pi_offline(pip, 0); 5194 MDI_PI_LOCK(pip); 5195 ct = MDI_PI(pip)->pi_client; 5196 if (!MDI_PI_IS_OFFLINE(pip)) { 5197 cmn_err(CE_WARN, "PHCI busy, cannot offline path: " 5198 "PHCI dip = %p", (void *)dip); 5199 MDI_PI_UNLOCK(pip); 5200 MDI_PHCI_SET_ONLINE(ph); 5201 MDI_PHCI_UNLOCK(ph); 5202 return; 5203 } 5204 MDI_PI_UNLOCK(pip); 5205 pip = next; 5206 } 5207 MDI_PHCI_UNLOCK(ph); 5208 5209 return; 5210 } 5211 5212 void 5213 mdi_phci_unretire(dev_info_t *dip) 5214 { 5215 ASSERT(MDI_PHCI(dip)); 5216 5217 /* 5218 * Online the phci 5219 */ 5220 i_mdi_phci_online(dip); 5221 } 5222 5223 /*ARGSUSED*/ 5224 static int 5225 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5226 { 5227 int rv = NDI_SUCCESS; 5228 mdi_client_t *ct; 5229 5230 /* 5231 * Client component to go offline. Make sure that we are 5232 * not in failing over state and update client state 5233 * accordingly 5234 */ 5235 ct = i_devi_get_client(dip); 5236 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 5237 (void *)dip, (void *)ct)); 5238 if (ct != NULL) { 5239 MDI_CLIENT_LOCK(ct); 5240 if (ct->ct_unstable) { 5241 /* 5242 * One or more paths are in transient state, 5243 * Dont allow offline of a client device 5244 */ 5245 MDI_DEBUG(1, (CE_WARN, dip, 5246 "!One or more paths to this device is " 5247 "in transient state. This device can not " 5248 "be removed at this moment. " 5249 "Please try again later.")); 5250 MDI_CLIENT_UNLOCK(ct); 5251 return (NDI_BUSY); 5252 } 5253 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5254 /* 5255 * Failover is in progress, Dont allow DR of 5256 * a client device 5257 */ 5258 MDI_DEBUG(1, (CE_WARN, dip, 5259 "!Client device (%s%d) is Busy. %s", 5260 ddi_driver_name(dip), ddi_get_instance(dip), 5261 "This device can not be removed at " 5262 "this moment. Please try again later.")); 5263 MDI_CLIENT_UNLOCK(ct); 5264 return (NDI_BUSY); 5265 } 5266 MDI_CLIENT_SET_OFFLINE(ct); 5267 5268 /* 5269 * Unbind our relationship with the dev_info node 5270 */ 5271 if (flags & NDI_DEVI_REMOVE) { 5272 ct->ct_dip = NULL; 5273 } 5274 MDI_CLIENT_UNLOCK(ct); 5275 } 5276 return (rv); 5277 } 5278 5279 /* 5280 * mdi_pre_attach(): 5281 * Pre attach() notification handler 5282 */ 5283 /*ARGSUSED*/ 5284 int 5285 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5286 { 5287 /* don't support old DDI_PM_RESUME */ 5288 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5289 (cmd == DDI_PM_RESUME)) 5290 return (DDI_FAILURE); 5291 5292 return (DDI_SUCCESS); 5293 } 5294 5295 /* 5296 * mdi_post_attach(): 5297 * Post attach() notification handler 5298 */ 5299 /*ARGSUSED*/ 5300 void 5301 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5302 { 5303 mdi_phci_t *ph; 5304 mdi_client_t *ct; 5305 mdi_vhci_t *vh; 5306 5307 if (MDI_PHCI(dip)) { 5308 ph = i_devi_get_phci(dip); 5309 ASSERT(ph != NULL); 5310 5311 MDI_PHCI_LOCK(ph); 5312 switch (cmd) { 5313 case DDI_ATTACH: 5314 MDI_DEBUG(2, (CE_NOTE, dip, 5315 "!pHCI post_attach: called %p\n", (void *)ph)); 5316 if (error == DDI_SUCCESS) { 5317 MDI_PHCI_SET_ATTACH(ph); 5318 } else { 5319 MDI_DEBUG(1, (CE_NOTE, dip, 5320 "!pHCI post_attach: failed error=%d\n", 5321 error)); 5322 MDI_PHCI_SET_DETACH(ph); 5323 } 5324 break; 5325 5326 case DDI_RESUME: 5327 MDI_DEBUG(2, (CE_NOTE, dip, 5328 "!pHCI post_resume: called %p\n", (void *)ph)); 5329 if (error == DDI_SUCCESS) { 5330 MDI_PHCI_SET_RESUME(ph); 5331 } else { 5332 MDI_DEBUG(1, (CE_NOTE, dip, 5333 "!pHCI post_resume: failed error=%d\n", 5334 error)); 5335 MDI_PHCI_SET_SUSPEND(ph); 5336 } 5337 break; 5338 } 5339 MDI_PHCI_UNLOCK(ph); 5340 } 5341 5342 if (MDI_CLIENT(dip)) { 5343 ct = i_devi_get_client(dip); 5344 ASSERT(ct != NULL); 5345 5346 MDI_CLIENT_LOCK(ct); 5347 switch (cmd) { 5348 case DDI_ATTACH: 5349 MDI_DEBUG(2, (CE_NOTE, dip, 5350 "!Client post_attach: called %p\n", (void *)ct)); 5351 if (error != DDI_SUCCESS) { 5352 MDI_DEBUG(1, (CE_NOTE, dip, 5353 "!Client post_attach: failed error=%d\n", 5354 error)); 5355 MDI_CLIENT_SET_DETACH(ct); 5356 MDI_DEBUG(4, (CE_WARN, dip, 5357 "mdi_post_attach i_mdi_pm_reset_client\n")); 5358 i_mdi_pm_reset_client(ct); 5359 break; 5360 } 5361 5362 /* 5363 * Client device has successfully attached, inform 5364 * the vhci. 5365 */ 5366 vh = ct->ct_vhci; 5367 if (vh->vh_ops->vo_client_attached) 5368 (*vh->vh_ops->vo_client_attached)(dip); 5369 5370 MDI_CLIENT_SET_ATTACH(ct); 5371 break; 5372 5373 case DDI_RESUME: 5374 MDI_DEBUG(2, (CE_NOTE, dip, 5375 "!Client post_attach: called %p\n", (void *)ct)); 5376 if (error == DDI_SUCCESS) { 5377 MDI_CLIENT_SET_RESUME(ct); 5378 } else { 5379 MDI_DEBUG(1, (CE_NOTE, dip, 5380 "!Client post_resume: failed error=%d\n", 5381 error)); 5382 MDI_CLIENT_SET_SUSPEND(ct); 5383 } 5384 break; 5385 } 5386 MDI_CLIENT_UNLOCK(ct); 5387 } 5388 } 5389 5390 /* 5391 * mdi_pre_detach(): 5392 * Pre detach notification handler 5393 */ 5394 /*ARGSUSED*/ 5395 int 5396 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5397 { 5398 int rv = DDI_SUCCESS; 5399 5400 if (MDI_CLIENT(dip)) { 5401 (void) i_mdi_client_pre_detach(dip, cmd); 5402 } 5403 5404 if (MDI_PHCI(dip)) { 5405 rv = i_mdi_phci_pre_detach(dip, cmd); 5406 } 5407 5408 return (rv); 5409 } 5410 5411 /*ARGSUSED*/ 5412 static int 5413 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5414 { 5415 int rv = DDI_SUCCESS; 5416 mdi_phci_t *ph; 5417 mdi_client_t *ct; 5418 mdi_pathinfo_t *pip; 5419 mdi_pathinfo_t *failed_pip = NULL; 5420 mdi_pathinfo_t *next; 5421 5422 ph = i_devi_get_phci(dip); 5423 if (ph == NULL) { 5424 return (rv); 5425 } 5426 5427 MDI_PHCI_LOCK(ph); 5428 switch (cmd) { 5429 case DDI_DETACH: 5430 MDI_DEBUG(2, (CE_NOTE, dip, 5431 "!pHCI pre_detach: called %p\n", (void *)ph)); 5432 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5433 /* 5434 * mdi_pathinfo nodes are still attached to 5435 * this pHCI. Fail the detach for this pHCI. 5436 */ 5437 MDI_DEBUG(2, (CE_WARN, dip, 5438 "!pHCI pre_detach: " 5439 "mdi_pathinfo nodes are still attached " 5440 "%p\n", (void *)ph)); 5441 rv = DDI_FAILURE; 5442 break; 5443 } 5444 MDI_PHCI_SET_DETACH(ph); 5445 break; 5446 5447 case DDI_SUSPEND: 5448 /* 5449 * pHCI is getting suspended. Since mpxio client 5450 * devices may not be suspended at this point, to avoid 5451 * a potential stack overflow, it is important to suspend 5452 * client devices before pHCI can be suspended. 5453 */ 5454 5455 MDI_DEBUG(2, (CE_NOTE, dip, 5456 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5457 /* 5458 * Suspend all the client devices accessible through this pHCI 5459 */ 5460 pip = ph->ph_path_head; 5461 while (pip != NULL && rv == DDI_SUCCESS) { 5462 dev_info_t *cdip; 5463 MDI_PI_LOCK(pip); 5464 next = 5465 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5466 ct = MDI_PI(pip)->pi_client; 5467 i_mdi_client_lock(ct, pip); 5468 cdip = ct->ct_dip; 5469 MDI_PI_UNLOCK(pip); 5470 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5471 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5472 i_mdi_client_unlock(ct); 5473 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5474 DDI_SUCCESS) { 5475 /* 5476 * Suspend of one of the client 5477 * device has failed. 5478 */ 5479 MDI_DEBUG(1, (CE_WARN, dip, 5480 "!Suspend of device (%s%d) failed.", 5481 ddi_driver_name(cdip), 5482 ddi_get_instance(cdip))); 5483 failed_pip = pip; 5484 break; 5485 } 5486 } else { 5487 i_mdi_client_unlock(ct); 5488 } 5489 pip = next; 5490 } 5491 5492 if (rv == DDI_SUCCESS) { 5493 /* 5494 * Suspend of client devices is complete. Proceed 5495 * with pHCI suspend. 5496 */ 5497 MDI_PHCI_SET_SUSPEND(ph); 5498 } else { 5499 /* 5500 * Revert back all the suspended client device states 5501 * to converse. 5502 */ 5503 pip = ph->ph_path_head; 5504 while (pip != failed_pip) { 5505 dev_info_t *cdip; 5506 MDI_PI_LOCK(pip); 5507 next = 5508 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5509 ct = MDI_PI(pip)->pi_client; 5510 i_mdi_client_lock(ct, pip); 5511 cdip = ct->ct_dip; 5512 MDI_PI_UNLOCK(pip); 5513 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5514 i_mdi_client_unlock(ct); 5515 (void) devi_attach(cdip, DDI_RESUME); 5516 } else { 5517 i_mdi_client_unlock(ct); 5518 } 5519 pip = next; 5520 } 5521 } 5522 break; 5523 5524 default: 5525 rv = DDI_FAILURE; 5526 break; 5527 } 5528 MDI_PHCI_UNLOCK(ph); 5529 return (rv); 5530 } 5531 5532 /*ARGSUSED*/ 5533 static int 5534 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5535 { 5536 int rv = DDI_SUCCESS; 5537 mdi_client_t *ct; 5538 5539 ct = i_devi_get_client(dip); 5540 if (ct == NULL) { 5541 return (rv); 5542 } 5543 5544 MDI_CLIENT_LOCK(ct); 5545 switch (cmd) { 5546 case DDI_DETACH: 5547 MDI_DEBUG(2, (CE_NOTE, dip, 5548 "!Client pre_detach: called %p\n", (void *)ct)); 5549 MDI_CLIENT_SET_DETACH(ct); 5550 break; 5551 5552 case DDI_SUSPEND: 5553 MDI_DEBUG(2, (CE_NOTE, dip, 5554 "!Client pre_suspend: called %p\n", (void *)ct)); 5555 MDI_CLIENT_SET_SUSPEND(ct); 5556 break; 5557 5558 default: 5559 rv = DDI_FAILURE; 5560 break; 5561 } 5562 MDI_CLIENT_UNLOCK(ct); 5563 return (rv); 5564 } 5565 5566 /* 5567 * mdi_post_detach(): 5568 * Post detach notification handler 5569 */ 5570 /*ARGSUSED*/ 5571 void 5572 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5573 { 5574 /* 5575 * Detach/Suspend of mpxio component failed. Update our state 5576 * too 5577 */ 5578 if (MDI_PHCI(dip)) 5579 i_mdi_phci_post_detach(dip, cmd, error); 5580 5581 if (MDI_CLIENT(dip)) 5582 i_mdi_client_post_detach(dip, cmd, error); 5583 } 5584 5585 /*ARGSUSED*/ 5586 static void 5587 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5588 { 5589 mdi_phci_t *ph; 5590 5591 /* 5592 * Detach/Suspend of phci component failed. Update our state 5593 * too 5594 */ 5595 ph = i_devi_get_phci(dip); 5596 if (ph == NULL) { 5597 return; 5598 } 5599 5600 MDI_PHCI_LOCK(ph); 5601 /* 5602 * Detach of pHCI failed. Restore back converse 5603 * state 5604 */ 5605 switch (cmd) { 5606 case DDI_DETACH: 5607 MDI_DEBUG(2, (CE_NOTE, dip, 5608 "!pHCI post_detach: called %p\n", (void *)ph)); 5609 if (error != DDI_SUCCESS) 5610 MDI_PHCI_SET_ATTACH(ph); 5611 break; 5612 5613 case DDI_SUSPEND: 5614 MDI_DEBUG(2, (CE_NOTE, dip, 5615 "!pHCI post_suspend: called %p\n", (void *)ph)); 5616 if (error != DDI_SUCCESS) 5617 MDI_PHCI_SET_RESUME(ph); 5618 break; 5619 } 5620 MDI_PHCI_UNLOCK(ph); 5621 } 5622 5623 /*ARGSUSED*/ 5624 static void 5625 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5626 { 5627 mdi_client_t *ct; 5628 5629 ct = i_devi_get_client(dip); 5630 if (ct == NULL) { 5631 return; 5632 } 5633 MDI_CLIENT_LOCK(ct); 5634 /* 5635 * Detach of Client failed. Restore back converse 5636 * state 5637 */ 5638 switch (cmd) { 5639 case DDI_DETACH: 5640 MDI_DEBUG(2, (CE_NOTE, dip, 5641 "!Client post_detach: called %p\n", (void *)ct)); 5642 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5643 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5644 "i_mdi_pm_rele_client\n")); 5645 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5646 } else { 5647 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5648 "i_mdi_pm_reset_client\n")); 5649 i_mdi_pm_reset_client(ct); 5650 } 5651 if (error != DDI_SUCCESS) 5652 MDI_CLIENT_SET_ATTACH(ct); 5653 break; 5654 5655 case DDI_SUSPEND: 5656 MDI_DEBUG(2, (CE_NOTE, dip, 5657 "!Client post_suspend: called %p\n", (void *)ct)); 5658 if (error != DDI_SUCCESS) 5659 MDI_CLIENT_SET_RESUME(ct); 5660 break; 5661 } 5662 MDI_CLIENT_UNLOCK(ct); 5663 } 5664 5665 int 5666 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5667 { 5668 return (MDI_PI(pip)->pi_kstats ? 1 : 0); 5669 } 5670 5671 /* 5672 * create and install per-path (client - pHCI) statistics 5673 * I/O stats supported: nread, nwritten, reads, and writes 5674 * Error stats - hard errors, soft errors, & transport errors 5675 */ 5676 int 5677 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5678 { 5679 kstat_t *kiosp, *kerrsp; 5680 struct pi_errs *nsp; 5681 struct mdi_pi_kstats *mdi_statp; 5682 5683 if (MDI_PI(pip)->pi_kstats != NULL) 5684 return (MDI_SUCCESS); 5685 5686 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5687 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5688 return (MDI_FAILURE); 5689 } 5690 5691 (void) strcat(ksname, ",err"); 5692 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5693 KSTAT_TYPE_NAMED, 5694 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5695 if (kerrsp == NULL) { 5696 kstat_delete(kiosp); 5697 return (MDI_FAILURE); 5698 } 5699 5700 nsp = (struct pi_errs *)kerrsp->ks_data; 5701 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5702 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5703 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5704 KSTAT_DATA_UINT32); 5705 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5706 KSTAT_DATA_UINT32); 5707 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5708 KSTAT_DATA_UINT32); 5709 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5710 KSTAT_DATA_UINT32); 5711 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5712 KSTAT_DATA_UINT32); 5713 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5714 KSTAT_DATA_UINT32); 5715 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5716 KSTAT_DATA_UINT32); 5717 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5718 5719 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5720 mdi_statp->pi_kstat_ref = 1; 5721 mdi_statp->pi_kstat_iostats = kiosp; 5722 mdi_statp->pi_kstat_errstats = kerrsp; 5723 kstat_install(kiosp); 5724 kstat_install(kerrsp); 5725 MDI_PI(pip)->pi_kstats = mdi_statp; 5726 return (MDI_SUCCESS); 5727 } 5728 5729 /* 5730 * destroy per-path properties 5731 */ 5732 static void 5733 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5734 { 5735 5736 struct mdi_pi_kstats *mdi_statp; 5737 5738 if (MDI_PI(pip)->pi_kstats == NULL) 5739 return; 5740 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5741 return; 5742 5743 MDI_PI(pip)->pi_kstats = NULL; 5744 5745 /* 5746 * the kstat may be shared between multiple pathinfo nodes 5747 * decrement this pathinfo's usage, removing the kstats 5748 * themselves when the last pathinfo reference is removed. 5749 */ 5750 ASSERT(mdi_statp->pi_kstat_ref > 0); 5751 if (--mdi_statp->pi_kstat_ref != 0) 5752 return; 5753 5754 kstat_delete(mdi_statp->pi_kstat_iostats); 5755 kstat_delete(mdi_statp->pi_kstat_errstats); 5756 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5757 } 5758 5759 /* 5760 * update I/O paths KSTATS 5761 */ 5762 void 5763 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5764 { 5765 kstat_t *iostatp; 5766 size_t xfer_cnt; 5767 5768 ASSERT(pip != NULL); 5769 5770 /* 5771 * I/O can be driven across a path prior to having path 5772 * statistics available, i.e. probe(9e). 5773 */ 5774 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5775 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5776 xfer_cnt = bp->b_bcount - bp->b_resid; 5777 if (bp->b_flags & B_READ) { 5778 KSTAT_IO_PTR(iostatp)->reads++; 5779 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5780 } else { 5781 KSTAT_IO_PTR(iostatp)->writes++; 5782 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5783 } 5784 } 5785 } 5786 5787 /* 5788 * Enable the path(specific client/target/initiator) 5789 * Enabling a path means that MPxIO may select the enabled path for routing 5790 * future I/O requests, subject to other path state constraints. 5791 */ 5792 int 5793 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5794 { 5795 mdi_phci_t *ph; 5796 5797 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5798 if (ph == NULL) { 5799 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5800 " failed. pip: %p ph = NULL\n", (void *)pip)); 5801 return (MDI_FAILURE); 5802 } 5803 5804 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5805 MDI_ENABLE_OP); 5806 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5807 " Returning success pip = %p. ph = %p\n", 5808 (void *)pip, (void *)ph)); 5809 return (MDI_SUCCESS); 5810 5811 } 5812 5813 /* 5814 * Disable the path (specific client/target/initiator) 5815 * Disabling a path means that MPxIO will not select the disabled path for 5816 * routing any new I/O requests. 5817 */ 5818 int 5819 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5820 { 5821 mdi_phci_t *ph; 5822 5823 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5824 if (ph == NULL) { 5825 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5826 " failed. pip: %p ph = NULL\n", (void *)pip)); 5827 return (MDI_FAILURE); 5828 } 5829 5830 (void) i_mdi_enable_disable_path(pip, 5831 ph->ph_vhci, flags, MDI_DISABLE_OP); 5832 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5833 "Returning success pip = %p. ph = %p", 5834 (void *)pip, (void *)ph)); 5835 return (MDI_SUCCESS); 5836 } 5837 5838 /* 5839 * disable the path to a particular pHCI (pHCI specified in the phci_path 5840 * argument) for a particular client (specified in the client_path argument). 5841 * Disabling a path means that MPxIO will not select the disabled path for 5842 * routing any new I/O requests. 5843 * NOTE: this will be removed once the NWS files are changed to use the new 5844 * mdi_{enable,disable}_path interfaces 5845 */ 5846 int 5847 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5848 { 5849 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5850 } 5851 5852 /* 5853 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5854 * argument) for a particular client (specified in the client_path argument). 5855 * Enabling a path means that MPxIO may select the enabled path for routing 5856 * future I/O requests, subject to other path state constraints. 5857 * NOTE: this will be removed once the NWS files are changed to use the new 5858 * mdi_{enable,disable}_path interfaces 5859 */ 5860 5861 int 5862 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5863 { 5864 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5865 } 5866 5867 /* 5868 * Common routine for doing enable/disable. 5869 */ 5870 static mdi_pathinfo_t * 5871 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5872 int op) 5873 { 5874 int sync_flag = 0; 5875 int rv; 5876 mdi_pathinfo_t *next; 5877 int (*f)() = NULL; 5878 5879 f = vh->vh_ops->vo_pi_state_change; 5880 5881 sync_flag = (flags << 8) & 0xf00; 5882 5883 /* 5884 * Do a callback into the mdi consumer to let it 5885 * know that path is about to get enabled/disabled. 5886 */ 5887 if (f != NULL) { 5888 rv = (*f)(vh->vh_dip, pip, 0, 5889 MDI_PI_EXT_STATE(pip), 5890 MDI_EXT_STATE_CHANGE | sync_flag | 5891 op | MDI_BEFORE_STATE_CHANGE); 5892 if (rv != MDI_SUCCESS) { 5893 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5894 "!vo_pi_state_change: failed rv = %x", rv)); 5895 } 5896 } 5897 MDI_PI_LOCK(pip); 5898 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5899 5900 switch (flags) { 5901 case USER_DISABLE: 5902 if (op == MDI_DISABLE_OP) { 5903 MDI_PI_SET_USER_DISABLE(pip); 5904 } else { 5905 MDI_PI_SET_USER_ENABLE(pip); 5906 } 5907 break; 5908 case DRIVER_DISABLE: 5909 if (op == MDI_DISABLE_OP) { 5910 MDI_PI_SET_DRV_DISABLE(pip); 5911 } else { 5912 MDI_PI_SET_DRV_ENABLE(pip); 5913 } 5914 break; 5915 case DRIVER_DISABLE_TRANSIENT: 5916 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5917 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5918 } else { 5919 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5920 } 5921 break; 5922 } 5923 MDI_PI_UNLOCK(pip); 5924 /* 5925 * Do a callback into the mdi consumer to let it 5926 * know that path is now enabled/disabled. 5927 */ 5928 if (f != NULL) { 5929 rv = (*f)(vh->vh_dip, pip, 0, 5930 MDI_PI_EXT_STATE(pip), 5931 MDI_EXT_STATE_CHANGE | sync_flag | 5932 op | MDI_AFTER_STATE_CHANGE); 5933 if (rv != MDI_SUCCESS) { 5934 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5935 "!vo_pi_state_change: failed rv = %x", rv)); 5936 } 5937 } 5938 return (next); 5939 } 5940 5941 /* 5942 * Common routine for doing enable/disable. 5943 * NOTE: this will be removed once the NWS files are changed to use the new 5944 * mdi_{enable,disable}_path has been putback 5945 */ 5946 int 5947 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5948 { 5949 5950 mdi_phci_t *ph; 5951 mdi_vhci_t *vh = NULL; 5952 mdi_client_t *ct; 5953 mdi_pathinfo_t *next, *pip; 5954 int found_it; 5955 5956 ph = i_devi_get_phci(pdip); 5957 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5958 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5959 (void *)cdip)); 5960 if (ph == NULL) { 5961 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5962 "Op %d failed. ph = NULL\n", op)); 5963 return (MDI_FAILURE); 5964 } 5965 5966 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5967 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5968 "Op Invalid operation = %d\n", op)); 5969 return (MDI_FAILURE); 5970 } 5971 5972 vh = ph->ph_vhci; 5973 5974 if (cdip == NULL) { 5975 /* 5976 * Need to mark the Phci as enabled/disabled. 5977 */ 5978 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5979 "Op %d for the phci\n", op)); 5980 MDI_PHCI_LOCK(ph); 5981 switch (flags) { 5982 case USER_DISABLE: 5983 if (op == MDI_DISABLE_OP) { 5984 MDI_PHCI_SET_USER_DISABLE(ph); 5985 } else { 5986 MDI_PHCI_SET_USER_ENABLE(ph); 5987 } 5988 break; 5989 case DRIVER_DISABLE: 5990 if (op == MDI_DISABLE_OP) { 5991 MDI_PHCI_SET_DRV_DISABLE(ph); 5992 } else { 5993 MDI_PHCI_SET_DRV_ENABLE(ph); 5994 } 5995 break; 5996 case DRIVER_DISABLE_TRANSIENT: 5997 if (op == MDI_DISABLE_OP) { 5998 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5999 } else { 6000 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6001 } 6002 break; 6003 default: 6004 MDI_PHCI_UNLOCK(ph); 6005 MDI_DEBUG(1, (CE_NOTE, NULL, 6006 "!i_mdi_pi_enable_disable:" 6007 " Invalid flag argument= %d\n", flags)); 6008 } 6009 6010 /* 6011 * Phci has been disabled. Now try to enable/disable 6012 * path info's to each client. 6013 */ 6014 pip = ph->ph_path_head; 6015 while (pip != NULL) { 6016 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6017 } 6018 MDI_PHCI_UNLOCK(ph); 6019 } else { 6020 6021 /* 6022 * Disable a specific client. 6023 */ 6024 ct = i_devi_get_client(cdip); 6025 if (ct == NULL) { 6026 MDI_DEBUG(1, (CE_NOTE, NULL, 6027 "!i_mdi_pi_enable_disable:" 6028 " failed. ct = NULL operation = %d\n", op)); 6029 return (MDI_FAILURE); 6030 } 6031 6032 MDI_CLIENT_LOCK(ct); 6033 pip = ct->ct_path_head; 6034 found_it = 0; 6035 while (pip != NULL) { 6036 MDI_PI_LOCK(pip); 6037 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6038 if (MDI_PI(pip)->pi_phci == ph) { 6039 MDI_PI_UNLOCK(pip); 6040 found_it = 1; 6041 break; 6042 } 6043 MDI_PI_UNLOCK(pip); 6044 pip = next; 6045 } 6046 6047 6048 MDI_CLIENT_UNLOCK(ct); 6049 if (found_it == 0) { 6050 MDI_DEBUG(1, (CE_NOTE, NULL, 6051 "!i_mdi_pi_enable_disable:" 6052 " failed. Could not find corresponding pip\n")); 6053 return (MDI_FAILURE); 6054 } 6055 6056 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6057 } 6058 6059 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6060 "Op %d Returning success pdip = %p cdip = %p\n", 6061 op, (void *)pdip, (void *)cdip)); 6062 return (MDI_SUCCESS); 6063 } 6064 6065 /* 6066 * Ensure phci powered up 6067 */ 6068 static void 6069 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6070 { 6071 dev_info_t *ph_dip; 6072 6073 ASSERT(pip != NULL); 6074 ASSERT(MDI_PI_LOCKED(pip)); 6075 6076 if (MDI_PI(pip)->pi_pm_held) { 6077 return; 6078 } 6079 6080 ph_dip = mdi_pi_get_phci(pip); 6081 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 6082 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6083 if (ph_dip == NULL) { 6084 return; 6085 } 6086 6087 MDI_PI_UNLOCK(pip); 6088 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6089 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6090 6091 pm_hold_power(ph_dip); 6092 6093 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6094 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6095 MDI_PI_LOCK(pip); 6096 6097 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6098 if (DEVI(ph_dip)->devi_pm_info) 6099 MDI_PI(pip)->pi_pm_held = 1; 6100 } 6101 6102 /* 6103 * Allow phci powered down 6104 */ 6105 static void 6106 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6107 { 6108 dev_info_t *ph_dip = NULL; 6109 6110 ASSERT(pip != NULL); 6111 ASSERT(MDI_PI_LOCKED(pip)); 6112 6113 if (MDI_PI(pip)->pi_pm_held == 0) { 6114 return; 6115 } 6116 6117 ph_dip = mdi_pi_get_phci(pip); 6118 ASSERT(ph_dip != NULL); 6119 6120 MDI_PI_UNLOCK(pip); 6121 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 6122 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6123 6124 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6125 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6126 pm_rele_power(ph_dip); 6127 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6128 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6129 6130 MDI_PI_LOCK(pip); 6131 MDI_PI(pip)->pi_pm_held = 0; 6132 } 6133 6134 static void 6135 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6136 { 6137 ASSERT(MDI_CLIENT_LOCKED(ct)); 6138 6139 ct->ct_power_cnt += incr; 6140 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 6141 "ct_power_cnt = %d incr = %d\n", (void *)ct, 6142 ct->ct_power_cnt, incr)); 6143 ASSERT(ct->ct_power_cnt >= 0); 6144 } 6145 6146 static void 6147 i_mdi_rele_all_phci(mdi_client_t *ct) 6148 { 6149 mdi_pathinfo_t *pip; 6150 6151 ASSERT(MDI_CLIENT_LOCKED(ct)); 6152 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6153 while (pip != NULL) { 6154 mdi_hold_path(pip); 6155 MDI_PI_LOCK(pip); 6156 i_mdi_pm_rele_pip(pip); 6157 MDI_PI_UNLOCK(pip); 6158 mdi_rele_path(pip); 6159 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6160 } 6161 } 6162 6163 static void 6164 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6165 { 6166 ASSERT(MDI_CLIENT_LOCKED(ct)); 6167 6168 if (i_ddi_devi_attached(ct->ct_dip)) { 6169 ct->ct_power_cnt -= decr; 6170 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 6171 "ct_power_cnt = %d decr = %d\n", 6172 (void *)ct, ct->ct_power_cnt, decr)); 6173 } 6174 6175 ASSERT(ct->ct_power_cnt >= 0); 6176 if (ct->ct_power_cnt == 0) { 6177 i_mdi_rele_all_phci(ct); 6178 return; 6179 } 6180 } 6181 6182 static void 6183 i_mdi_pm_reset_client(mdi_client_t *ct) 6184 { 6185 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 6186 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 6187 ASSERT(MDI_CLIENT_LOCKED(ct)); 6188 ct->ct_power_cnt = 0; 6189 i_mdi_rele_all_phci(ct); 6190 ct->ct_powercnt_config = 0; 6191 ct->ct_powercnt_unconfig = 0; 6192 ct->ct_powercnt_reset = 1; 6193 } 6194 6195 static int 6196 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6197 { 6198 int ret; 6199 dev_info_t *ph_dip; 6200 6201 MDI_PI_LOCK(pip); 6202 i_mdi_pm_hold_pip(pip); 6203 6204 ph_dip = mdi_pi_get_phci(pip); 6205 MDI_PI_UNLOCK(pip); 6206 6207 /* bring all components of phci to full power */ 6208 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6209 "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip), 6210 ddi_get_instance(ph_dip), (void *)pip)); 6211 6212 ret = pm_powerup(ph_dip); 6213 6214 if (ret == DDI_FAILURE) { 6215 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6216 "pm_powerup FAILED for %s%d %p\n", 6217 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), 6218 (void *)pip)); 6219 6220 MDI_PI_LOCK(pip); 6221 i_mdi_pm_rele_pip(pip); 6222 MDI_PI_UNLOCK(pip); 6223 return (MDI_FAILURE); 6224 } 6225 6226 return (MDI_SUCCESS); 6227 } 6228 6229 static int 6230 i_mdi_power_all_phci(mdi_client_t *ct) 6231 { 6232 mdi_pathinfo_t *pip; 6233 int succeeded = 0; 6234 6235 ASSERT(MDI_CLIENT_LOCKED(ct)); 6236 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6237 while (pip != NULL) { 6238 /* 6239 * Don't power if MDI_PATHINFO_STATE_FAULT 6240 * or MDI_PATHINFO_STATE_OFFLINE. 6241 */ 6242 if (MDI_PI_IS_INIT(pip) || 6243 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6244 mdi_hold_path(pip); 6245 MDI_CLIENT_UNLOCK(ct); 6246 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6247 succeeded = 1; 6248 6249 ASSERT(ct == MDI_PI(pip)->pi_client); 6250 MDI_CLIENT_LOCK(ct); 6251 mdi_rele_path(pip); 6252 } 6253 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6254 } 6255 6256 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6257 } 6258 6259 /* 6260 * mdi_bus_power(): 6261 * 1. Place the phci(s) into powered up state so that 6262 * client can do power management 6263 * 2. Ensure phci powered up as client power managing 6264 * Return Values: 6265 * MDI_SUCCESS 6266 * MDI_FAILURE 6267 */ 6268 int 6269 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6270 void *arg, void *result) 6271 { 6272 int ret = MDI_SUCCESS; 6273 pm_bp_child_pwrchg_t *bpc; 6274 mdi_client_t *ct; 6275 dev_info_t *cdip; 6276 pm_bp_has_changed_t *bphc; 6277 6278 /* 6279 * BUS_POWER_NOINVOL not supported 6280 */ 6281 if (op == BUS_POWER_NOINVOL) 6282 return (MDI_FAILURE); 6283 6284 /* 6285 * ignore other OPs. 6286 * return quickly to save cou cycles on the ct processing 6287 */ 6288 switch (op) { 6289 case BUS_POWER_PRE_NOTIFICATION: 6290 case BUS_POWER_POST_NOTIFICATION: 6291 bpc = (pm_bp_child_pwrchg_t *)arg; 6292 cdip = bpc->bpc_dip; 6293 break; 6294 case BUS_POWER_HAS_CHANGED: 6295 bphc = (pm_bp_has_changed_t *)arg; 6296 cdip = bphc->bphc_dip; 6297 break; 6298 default: 6299 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6300 } 6301 6302 ASSERT(MDI_CLIENT(cdip)); 6303 6304 ct = i_devi_get_client(cdip); 6305 if (ct == NULL) 6306 return (MDI_FAILURE); 6307 6308 /* 6309 * wait till the mdi_pathinfo node state change are processed 6310 */ 6311 MDI_CLIENT_LOCK(ct); 6312 switch (op) { 6313 case BUS_POWER_PRE_NOTIFICATION: 6314 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6315 "BUS_POWER_PRE_NOTIFICATION:" 6316 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6317 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6318 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6319 6320 /* serialize power level change per client */ 6321 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6322 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6323 6324 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6325 6326 if (ct->ct_power_cnt == 0) { 6327 ret = i_mdi_power_all_phci(ct); 6328 } 6329 6330 /* 6331 * if new_level > 0: 6332 * - hold phci(s) 6333 * - power up phci(s) if not already 6334 * ignore power down 6335 */ 6336 if (bpc->bpc_nlevel > 0) { 6337 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 6338 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6339 "mdi_bus_power i_mdi_pm_hold_client\n")); 6340 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6341 } 6342 } 6343 break; 6344 case BUS_POWER_POST_NOTIFICATION: 6345 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6346 "BUS_POWER_POST_NOTIFICATION:" 6347 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 6348 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6349 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6350 *(int *)result)); 6351 6352 if (*(int *)result == DDI_SUCCESS) { 6353 if (bpc->bpc_nlevel > 0) { 6354 MDI_CLIENT_SET_POWER_UP(ct); 6355 } else { 6356 MDI_CLIENT_SET_POWER_DOWN(ct); 6357 } 6358 } 6359 6360 /* release the hold we did in pre-notification */ 6361 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6362 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6363 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6364 "mdi_bus_power i_mdi_pm_rele_client\n")); 6365 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6366 } 6367 6368 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6369 /* another thread might started attaching */ 6370 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6371 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6372 "mdi_bus_power i_mdi_pm_rele_client\n")); 6373 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6374 /* detaching has been taken care in pm_post_unconfig */ 6375 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6376 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6377 "mdi_bus_power i_mdi_pm_reset_client\n")); 6378 i_mdi_pm_reset_client(ct); 6379 } 6380 } 6381 6382 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6383 cv_broadcast(&ct->ct_powerchange_cv); 6384 6385 break; 6386 6387 /* need to do more */ 6388 case BUS_POWER_HAS_CHANGED: 6389 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 6390 "BUS_POWER_HAS_CHANGED:" 6391 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6392 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6393 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6394 6395 if (bphc->bphc_nlevel > 0 && 6396 bphc->bphc_nlevel > bphc->bphc_olevel) { 6397 if (ct->ct_power_cnt == 0) { 6398 ret = i_mdi_power_all_phci(ct); 6399 } 6400 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6401 "mdi_bus_power i_mdi_pm_hold_client\n")); 6402 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6403 } 6404 6405 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6406 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6407 "mdi_bus_power i_mdi_pm_rele_client\n")); 6408 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6409 } 6410 break; 6411 } 6412 6413 MDI_CLIENT_UNLOCK(ct); 6414 return (ret); 6415 } 6416 6417 static int 6418 i_mdi_pm_pre_config_one(dev_info_t *child) 6419 { 6420 int ret = MDI_SUCCESS; 6421 mdi_client_t *ct; 6422 6423 ct = i_devi_get_client(child); 6424 if (ct == NULL) 6425 return (MDI_FAILURE); 6426 6427 MDI_CLIENT_LOCK(ct); 6428 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6429 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6430 6431 if (!MDI_CLIENT_IS_FAILED(ct)) { 6432 MDI_CLIENT_UNLOCK(ct); 6433 MDI_DEBUG(4, (CE_NOTE, child, 6434 "i_mdi_pm_pre_config_one already configured\n")); 6435 return (MDI_SUCCESS); 6436 } 6437 6438 if (ct->ct_powercnt_config) { 6439 MDI_CLIENT_UNLOCK(ct); 6440 MDI_DEBUG(4, (CE_NOTE, child, 6441 "i_mdi_pm_pre_config_one ALREADY held\n")); 6442 return (MDI_SUCCESS); 6443 } 6444 6445 if (ct->ct_power_cnt == 0) { 6446 ret = i_mdi_power_all_phci(ct); 6447 } 6448 MDI_DEBUG(4, (CE_NOTE, child, 6449 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 6450 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6451 ct->ct_powercnt_config = 1; 6452 ct->ct_powercnt_reset = 0; 6453 MDI_CLIENT_UNLOCK(ct); 6454 return (ret); 6455 } 6456 6457 static int 6458 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6459 { 6460 int ret = MDI_SUCCESS; 6461 dev_info_t *cdip; 6462 int circ; 6463 6464 ASSERT(MDI_VHCI(vdip)); 6465 6466 /* ndi_devi_config_one */ 6467 if (child) { 6468 ASSERT(DEVI_BUSY_OWNED(vdip)); 6469 return (i_mdi_pm_pre_config_one(child)); 6470 } 6471 6472 /* devi_config_common */ 6473 ndi_devi_enter(vdip, &circ); 6474 cdip = ddi_get_child(vdip); 6475 while (cdip) { 6476 dev_info_t *next = ddi_get_next_sibling(cdip); 6477 6478 ret = i_mdi_pm_pre_config_one(cdip); 6479 if (ret != MDI_SUCCESS) 6480 break; 6481 cdip = next; 6482 } 6483 ndi_devi_exit(vdip, circ); 6484 return (ret); 6485 } 6486 6487 static int 6488 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6489 { 6490 int ret = MDI_SUCCESS; 6491 mdi_client_t *ct; 6492 6493 ct = i_devi_get_client(child); 6494 if (ct == NULL) 6495 return (MDI_FAILURE); 6496 6497 MDI_CLIENT_LOCK(ct); 6498 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6499 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6500 6501 if (!i_ddi_devi_attached(ct->ct_dip)) { 6502 MDI_DEBUG(4, (CE_NOTE, child, 6503 "i_mdi_pm_pre_unconfig node detached already\n")); 6504 MDI_CLIENT_UNLOCK(ct); 6505 return (MDI_SUCCESS); 6506 } 6507 6508 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6509 (flags & NDI_AUTODETACH)) { 6510 MDI_DEBUG(4, (CE_NOTE, child, 6511 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6512 MDI_CLIENT_UNLOCK(ct); 6513 return (MDI_FAILURE); 6514 } 6515 6516 if (ct->ct_powercnt_unconfig) { 6517 MDI_DEBUG(4, (CE_NOTE, child, 6518 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6519 MDI_CLIENT_UNLOCK(ct); 6520 *held = 1; 6521 return (MDI_SUCCESS); 6522 } 6523 6524 if (ct->ct_power_cnt == 0) { 6525 ret = i_mdi_power_all_phci(ct); 6526 } 6527 MDI_DEBUG(4, (CE_NOTE, child, 6528 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6529 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6530 ct->ct_powercnt_unconfig = 1; 6531 ct->ct_powercnt_reset = 0; 6532 MDI_CLIENT_UNLOCK(ct); 6533 if (ret == MDI_SUCCESS) 6534 *held = 1; 6535 return (ret); 6536 } 6537 6538 static int 6539 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6540 int flags) 6541 { 6542 int ret = MDI_SUCCESS; 6543 dev_info_t *cdip; 6544 int circ; 6545 6546 ASSERT(MDI_VHCI(vdip)); 6547 *held = 0; 6548 6549 /* ndi_devi_unconfig_one */ 6550 if (child) { 6551 ASSERT(DEVI_BUSY_OWNED(vdip)); 6552 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6553 } 6554 6555 /* devi_unconfig_common */ 6556 ndi_devi_enter(vdip, &circ); 6557 cdip = ddi_get_child(vdip); 6558 while (cdip) { 6559 dev_info_t *next = ddi_get_next_sibling(cdip); 6560 6561 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6562 cdip = next; 6563 } 6564 ndi_devi_exit(vdip, circ); 6565 6566 if (*held) 6567 ret = MDI_SUCCESS; 6568 6569 return (ret); 6570 } 6571 6572 static void 6573 i_mdi_pm_post_config_one(dev_info_t *child) 6574 { 6575 mdi_client_t *ct; 6576 6577 ct = i_devi_get_client(child); 6578 if (ct == NULL) 6579 return; 6580 6581 MDI_CLIENT_LOCK(ct); 6582 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6583 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6584 6585 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6586 MDI_DEBUG(4, (CE_NOTE, child, 6587 "i_mdi_pm_post_config_one NOT configured\n")); 6588 MDI_CLIENT_UNLOCK(ct); 6589 return; 6590 } 6591 6592 /* client has not been updated */ 6593 if (MDI_CLIENT_IS_FAILED(ct)) { 6594 MDI_DEBUG(4, (CE_NOTE, child, 6595 "i_mdi_pm_post_config_one NOT configured\n")); 6596 MDI_CLIENT_UNLOCK(ct); 6597 return; 6598 } 6599 6600 /* another thread might have powered it down or detached it */ 6601 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6602 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6603 (!i_ddi_devi_attached(ct->ct_dip) && 6604 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6605 MDI_DEBUG(4, (CE_NOTE, child, 6606 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6607 i_mdi_pm_reset_client(ct); 6608 } else { 6609 mdi_pathinfo_t *pip, *next; 6610 int valid_path_count = 0; 6611 6612 MDI_DEBUG(4, (CE_NOTE, child, 6613 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6614 pip = ct->ct_path_head; 6615 while (pip != NULL) { 6616 MDI_PI_LOCK(pip); 6617 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6618 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6619 valid_path_count ++; 6620 MDI_PI_UNLOCK(pip); 6621 pip = next; 6622 } 6623 i_mdi_pm_rele_client(ct, valid_path_count); 6624 } 6625 ct->ct_powercnt_config = 0; 6626 MDI_CLIENT_UNLOCK(ct); 6627 } 6628 6629 static void 6630 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6631 { 6632 int circ; 6633 dev_info_t *cdip; 6634 6635 ASSERT(MDI_VHCI(vdip)); 6636 6637 /* ndi_devi_config_one */ 6638 if (child) { 6639 ASSERT(DEVI_BUSY_OWNED(vdip)); 6640 i_mdi_pm_post_config_one(child); 6641 return; 6642 } 6643 6644 /* devi_config_common */ 6645 ndi_devi_enter(vdip, &circ); 6646 cdip = ddi_get_child(vdip); 6647 while (cdip) { 6648 dev_info_t *next = ddi_get_next_sibling(cdip); 6649 6650 i_mdi_pm_post_config_one(cdip); 6651 cdip = next; 6652 } 6653 ndi_devi_exit(vdip, circ); 6654 } 6655 6656 static void 6657 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6658 { 6659 mdi_client_t *ct; 6660 6661 ct = i_devi_get_client(child); 6662 if (ct == NULL) 6663 return; 6664 6665 MDI_CLIENT_LOCK(ct); 6666 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6667 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6668 6669 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6670 MDI_DEBUG(4, (CE_NOTE, child, 6671 "i_mdi_pm_post_unconfig NOT held\n")); 6672 MDI_CLIENT_UNLOCK(ct); 6673 return; 6674 } 6675 6676 /* failure detaching or another thread just attached it */ 6677 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6678 i_ddi_devi_attached(ct->ct_dip)) || 6679 (!i_ddi_devi_attached(ct->ct_dip) && 6680 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6681 MDI_DEBUG(4, (CE_NOTE, child, 6682 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6683 i_mdi_pm_reset_client(ct); 6684 } else { 6685 mdi_pathinfo_t *pip, *next; 6686 int valid_path_count = 0; 6687 6688 MDI_DEBUG(4, (CE_NOTE, child, 6689 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6690 pip = ct->ct_path_head; 6691 while (pip != NULL) { 6692 MDI_PI_LOCK(pip); 6693 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6694 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6695 valid_path_count ++; 6696 MDI_PI_UNLOCK(pip); 6697 pip = next; 6698 } 6699 i_mdi_pm_rele_client(ct, valid_path_count); 6700 ct->ct_powercnt_unconfig = 0; 6701 } 6702 6703 MDI_CLIENT_UNLOCK(ct); 6704 } 6705 6706 static void 6707 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6708 { 6709 int circ; 6710 dev_info_t *cdip; 6711 6712 ASSERT(MDI_VHCI(vdip)); 6713 6714 if (!held) { 6715 MDI_DEBUG(4, (CE_NOTE, vdip, 6716 "i_mdi_pm_post_unconfig held = %d\n", held)); 6717 return; 6718 } 6719 6720 if (child) { 6721 ASSERT(DEVI_BUSY_OWNED(vdip)); 6722 i_mdi_pm_post_unconfig_one(child); 6723 return; 6724 } 6725 6726 ndi_devi_enter(vdip, &circ); 6727 cdip = ddi_get_child(vdip); 6728 while (cdip) { 6729 dev_info_t *next = ddi_get_next_sibling(cdip); 6730 6731 i_mdi_pm_post_unconfig_one(cdip); 6732 cdip = next; 6733 } 6734 ndi_devi_exit(vdip, circ); 6735 } 6736 6737 int 6738 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6739 { 6740 int circ, ret = MDI_SUCCESS; 6741 dev_info_t *client_dip = NULL; 6742 mdi_client_t *ct; 6743 6744 /* 6745 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6746 * Power up pHCI for the named client device. 6747 * Note: Before the client is enumerated under vhci by phci, 6748 * client_dip can be NULL. Then proceed to power up all the 6749 * pHCIs. 6750 */ 6751 if (devnm != NULL) { 6752 ndi_devi_enter(vdip, &circ); 6753 client_dip = ndi_devi_findchild(vdip, devnm); 6754 } 6755 6756 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6757 op, devnm ? devnm : "NULL", (void *)client_dip)); 6758 6759 switch (op) { 6760 case MDI_PM_PRE_CONFIG: 6761 ret = i_mdi_pm_pre_config(vdip, client_dip); 6762 break; 6763 6764 case MDI_PM_PRE_UNCONFIG: 6765 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6766 flags); 6767 break; 6768 6769 case MDI_PM_POST_CONFIG: 6770 i_mdi_pm_post_config(vdip, client_dip); 6771 break; 6772 6773 case MDI_PM_POST_UNCONFIG: 6774 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6775 break; 6776 6777 case MDI_PM_HOLD_POWER: 6778 case MDI_PM_RELE_POWER: 6779 ASSERT(args); 6780 6781 client_dip = (dev_info_t *)args; 6782 ASSERT(MDI_CLIENT(client_dip)); 6783 6784 ct = i_devi_get_client(client_dip); 6785 MDI_CLIENT_LOCK(ct); 6786 6787 if (op == MDI_PM_HOLD_POWER) { 6788 if (ct->ct_power_cnt == 0) { 6789 (void) i_mdi_power_all_phci(ct); 6790 MDI_DEBUG(4, (CE_NOTE, client_dip, 6791 "mdi_power i_mdi_pm_hold_client\n")); 6792 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6793 } 6794 } else { 6795 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6796 MDI_DEBUG(4, (CE_NOTE, client_dip, 6797 "mdi_power i_mdi_pm_rele_client\n")); 6798 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6799 } else { 6800 MDI_DEBUG(4, (CE_NOTE, client_dip, 6801 "mdi_power i_mdi_pm_reset_client\n")); 6802 i_mdi_pm_reset_client(ct); 6803 } 6804 } 6805 6806 MDI_CLIENT_UNLOCK(ct); 6807 break; 6808 6809 default: 6810 break; 6811 } 6812 6813 if (devnm) 6814 ndi_devi_exit(vdip, circ); 6815 6816 return (ret); 6817 } 6818 6819 int 6820 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6821 { 6822 mdi_vhci_t *vhci; 6823 6824 if (!MDI_VHCI(dip)) 6825 return (MDI_FAILURE); 6826 6827 if (mdi_class) { 6828 vhci = DEVI(dip)->devi_mdi_xhci; 6829 ASSERT(vhci); 6830 *mdi_class = vhci->vh_class; 6831 } 6832 6833 return (MDI_SUCCESS); 6834 } 6835 6836 int 6837 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6838 { 6839 mdi_phci_t *phci; 6840 6841 if (!MDI_PHCI(dip)) 6842 return (MDI_FAILURE); 6843 6844 if (mdi_class) { 6845 phci = DEVI(dip)->devi_mdi_xhci; 6846 ASSERT(phci); 6847 *mdi_class = phci->ph_vhci->vh_class; 6848 } 6849 6850 return (MDI_SUCCESS); 6851 } 6852 6853 int 6854 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6855 { 6856 mdi_client_t *client; 6857 6858 if (!MDI_CLIENT(dip)) 6859 return (MDI_FAILURE); 6860 6861 if (mdi_class) { 6862 client = DEVI(dip)->devi_mdi_client; 6863 ASSERT(client); 6864 *mdi_class = client->ct_vhci->vh_class; 6865 } 6866 6867 return (MDI_SUCCESS); 6868 } 6869 6870 void * 6871 mdi_client_get_vhci_private(dev_info_t *dip) 6872 { 6873 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6874 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6875 mdi_client_t *ct; 6876 ct = i_devi_get_client(dip); 6877 return (ct->ct_vprivate); 6878 } 6879 return (NULL); 6880 } 6881 6882 void 6883 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6884 { 6885 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6886 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6887 mdi_client_t *ct; 6888 ct = i_devi_get_client(dip); 6889 ct->ct_vprivate = data; 6890 } 6891 } 6892 /* 6893 * mdi_pi_get_vhci_private(): 6894 * Get the vhci private information associated with the 6895 * mdi_pathinfo node 6896 */ 6897 void * 6898 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6899 { 6900 caddr_t vprivate = NULL; 6901 if (pip) { 6902 vprivate = MDI_PI(pip)->pi_vprivate; 6903 } 6904 return (vprivate); 6905 } 6906 6907 /* 6908 * mdi_pi_set_vhci_private(): 6909 * Set the vhci private information in the mdi_pathinfo node 6910 */ 6911 void 6912 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6913 { 6914 if (pip) { 6915 MDI_PI(pip)->pi_vprivate = priv; 6916 } 6917 } 6918 6919 /* 6920 * mdi_phci_get_vhci_private(): 6921 * Get the vhci private information associated with the 6922 * mdi_phci node 6923 */ 6924 void * 6925 mdi_phci_get_vhci_private(dev_info_t *dip) 6926 { 6927 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6928 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6929 mdi_phci_t *ph; 6930 ph = i_devi_get_phci(dip); 6931 return (ph->ph_vprivate); 6932 } 6933 return (NULL); 6934 } 6935 6936 /* 6937 * mdi_phci_set_vhci_private(): 6938 * Set the vhci private information in the mdi_phci node 6939 */ 6940 void 6941 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6942 { 6943 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6944 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6945 mdi_phci_t *ph; 6946 ph = i_devi_get_phci(dip); 6947 ph->ph_vprivate = priv; 6948 } 6949 } 6950 6951 /* 6952 * List of vhci class names: 6953 * A vhci class name must be in this list only if the corresponding vhci 6954 * driver intends to use the mdi provided bus config implementation 6955 * (i.e., mdi_vhci_bus_config()). 6956 */ 6957 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6958 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6959 6960 /* 6961 * During boot time, the on-disk vhci cache for every vhci class is read 6962 * in the form of an nvlist and stored here. 6963 */ 6964 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6965 6966 /* nvpair names in vhci cache nvlist */ 6967 #define MDI_VHCI_CACHE_VERSION 1 6968 #define MDI_NVPNAME_VERSION "version" 6969 #define MDI_NVPNAME_PHCIS "phcis" 6970 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6971 6972 /* 6973 * Given vhci class name, return its on-disk vhci cache filename. 6974 * Memory for the returned filename which includes the full path is allocated 6975 * by this function. 6976 */ 6977 static char * 6978 vhclass2vhcache_filename(char *vhclass) 6979 { 6980 char *filename; 6981 int len; 6982 static char *fmt = "/etc/devices/mdi_%s_cache"; 6983 6984 /* 6985 * fmt contains the on-disk vhci cache file name format; 6986 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 6987 */ 6988 6989 /* the -1 below is to account for "%s" in the format string */ 6990 len = strlen(fmt) + strlen(vhclass) - 1; 6991 filename = kmem_alloc(len, KM_SLEEP); 6992 (void) snprintf(filename, len, fmt, vhclass); 6993 ASSERT(len == (strlen(filename) + 1)); 6994 return (filename); 6995 } 6996 6997 /* 6998 * initialize the vhci cache related data structures and read the on-disk 6999 * vhci cached data into memory. 7000 */ 7001 static void 7002 setup_vhci_cache(mdi_vhci_t *vh) 7003 { 7004 mdi_vhci_config_t *vhc; 7005 mdi_vhci_cache_t *vhcache; 7006 int i; 7007 nvlist_t *nvl = NULL; 7008 7009 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7010 vh->vh_config = vhc; 7011 vhcache = &vhc->vhc_vhcache; 7012 7013 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7014 7015 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7016 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7017 7018 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7019 7020 /* 7021 * Create string hash; same as mod_hash_create_strhash() except that 7022 * we use NULL key destructor. 7023 */ 7024 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7025 mdi_bus_config_cache_hash_size, 7026 mod_hash_null_keydtor, mod_hash_null_valdtor, 7027 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7028 7029 /* 7030 * The on-disk vhci cache is read during booting prior to the 7031 * lights-out period by mdi_read_devices_files(). 7032 */ 7033 for (i = 0; i < N_VHCI_CLASSES; i++) { 7034 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7035 nvl = vhcache_nvl[i]; 7036 vhcache_nvl[i] = NULL; 7037 break; 7038 } 7039 } 7040 7041 /* 7042 * this is to cover the case of some one manually causing unloading 7043 * (or detaching) and reloading (or attaching) of a vhci driver. 7044 */ 7045 if (nvl == NULL && modrootloaded) 7046 nvl = read_on_disk_vhci_cache(vh->vh_class); 7047 7048 if (nvl != NULL) { 7049 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7050 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7051 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7052 else { 7053 cmn_err(CE_WARN, 7054 "%s: data file corrupted, will recreate\n", 7055 vhc->vhc_vhcache_filename); 7056 } 7057 rw_exit(&vhcache->vhcache_lock); 7058 nvlist_free(nvl); 7059 } 7060 7061 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7062 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7063 7064 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7065 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7066 } 7067 7068 /* 7069 * free all vhci cache related resources 7070 */ 7071 static int 7072 destroy_vhci_cache(mdi_vhci_t *vh) 7073 { 7074 mdi_vhci_config_t *vhc = vh->vh_config; 7075 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7076 mdi_vhcache_phci_t *cphci, *cphci_next; 7077 mdi_vhcache_client_t *cct, *cct_next; 7078 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7079 7080 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7081 return (MDI_FAILURE); 7082 7083 kmem_free(vhc->vhc_vhcache_filename, 7084 strlen(vhc->vhc_vhcache_filename) + 1); 7085 7086 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7087 7088 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7089 cphci = cphci_next) { 7090 cphci_next = cphci->cphci_next; 7091 free_vhcache_phci(cphci); 7092 } 7093 7094 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7095 cct_next = cct->cct_next; 7096 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7097 cpi_next = cpi->cpi_next; 7098 free_vhcache_pathinfo(cpi); 7099 } 7100 free_vhcache_client(cct); 7101 } 7102 7103 rw_destroy(&vhcache->vhcache_lock); 7104 7105 mutex_destroy(&vhc->vhc_lock); 7106 cv_destroy(&vhc->vhc_cv); 7107 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7108 return (MDI_SUCCESS); 7109 } 7110 7111 /* 7112 * Stop all vhci cache related async threads and free their resources. 7113 */ 7114 static int 7115 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7116 { 7117 mdi_async_client_config_t *acc, *acc_next; 7118 7119 mutex_enter(&vhc->vhc_lock); 7120 vhc->vhc_flags |= MDI_VHC_EXIT; 7121 ASSERT(vhc->vhc_acc_thrcount >= 0); 7122 cv_broadcast(&vhc->vhc_cv); 7123 7124 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7125 vhc->vhc_acc_thrcount != 0) { 7126 mutex_exit(&vhc->vhc_lock); 7127 delay(1); 7128 mutex_enter(&vhc->vhc_lock); 7129 } 7130 7131 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7132 7133 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7134 acc_next = acc->acc_next; 7135 free_async_client_config(acc); 7136 } 7137 vhc->vhc_acc_list_head = NULL; 7138 vhc->vhc_acc_list_tail = NULL; 7139 vhc->vhc_acc_count = 0; 7140 7141 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7142 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7143 mutex_exit(&vhc->vhc_lock); 7144 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7145 vhcache_dirty(vhc); 7146 return (MDI_FAILURE); 7147 } 7148 } else 7149 mutex_exit(&vhc->vhc_lock); 7150 7151 if (callb_delete(vhc->vhc_cbid) != 0) 7152 return (MDI_FAILURE); 7153 7154 return (MDI_SUCCESS); 7155 } 7156 7157 /* 7158 * Stop vhci cache flush thread 7159 */ 7160 /* ARGSUSED */ 7161 static boolean_t 7162 stop_vhcache_flush_thread(void *arg, int code) 7163 { 7164 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7165 7166 mutex_enter(&vhc->vhc_lock); 7167 vhc->vhc_flags |= MDI_VHC_EXIT; 7168 cv_broadcast(&vhc->vhc_cv); 7169 7170 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7171 mutex_exit(&vhc->vhc_lock); 7172 delay(1); 7173 mutex_enter(&vhc->vhc_lock); 7174 } 7175 7176 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7177 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7178 mutex_exit(&vhc->vhc_lock); 7179 (void) flush_vhcache(vhc, 1); 7180 } else 7181 mutex_exit(&vhc->vhc_lock); 7182 7183 return (B_TRUE); 7184 } 7185 7186 /* 7187 * Enqueue the vhcache phci (cphci) at the tail of the list 7188 */ 7189 static void 7190 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7191 { 7192 cphci->cphci_next = NULL; 7193 if (vhcache->vhcache_phci_head == NULL) 7194 vhcache->vhcache_phci_head = cphci; 7195 else 7196 vhcache->vhcache_phci_tail->cphci_next = cphci; 7197 vhcache->vhcache_phci_tail = cphci; 7198 } 7199 7200 /* 7201 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7202 */ 7203 static void 7204 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7205 mdi_vhcache_pathinfo_t *cpi) 7206 { 7207 cpi->cpi_next = NULL; 7208 if (cct->cct_cpi_head == NULL) 7209 cct->cct_cpi_head = cpi; 7210 else 7211 cct->cct_cpi_tail->cpi_next = cpi; 7212 cct->cct_cpi_tail = cpi; 7213 } 7214 7215 /* 7216 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7217 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7218 * flag set come at the beginning of the list. All cpis which have this 7219 * flag set come at the end of the list. 7220 */ 7221 static void 7222 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7223 mdi_vhcache_pathinfo_t *newcpi) 7224 { 7225 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7226 7227 if (cct->cct_cpi_head == NULL || 7228 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7229 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7230 else { 7231 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7232 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7233 prev_cpi = cpi, cpi = cpi->cpi_next) 7234 ; 7235 7236 if (prev_cpi == NULL) 7237 cct->cct_cpi_head = newcpi; 7238 else 7239 prev_cpi->cpi_next = newcpi; 7240 7241 newcpi->cpi_next = cpi; 7242 7243 if (cpi == NULL) 7244 cct->cct_cpi_tail = newcpi; 7245 } 7246 } 7247 7248 /* 7249 * Enqueue the vhcache client (cct) at the tail of the list 7250 */ 7251 static void 7252 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7253 mdi_vhcache_client_t *cct) 7254 { 7255 cct->cct_next = NULL; 7256 if (vhcache->vhcache_client_head == NULL) 7257 vhcache->vhcache_client_head = cct; 7258 else 7259 vhcache->vhcache_client_tail->cct_next = cct; 7260 vhcache->vhcache_client_tail = cct; 7261 } 7262 7263 static void 7264 free_string_array(char **str, int nelem) 7265 { 7266 int i; 7267 7268 if (str) { 7269 for (i = 0; i < nelem; i++) { 7270 if (str[i]) 7271 kmem_free(str[i], strlen(str[i]) + 1); 7272 } 7273 kmem_free(str, sizeof (char *) * nelem); 7274 } 7275 } 7276 7277 static void 7278 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7279 { 7280 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7281 kmem_free(cphci, sizeof (*cphci)); 7282 } 7283 7284 static void 7285 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7286 { 7287 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7288 kmem_free(cpi, sizeof (*cpi)); 7289 } 7290 7291 static void 7292 free_vhcache_client(mdi_vhcache_client_t *cct) 7293 { 7294 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7295 kmem_free(cct, sizeof (*cct)); 7296 } 7297 7298 static char * 7299 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7300 { 7301 char *name_addr; 7302 int len; 7303 7304 len = strlen(ct_name) + strlen(ct_addr) + 2; 7305 name_addr = kmem_alloc(len, KM_SLEEP); 7306 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7307 7308 if (ret_len) 7309 *ret_len = len; 7310 return (name_addr); 7311 } 7312 7313 /* 7314 * Copy the contents of paddrnvl to vhci cache. 7315 * paddrnvl nvlist contains path information for a vhci client. 7316 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7317 */ 7318 static void 7319 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7320 mdi_vhcache_client_t *cct) 7321 { 7322 nvpair_t *nvp = NULL; 7323 mdi_vhcache_pathinfo_t *cpi; 7324 uint_t nelem; 7325 uint32_t *val; 7326 7327 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7328 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7329 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7330 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7331 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7332 ASSERT(nelem == 2); 7333 cpi->cpi_cphci = cphci_list[val[0]]; 7334 cpi->cpi_flags = val[1]; 7335 enqueue_tail_vhcache_pathinfo(cct, cpi); 7336 } 7337 } 7338 7339 /* 7340 * Copy the contents of caddrmapnvl to vhci cache. 7341 * caddrmapnvl nvlist contains vhci client address to phci client address 7342 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7343 * this nvlist. 7344 */ 7345 static void 7346 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7347 mdi_vhcache_phci_t *cphci_list[]) 7348 { 7349 nvpair_t *nvp = NULL; 7350 nvlist_t *paddrnvl; 7351 mdi_vhcache_client_t *cct; 7352 7353 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7354 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7355 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7356 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7357 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7358 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7359 /* the client must contain at least one path */ 7360 ASSERT(cct->cct_cpi_head != NULL); 7361 7362 enqueue_vhcache_client(vhcache, cct); 7363 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7364 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7365 } 7366 } 7367 7368 /* 7369 * Copy the contents of the main nvlist to vhci cache. 7370 * 7371 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7372 * The nvlist contains the mappings between the vhci client addresses and 7373 * their corresponding phci client addresses. 7374 * 7375 * The structure of the nvlist is as follows: 7376 * 7377 * Main nvlist: 7378 * NAME TYPE DATA 7379 * version int32 version number 7380 * phcis string array array of phci paths 7381 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7382 * 7383 * structure of c2paddrs_nvl: 7384 * NAME TYPE DATA 7385 * caddr1 nvlist_t paddrs_nvl1 7386 * caddr2 nvlist_t paddrs_nvl2 7387 * ... 7388 * where caddr1, caddr2, ... are vhci client name and addresses in the 7389 * form of "<clientname>@<clientaddress>". 7390 * (for example: "ssd@2000002037cd9f72"); 7391 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7392 * 7393 * structure of paddrs_nvl: 7394 * NAME TYPE DATA 7395 * pi_addr1 uint32_array (phci-id, cpi_flags) 7396 * pi_addr2 uint32_array (phci-id, cpi_flags) 7397 * ... 7398 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes 7399 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7400 * phci-ids are integers that identify PHCIs to which the 7401 * the bus specific address belongs to. These integers are used as an index 7402 * into to the phcis string array in the main nvlist to get the PHCI path. 7403 */ 7404 static int 7405 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7406 { 7407 char **phcis, **phci_namep; 7408 uint_t nphcis; 7409 mdi_vhcache_phci_t *cphci, **cphci_list; 7410 nvlist_t *caddrmapnvl; 7411 int32_t ver; 7412 int i; 7413 size_t cphci_list_size; 7414 7415 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7416 7417 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7418 ver != MDI_VHCI_CACHE_VERSION) 7419 return (MDI_FAILURE); 7420 7421 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7422 &nphcis) != 0) 7423 return (MDI_SUCCESS); 7424 7425 ASSERT(nphcis > 0); 7426 7427 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7428 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7429 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7430 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7431 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7432 enqueue_vhcache_phci(vhcache, cphci); 7433 cphci_list[i] = cphci; 7434 } 7435 7436 ASSERT(vhcache->vhcache_phci_head != NULL); 7437 7438 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7439 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7440 7441 kmem_free(cphci_list, cphci_list_size); 7442 return (MDI_SUCCESS); 7443 } 7444 7445 /* 7446 * Build paddrnvl for the specified client using the information in the 7447 * vhci cache and add it to the caddrmapnnvl. 7448 * Returns 0 on success, errno on failure. 7449 */ 7450 static int 7451 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7452 nvlist_t *caddrmapnvl) 7453 { 7454 mdi_vhcache_pathinfo_t *cpi; 7455 nvlist_t *nvl; 7456 int err; 7457 uint32_t val[2]; 7458 7459 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7460 7461 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7462 return (err); 7463 7464 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7465 val[0] = cpi->cpi_cphci->cphci_id; 7466 val[1] = cpi->cpi_flags; 7467 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7468 != 0) 7469 goto out; 7470 } 7471 7472 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7473 out: 7474 nvlist_free(nvl); 7475 return (err); 7476 } 7477 7478 /* 7479 * Build caddrmapnvl using the information in the vhci cache 7480 * and add it to the mainnvl. 7481 * Returns 0 on success, errno on failure. 7482 */ 7483 static int 7484 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7485 { 7486 mdi_vhcache_client_t *cct; 7487 nvlist_t *nvl; 7488 int err; 7489 7490 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7491 7492 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7493 return (err); 7494 7495 for (cct = vhcache->vhcache_client_head; cct != NULL; 7496 cct = cct->cct_next) { 7497 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7498 goto out; 7499 } 7500 7501 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7502 out: 7503 nvlist_free(nvl); 7504 return (err); 7505 } 7506 7507 /* 7508 * Build nvlist using the information in the vhci cache. 7509 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7510 * Returns nvl on success, NULL on failure. 7511 */ 7512 static nvlist_t * 7513 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7514 { 7515 mdi_vhcache_phci_t *cphci; 7516 uint_t phci_count; 7517 char **phcis; 7518 nvlist_t *nvl; 7519 int err, i; 7520 7521 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7522 nvl = NULL; 7523 goto out; 7524 } 7525 7526 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7527 MDI_VHCI_CACHE_VERSION)) != 0) 7528 goto out; 7529 7530 rw_enter(&vhcache->vhcache_lock, RW_READER); 7531 if (vhcache->vhcache_phci_head == NULL) { 7532 rw_exit(&vhcache->vhcache_lock); 7533 return (nvl); 7534 } 7535 7536 phci_count = 0; 7537 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7538 cphci = cphci->cphci_next) 7539 cphci->cphci_id = phci_count++; 7540 7541 /* build phci pathname list */ 7542 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7543 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7544 cphci = cphci->cphci_next, i++) 7545 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7546 7547 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7548 phci_count); 7549 free_string_array(phcis, phci_count); 7550 7551 if (err == 0 && 7552 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7553 rw_exit(&vhcache->vhcache_lock); 7554 return (nvl); 7555 } 7556 7557 rw_exit(&vhcache->vhcache_lock); 7558 out: 7559 if (nvl) 7560 nvlist_free(nvl); 7561 return (NULL); 7562 } 7563 7564 /* 7565 * Lookup vhcache phci structure for the specified phci path. 7566 */ 7567 static mdi_vhcache_phci_t * 7568 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7569 { 7570 mdi_vhcache_phci_t *cphci; 7571 7572 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7573 7574 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7575 cphci = cphci->cphci_next) { 7576 if (strcmp(cphci->cphci_path, phci_path) == 0) 7577 return (cphci); 7578 } 7579 7580 return (NULL); 7581 } 7582 7583 /* 7584 * Lookup vhcache phci structure for the specified phci. 7585 */ 7586 static mdi_vhcache_phci_t * 7587 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7588 { 7589 mdi_vhcache_phci_t *cphci; 7590 7591 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7592 7593 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7594 cphci = cphci->cphci_next) { 7595 if (cphci->cphci_phci == ph) 7596 return (cphci); 7597 } 7598 7599 return (NULL); 7600 } 7601 7602 /* 7603 * Add the specified phci to the vhci cache if not already present. 7604 */ 7605 static void 7606 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7607 { 7608 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7609 mdi_vhcache_phci_t *cphci; 7610 char *pathname; 7611 int cache_updated; 7612 7613 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7614 7615 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7616 (void) ddi_pathname(ph->ph_dip, pathname); 7617 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7618 != NULL) { 7619 cphci->cphci_phci = ph; 7620 cache_updated = 0; 7621 } else { 7622 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7623 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7624 cphci->cphci_phci = ph; 7625 enqueue_vhcache_phci(vhcache, cphci); 7626 cache_updated = 1; 7627 } 7628 7629 rw_exit(&vhcache->vhcache_lock); 7630 7631 /* 7632 * Since a new phci has been added, reset 7633 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7634 * during next vhcache_discover_paths(). 7635 */ 7636 mutex_enter(&vhc->vhc_lock); 7637 vhc->vhc_path_discovery_cutoff_time = 0; 7638 mutex_exit(&vhc->vhc_lock); 7639 7640 kmem_free(pathname, MAXPATHLEN); 7641 if (cache_updated) 7642 vhcache_dirty(vhc); 7643 } 7644 7645 /* 7646 * Remove the reference to the specified phci from the vhci cache. 7647 */ 7648 static void 7649 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7650 { 7651 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7652 mdi_vhcache_phci_t *cphci; 7653 7654 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7655 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7656 /* do not remove the actual mdi_vhcache_phci structure */ 7657 cphci->cphci_phci = NULL; 7658 } 7659 rw_exit(&vhcache->vhcache_lock); 7660 } 7661 7662 static void 7663 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7664 mdi_vhcache_lookup_token_t *src) 7665 { 7666 if (src == NULL) { 7667 dst->lt_cct = NULL; 7668 dst->lt_cct_lookup_time = 0; 7669 } else { 7670 dst->lt_cct = src->lt_cct; 7671 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7672 } 7673 } 7674 7675 /* 7676 * Look up vhcache client for the specified client. 7677 */ 7678 static mdi_vhcache_client_t * 7679 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7680 mdi_vhcache_lookup_token_t *token) 7681 { 7682 mod_hash_val_t hv; 7683 char *name_addr; 7684 int len; 7685 7686 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7687 7688 /* 7689 * If no vhcache clean occurred since the last lookup, we can 7690 * simply return the cct from the last lookup operation. 7691 * It works because ccts are never freed except during the vhcache 7692 * cleanup operation. 7693 */ 7694 if (token != NULL && 7695 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7696 return (token->lt_cct); 7697 7698 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7699 if (mod_hash_find(vhcache->vhcache_client_hash, 7700 (mod_hash_key_t)name_addr, &hv) == 0) { 7701 if (token) { 7702 token->lt_cct = (mdi_vhcache_client_t *)hv; 7703 token->lt_cct_lookup_time = lbolt64; 7704 } 7705 } else { 7706 if (token) { 7707 token->lt_cct = NULL; 7708 token->lt_cct_lookup_time = 0; 7709 } 7710 hv = NULL; 7711 } 7712 kmem_free(name_addr, len); 7713 return ((mdi_vhcache_client_t *)hv); 7714 } 7715 7716 /* 7717 * Add the specified path to the vhci cache if not already present. 7718 * Also add the vhcache client for the client corresponding to this path 7719 * if it doesn't already exist. 7720 */ 7721 static void 7722 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7723 { 7724 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7725 mdi_vhcache_client_t *cct; 7726 mdi_vhcache_pathinfo_t *cpi; 7727 mdi_phci_t *ph = pip->pi_phci; 7728 mdi_client_t *ct = pip->pi_client; 7729 int cache_updated = 0; 7730 7731 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7732 7733 /* if vhcache client for this pip doesn't already exist, add it */ 7734 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7735 NULL)) == NULL) { 7736 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7737 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7738 ct->ct_guid, NULL); 7739 enqueue_vhcache_client(vhcache, cct); 7740 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7741 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7742 cache_updated = 1; 7743 } 7744 7745 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7746 if (cpi->cpi_cphci->cphci_phci == ph && 7747 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7748 cpi->cpi_pip = pip; 7749 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7750 cpi->cpi_flags &= 7751 ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7752 sort_vhcache_paths(cct); 7753 cache_updated = 1; 7754 } 7755 break; 7756 } 7757 } 7758 7759 if (cpi == NULL) { 7760 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7761 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7762 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7763 ASSERT(cpi->cpi_cphci != NULL); 7764 cpi->cpi_pip = pip; 7765 enqueue_vhcache_pathinfo(cct, cpi); 7766 cache_updated = 1; 7767 } 7768 7769 rw_exit(&vhcache->vhcache_lock); 7770 7771 if (cache_updated) 7772 vhcache_dirty(vhc); 7773 } 7774 7775 /* 7776 * Remove the reference to the specified path from the vhci cache. 7777 */ 7778 static void 7779 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7780 { 7781 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7782 mdi_client_t *ct = pip->pi_client; 7783 mdi_vhcache_client_t *cct; 7784 mdi_vhcache_pathinfo_t *cpi; 7785 7786 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7787 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7788 NULL)) != NULL) { 7789 for (cpi = cct->cct_cpi_head; cpi != NULL; 7790 cpi = cpi->cpi_next) { 7791 if (cpi->cpi_pip == pip) { 7792 cpi->cpi_pip = NULL; 7793 break; 7794 } 7795 } 7796 } 7797 rw_exit(&vhcache->vhcache_lock); 7798 } 7799 7800 /* 7801 * Flush the vhci cache to disk. 7802 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7803 */ 7804 static int 7805 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7806 { 7807 nvlist_t *nvl; 7808 int err; 7809 int rv; 7810 7811 /* 7812 * It is possible that the system may shutdown before 7813 * i_ddi_io_initialized (during stmsboot for example). To allow for 7814 * flushing the cache in this case do not check for 7815 * i_ddi_io_initialized when force flag is set. 7816 */ 7817 if (force_flag == 0 && !i_ddi_io_initialized()) 7818 return (MDI_FAILURE); 7819 7820 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7821 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7822 nvlist_free(nvl); 7823 } else 7824 err = EFAULT; 7825 7826 rv = MDI_SUCCESS; 7827 mutex_enter(&vhc->vhc_lock); 7828 if (err != 0) { 7829 if (err == EROFS) { 7830 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7831 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7832 MDI_VHC_VHCACHE_DIRTY); 7833 } else { 7834 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7835 cmn_err(CE_CONT, "%s: update failed\n", 7836 vhc->vhc_vhcache_filename); 7837 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7838 } 7839 rv = MDI_FAILURE; 7840 } 7841 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7842 cmn_err(CE_CONT, 7843 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7844 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7845 } 7846 mutex_exit(&vhc->vhc_lock); 7847 7848 return (rv); 7849 } 7850 7851 /* 7852 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7853 * Exits itself if left idle for the idle timeout period. 7854 */ 7855 static void 7856 vhcache_flush_thread(void *arg) 7857 { 7858 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7859 clock_t idle_time, quit_at_ticks; 7860 callb_cpr_t cprinfo; 7861 7862 /* number of seconds to sleep idle before exiting */ 7863 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7864 7865 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7866 "mdi_vhcache_flush"); 7867 mutex_enter(&vhc->vhc_lock); 7868 for (; ; ) { 7869 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7870 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7871 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7872 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7873 (void) cv_timedwait(&vhc->vhc_cv, 7874 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7875 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7876 } else { 7877 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7878 mutex_exit(&vhc->vhc_lock); 7879 7880 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7881 vhcache_dirty(vhc); 7882 7883 mutex_enter(&vhc->vhc_lock); 7884 } 7885 } 7886 7887 quit_at_ticks = ddi_get_lbolt() + idle_time; 7888 7889 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7890 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7891 ddi_get_lbolt() < quit_at_ticks) { 7892 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7893 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7894 quit_at_ticks); 7895 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7896 } 7897 7898 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7899 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7900 goto out; 7901 } 7902 7903 out: 7904 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7905 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7906 CALLB_CPR_EXIT(&cprinfo); 7907 } 7908 7909 /* 7910 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 7911 */ 7912 static void 7913 vhcache_dirty(mdi_vhci_config_t *vhc) 7914 { 7915 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7916 int create_thread; 7917 7918 rw_enter(&vhcache->vhcache_lock, RW_READER); 7919 /* do not flush cache until the cache is fully built */ 7920 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7921 rw_exit(&vhcache->vhcache_lock); 7922 return; 7923 } 7924 rw_exit(&vhcache->vhcache_lock); 7925 7926 mutex_enter(&vhc->vhc_lock); 7927 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7928 mutex_exit(&vhc->vhc_lock); 7929 return; 7930 } 7931 7932 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7933 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7934 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7935 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7936 cv_broadcast(&vhc->vhc_cv); 7937 create_thread = 0; 7938 } else { 7939 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7940 create_thread = 1; 7941 } 7942 mutex_exit(&vhc->vhc_lock); 7943 7944 if (create_thread) 7945 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7946 0, &p0, TS_RUN, minclsyspri); 7947 } 7948 7949 /* 7950 * phci bus config structure - one for for each phci bus config operation that 7951 * we initiate on behalf of a vhci. 7952 */ 7953 typedef struct mdi_phci_bus_config_s { 7954 char *phbc_phci_path; 7955 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7956 struct mdi_phci_bus_config_s *phbc_next; 7957 } mdi_phci_bus_config_t; 7958 7959 /* vhci bus config structure - one for each vhci bus config operation */ 7960 typedef struct mdi_vhci_bus_config_s { 7961 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7962 major_t vhbc_op_major; /* bus config op major */ 7963 uint_t vhbc_op_flags; /* bus config op flags */ 7964 kmutex_t vhbc_lock; 7965 kcondvar_t vhbc_cv; 7966 int vhbc_thr_count; 7967 } mdi_vhci_bus_config_t; 7968 7969 /* 7970 * bus config the specified phci 7971 */ 7972 static void 7973 bus_config_phci(void *arg) 7974 { 7975 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7976 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7977 dev_info_t *ph_dip; 7978 7979 /* 7980 * first configure all path components upto phci and then configure 7981 * the phci children. 7982 */ 7983 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7984 != NULL) { 7985 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7986 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7987 (void) ndi_devi_config_driver(ph_dip, 7988 vhbc->vhbc_op_flags, 7989 vhbc->vhbc_op_major); 7990 } else 7991 (void) ndi_devi_config(ph_dip, 7992 vhbc->vhbc_op_flags); 7993 7994 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7995 ndi_rele_devi(ph_dip); 7996 } 7997 7998 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7999 kmem_free(phbc, sizeof (*phbc)); 8000 8001 mutex_enter(&vhbc->vhbc_lock); 8002 vhbc->vhbc_thr_count--; 8003 if (vhbc->vhbc_thr_count == 0) 8004 cv_broadcast(&vhbc->vhbc_cv); 8005 mutex_exit(&vhbc->vhbc_lock); 8006 } 8007 8008 /* 8009 * Bus config all phcis associated with the vhci in parallel. 8010 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 8011 */ 8012 static void 8013 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8014 ddi_bus_config_op_t op, major_t maj) 8015 { 8016 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8017 mdi_vhci_bus_config_t *vhbc; 8018 mdi_vhcache_phci_t *cphci; 8019 8020 rw_enter(&vhcache->vhcache_lock, RW_READER); 8021 if (vhcache->vhcache_phci_head == NULL) { 8022 rw_exit(&vhcache->vhcache_lock); 8023 return; 8024 } 8025 8026 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8027 8028 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8029 cphci = cphci->cphci_next) { 8030 /* skip phcis that haven't attached before root is available */ 8031 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8032 continue; 8033 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8034 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8035 KM_SLEEP); 8036 phbc->phbc_vhbusconfig = vhbc; 8037 phbc->phbc_next = phbc_head; 8038 phbc_head = phbc; 8039 vhbc->vhbc_thr_count++; 8040 } 8041 rw_exit(&vhcache->vhcache_lock); 8042 8043 vhbc->vhbc_op = op; 8044 vhbc->vhbc_op_major = maj; 8045 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8046 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8047 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8048 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8049 8050 /* now create threads to initiate bus config on all phcis in parallel */ 8051 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8052 phbc_next = phbc->phbc_next; 8053 if (mdi_mtc_off) 8054 bus_config_phci((void *)phbc); 8055 else 8056 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8057 0, &p0, TS_RUN, minclsyspri); 8058 } 8059 8060 mutex_enter(&vhbc->vhbc_lock); 8061 /* wait until all threads exit */ 8062 while (vhbc->vhbc_thr_count > 0) 8063 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8064 mutex_exit(&vhbc->vhbc_lock); 8065 8066 mutex_destroy(&vhbc->vhbc_lock); 8067 cv_destroy(&vhbc->vhbc_cv); 8068 kmem_free(vhbc, sizeof (*vhbc)); 8069 } 8070 8071 /* 8072 * Single threaded version of bus_config_all_phcis() 8073 */ 8074 static void 8075 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8076 ddi_bus_config_op_t op, major_t maj) 8077 { 8078 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8079 8080 single_threaded_vhconfig_enter(vhc); 8081 bus_config_all_phcis(vhcache, flags, op, maj); 8082 single_threaded_vhconfig_exit(vhc); 8083 } 8084 8085 /* 8086 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8087 * The path includes the child component in addition to the phci path. 8088 */ 8089 static int 8090 bus_config_one_phci_child(char *path) 8091 { 8092 dev_info_t *ph_dip, *child; 8093 char *devnm; 8094 int rv = MDI_FAILURE; 8095 8096 /* extract the child component of the phci */ 8097 devnm = strrchr(path, '/'); 8098 *devnm++ = '\0'; 8099 8100 /* 8101 * first configure all path components upto phci and then 8102 * configure the phci child. 8103 */ 8104 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8105 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8106 NDI_SUCCESS) { 8107 /* 8108 * release the hold that ndi_devi_config_one() placed 8109 */ 8110 ndi_rele_devi(child); 8111 rv = MDI_SUCCESS; 8112 } 8113 8114 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8115 ndi_rele_devi(ph_dip); 8116 } 8117 8118 devnm--; 8119 *devnm = '/'; 8120 return (rv); 8121 } 8122 8123 /* 8124 * Build a list of phci client paths for the specified vhci client. 8125 * The list includes only those phci client paths which aren't configured yet. 8126 */ 8127 static mdi_phys_path_t * 8128 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8129 { 8130 mdi_vhcache_pathinfo_t *cpi; 8131 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8132 int config_path, len; 8133 8134 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8135 /* 8136 * include only those paths that aren't configured. 8137 */ 8138 config_path = 0; 8139 if (cpi->cpi_pip == NULL) 8140 config_path = 1; 8141 else { 8142 MDI_PI_LOCK(cpi->cpi_pip); 8143 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8144 config_path = 1; 8145 MDI_PI_UNLOCK(cpi->cpi_pip); 8146 } 8147 8148 if (config_path) { 8149 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8150 len = strlen(cpi->cpi_cphci->cphci_path) + 8151 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8152 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8153 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8154 cpi->cpi_cphci->cphci_path, ct_name, 8155 cpi->cpi_addr); 8156 pp->phys_path_next = NULL; 8157 8158 if (pp_head == NULL) 8159 pp_head = pp; 8160 else 8161 pp_tail->phys_path_next = pp; 8162 pp_tail = pp; 8163 } 8164 } 8165 8166 return (pp_head); 8167 } 8168 8169 /* 8170 * Free the memory allocated for phci client path list. 8171 */ 8172 static void 8173 free_phclient_path_list(mdi_phys_path_t *pp_head) 8174 { 8175 mdi_phys_path_t *pp, *pp_next; 8176 8177 for (pp = pp_head; pp != NULL; pp = pp_next) { 8178 pp_next = pp->phys_path_next; 8179 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8180 kmem_free(pp, sizeof (*pp)); 8181 } 8182 } 8183 8184 /* 8185 * Allocated async client structure and initialize with the specified values. 8186 */ 8187 static mdi_async_client_config_t * 8188 alloc_async_client_config(char *ct_name, char *ct_addr, 8189 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8190 { 8191 mdi_async_client_config_t *acc; 8192 8193 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8194 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8195 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8196 acc->acc_phclient_path_list_head = pp_head; 8197 init_vhcache_lookup_token(&acc->acc_token, tok); 8198 acc->acc_next = NULL; 8199 return (acc); 8200 } 8201 8202 /* 8203 * Free the memory allocated for the async client structure and their members. 8204 */ 8205 static void 8206 free_async_client_config(mdi_async_client_config_t *acc) 8207 { 8208 if (acc->acc_phclient_path_list_head) 8209 free_phclient_path_list(acc->acc_phclient_path_list_head); 8210 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8211 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8212 kmem_free(acc, sizeof (*acc)); 8213 } 8214 8215 /* 8216 * Sort vhcache pathinfos (cpis) of the specified client. 8217 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8218 * flag set come at the beginning of the list. All cpis which have this 8219 * flag set come at the end of the list. 8220 */ 8221 static void 8222 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8223 { 8224 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8225 8226 cpi_head = cct->cct_cpi_head; 8227 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8228 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8229 cpi_next = cpi->cpi_next; 8230 enqueue_vhcache_pathinfo(cct, cpi); 8231 } 8232 } 8233 8234 /* 8235 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8236 * every vhcache pathinfo of the specified client. If not adjust the flag 8237 * setting appropriately. 8238 * 8239 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8240 * on-disk vhci cache. So every time this flag is updated the cache must be 8241 * flushed. 8242 */ 8243 static void 8244 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8245 mdi_vhcache_lookup_token_t *tok) 8246 { 8247 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8248 mdi_vhcache_client_t *cct; 8249 mdi_vhcache_pathinfo_t *cpi; 8250 8251 rw_enter(&vhcache->vhcache_lock, RW_READER); 8252 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8253 == NULL) { 8254 rw_exit(&vhcache->vhcache_lock); 8255 return; 8256 } 8257 8258 /* 8259 * to avoid unnecessary on-disk cache updates, first check if an 8260 * update is really needed. If no update is needed simply return. 8261 */ 8262 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8263 if ((cpi->cpi_pip != NULL && 8264 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8265 (cpi->cpi_pip == NULL && 8266 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8267 break; 8268 } 8269 } 8270 if (cpi == NULL) { 8271 rw_exit(&vhcache->vhcache_lock); 8272 return; 8273 } 8274 8275 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8276 rw_exit(&vhcache->vhcache_lock); 8277 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8278 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8279 tok)) == NULL) { 8280 rw_exit(&vhcache->vhcache_lock); 8281 return; 8282 } 8283 } 8284 8285 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8286 if (cpi->cpi_pip != NULL) 8287 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8288 else 8289 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8290 } 8291 sort_vhcache_paths(cct); 8292 8293 rw_exit(&vhcache->vhcache_lock); 8294 vhcache_dirty(vhc); 8295 } 8296 8297 /* 8298 * Configure all specified paths of the client. 8299 */ 8300 static void 8301 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8302 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8303 { 8304 mdi_phys_path_t *pp; 8305 8306 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8307 (void) bus_config_one_phci_child(pp->phys_path); 8308 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8309 } 8310 8311 /* 8312 * Dequeue elements from vhci async client config list and bus configure 8313 * their corresponding phci clients. 8314 */ 8315 static void 8316 config_client_paths_thread(void *arg) 8317 { 8318 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8319 mdi_async_client_config_t *acc; 8320 clock_t quit_at_ticks; 8321 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8322 callb_cpr_t cprinfo; 8323 8324 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8325 "mdi_config_client_paths"); 8326 8327 for (; ; ) { 8328 quit_at_ticks = ddi_get_lbolt() + idle_time; 8329 8330 mutex_enter(&vhc->vhc_lock); 8331 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8332 vhc->vhc_acc_list_head == NULL && 8333 ddi_get_lbolt() < quit_at_ticks) { 8334 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8335 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8336 quit_at_ticks); 8337 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8338 } 8339 8340 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8341 vhc->vhc_acc_list_head == NULL) 8342 goto out; 8343 8344 acc = vhc->vhc_acc_list_head; 8345 vhc->vhc_acc_list_head = acc->acc_next; 8346 if (vhc->vhc_acc_list_head == NULL) 8347 vhc->vhc_acc_list_tail = NULL; 8348 vhc->vhc_acc_count--; 8349 mutex_exit(&vhc->vhc_lock); 8350 8351 config_client_paths_sync(vhc, acc->acc_ct_name, 8352 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8353 &acc->acc_token); 8354 8355 free_async_client_config(acc); 8356 } 8357 8358 out: 8359 vhc->vhc_acc_thrcount--; 8360 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8361 CALLB_CPR_EXIT(&cprinfo); 8362 } 8363 8364 /* 8365 * Arrange for all the phci client paths (pp_head) for the specified client 8366 * to be bus configured asynchronously by a thread. 8367 */ 8368 static void 8369 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8370 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8371 { 8372 mdi_async_client_config_t *acc, *newacc; 8373 int create_thread; 8374 8375 if (pp_head == NULL) 8376 return; 8377 8378 if (mdi_mtc_off) { 8379 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8380 free_phclient_path_list(pp_head); 8381 return; 8382 } 8383 8384 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8385 ASSERT(newacc); 8386 8387 mutex_enter(&vhc->vhc_lock); 8388 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8389 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8390 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8391 free_async_client_config(newacc); 8392 mutex_exit(&vhc->vhc_lock); 8393 return; 8394 } 8395 } 8396 8397 if (vhc->vhc_acc_list_head == NULL) 8398 vhc->vhc_acc_list_head = newacc; 8399 else 8400 vhc->vhc_acc_list_tail->acc_next = newacc; 8401 vhc->vhc_acc_list_tail = newacc; 8402 vhc->vhc_acc_count++; 8403 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8404 cv_broadcast(&vhc->vhc_cv); 8405 create_thread = 0; 8406 } else { 8407 vhc->vhc_acc_thrcount++; 8408 create_thread = 1; 8409 } 8410 mutex_exit(&vhc->vhc_lock); 8411 8412 if (create_thread) 8413 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8414 0, &p0, TS_RUN, minclsyspri); 8415 } 8416 8417 /* 8418 * Return number of online paths for the specified client. 8419 */ 8420 static int 8421 nonline_paths(mdi_vhcache_client_t *cct) 8422 { 8423 mdi_vhcache_pathinfo_t *cpi; 8424 int online_count = 0; 8425 8426 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8427 if (cpi->cpi_pip != NULL) { 8428 MDI_PI_LOCK(cpi->cpi_pip); 8429 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8430 online_count++; 8431 MDI_PI_UNLOCK(cpi->cpi_pip); 8432 } 8433 } 8434 8435 return (online_count); 8436 } 8437 8438 /* 8439 * Bus configure all paths for the specified vhci client. 8440 * If at least one path for the client is already online, the remaining paths 8441 * will be configured asynchronously. Otherwise, it synchronously configures 8442 * the paths until at least one path is online and then rest of the paths 8443 * will be configured asynchronously. 8444 */ 8445 static void 8446 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8447 { 8448 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8449 mdi_phys_path_t *pp_head, *pp; 8450 mdi_vhcache_client_t *cct; 8451 mdi_vhcache_lookup_token_t tok; 8452 8453 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8454 8455 init_vhcache_lookup_token(&tok, NULL); 8456 8457 if (ct_name == NULL || ct_addr == NULL || 8458 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8459 == NULL || 8460 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8461 rw_exit(&vhcache->vhcache_lock); 8462 return; 8463 } 8464 8465 /* if at least one path is online, configure the rest asynchronously */ 8466 if (nonline_paths(cct) > 0) { 8467 rw_exit(&vhcache->vhcache_lock); 8468 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8469 return; 8470 } 8471 8472 rw_exit(&vhcache->vhcache_lock); 8473 8474 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8475 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8476 rw_enter(&vhcache->vhcache_lock, RW_READER); 8477 8478 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8479 ct_addr, &tok)) == NULL) { 8480 rw_exit(&vhcache->vhcache_lock); 8481 goto out; 8482 } 8483 8484 if (nonline_paths(cct) > 0 && 8485 pp->phys_path_next != NULL) { 8486 rw_exit(&vhcache->vhcache_lock); 8487 config_client_paths_async(vhc, ct_name, ct_addr, 8488 pp->phys_path_next, &tok); 8489 pp->phys_path_next = NULL; 8490 goto out; 8491 } 8492 8493 rw_exit(&vhcache->vhcache_lock); 8494 } 8495 } 8496 8497 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8498 out: 8499 free_phclient_path_list(pp_head); 8500 } 8501 8502 static void 8503 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8504 { 8505 mutex_enter(&vhc->vhc_lock); 8506 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8507 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8508 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8509 mutex_exit(&vhc->vhc_lock); 8510 } 8511 8512 static void 8513 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8514 { 8515 mutex_enter(&vhc->vhc_lock); 8516 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8517 cv_broadcast(&vhc->vhc_cv); 8518 mutex_exit(&vhc->vhc_lock); 8519 } 8520 8521 typedef struct mdi_phci_driver_info { 8522 char *phdriver_name; /* name of the phci driver */ 8523 8524 /* set to non zero if the phci driver supports root device */ 8525 int phdriver_root_support; 8526 } mdi_phci_driver_info_t; 8527 8528 /* 8529 * vhci class and root support capability of a phci driver can be 8530 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8531 * phci driver.conf file. The built-in tables below contain this information 8532 * for those phci drivers whose driver.conf files don't yet contain this info. 8533 * 8534 * All phci drivers expect iscsi have root device support. 8535 */ 8536 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8537 { "fp", 1 }, 8538 { "iscsi", 0 }, 8539 { "ibsrp", 1 } 8540 }; 8541 8542 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8543 8544 static void * 8545 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8546 { 8547 void *new_ptr; 8548 8549 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8550 if (old_ptr) { 8551 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8552 kmem_free(old_ptr, old_size); 8553 } 8554 return (new_ptr); 8555 } 8556 8557 static void 8558 add_to_phci_list(char ***driver_list, int **root_support_list, 8559 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8560 { 8561 ASSERT(*cur_elements <= *max_elements); 8562 if (*cur_elements == *max_elements) { 8563 *max_elements += 10; 8564 *driver_list = mdi_realloc(*driver_list, 8565 sizeof (char *) * (*cur_elements), 8566 sizeof (char *) * (*max_elements)); 8567 *root_support_list = mdi_realloc(*root_support_list, 8568 sizeof (int) * (*cur_elements), 8569 sizeof (int) * (*max_elements)); 8570 } 8571 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8572 (*root_support_list)[*cur_elements] = root_support; 8573 (*cur_elements)++; 8574 } 8575 8576 static void 8577 get_phci_driver_list(char *vhci_class, char ***driver_list, 8578 int **root_support_list, int *cur_elements, int *max_elements) 8579 { 8580 mdi_phci_driver_info_t *st_driver_list, *p; 8581 int st_ndrivers, root_support, i, j, driver_conf_count; 8582 major_t m; 8583 struct devnames *dnp; 8584 ddi_prop_t *propp; 8585 8586 *driver_list = NULL; 8587 *root_support_list = NULL; 8588 *cur_elements = 0; 8589 *max_elements = 0; 8590 8591 /* add the phci drivers derived from the phci driver.conf files */ 8592 for (m = 0; m < devcnt; m++) { 8593 dnp = &devnamesp[m]; 8594 8595 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8596 LOCK_DEV_OPS(&dnp->dn_lock); 8597 if (dnp->dn_global_prop_ptr != NULL && 8598 (propp = i_ddi_prop_search(DDI_DEV_T_ANY, 8599 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8600 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8601 strcmp(propp->prop_val, vhci_class) == 0) { 8602 8603 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8604 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8605 &dnp->dn_global_prop_ptr->prop_list) 8606 == NULL) ? 1 : 0; 8607 8608 add_to_phci_list(driver_list, root_support_list, 8609 cur_elements, max_elements, dnp->dn_name, 8610 root_support); 8611 8612 UNLOCK_DEV_OPS(&dnp->dn_lock); 8613 } else 8614 UNLOCK_DEV_OPS(&dnp->dn_lock); 8615 } 8616 } 8617 8618 driver_conf_count = *cur_elements; 8619 8620 /* add the phci drivers specified in the built-in tables */ 8621 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8622 st_driver_list = scsi_phci_driver_list; 8623 st_ndrivers = sizeof (scsi_phci_driver_list) / 8624 sizeof (mdi_phci_driver_info_t); 8625 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8626 st_driver_list = ib_phci_driver_list; 8627 st_ndrivers = sizeof (ib_phci_driver_list) / 8628 sizeof (mdi_phci_driver_info_t); 8629 } else { 8630 st_driver_list = NULL; 8631 st_ndrivers = 0; 8632 } 8633 8634 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8635 /* add this phci driver if not already added before */ 8636 for (j = 0; j < driver_conf_count; j++) { 8637 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8638 break; 8639 } 8640 if (j == driver_conf_count) { 8641 add_to_phci_list(driver_list, root_support_list, 8642 cur_elements, max_elements, p->phdriver_name, 8643 p->phdriver_root_support); 8644 } 8645 } 8646 } 8647 8648 /* 8649 * Attach the phci driver instances associated with the specified vhci class. 8650 * If root is mounted attach all phci driver instances. 8651 * If root is not mounted, attach the instances of only those phci 8652 * drivers that have the root support. 8653 */ 8654 static void 8655 attach_phci_drivers(char *vhci_class) 8656 { 8657 char **driver_list, **p; 8658 int *root_support_list; 8659 int cur_elements, max_elements, i; 8660 major_t m; 8661 8662 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8663 &cur_elements, &max_elements); 8664 8665 for (i = 0; i < cur_elements; i++) { 8666 if (modrootloaded || root_support_list[i]) { 8667 m = ddi_name_to_major(driver_list[i]); 8668 if (m != DDI_MAJOR_T_NONE && 8669 ddi_hold_installed_driver(m)) 8670 ddi_rele_driver(m); 8671 } 8672 } 8673 8674 if (driver_list) { 8675 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8676 kmem_free(*p, strlen(*p) + 1); 8677 kmem_free(driver_list, sizeof (char *) * max_elements); 8678 kmem_free(root_support_list, sizeof (int) * max_elements); 8679 } 8680 } 8681 8682 /* 8683 * Build vhci cache: 8684 * 8685 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8686 * the phci driver instances. During this process the cache gets built. 8687 * 8688 * Cache is built fully if the root is mounted. 8689 * If the root is not mounted, phci drivers that do not have root support 8690 * are not attached. As a result the cache is built partially. The entries 8691 * in the cache reflect only those phci drivers that have root support. 8692 */ 8693 static int 8694 build_vhci_cache(mdi_vhci_t *vh) 8695 { 8696 mdi_vhci_config_t *vhc = vh->vh_config; 8697 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8698 8699 single_threaded_vhconfig_enter(vhc); 8700 8701 rw_enter(&vhcache->vhcache_lock, RW_READER); 8702 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8703 rw_exit(&vhcache->vhcache_lock); 8704 single_threaded_vhconfig_exit(vhc); 8705 return (0); 8706 } 8707 rw_exit(&vhcache->vhcache_lock); 8708 8709 attach_phci_drivers(vh->vh_class); 8710 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8711 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 8712 8713 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8714 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8715 rw_exit(&vhcache->vhcache_lock); 8716 8717 single_threaded_vhconfig_exit(vhc); 8718 vhcache_dirty(vhc); 8719 return (1); 8720 } 8721 8722 /* 8723 * Determine if discovery of paths is needed. 8724 */ 8725 static int 8726 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8727 { 8728 int rv = 1; 8729 8730 mutex_enter(&vhc->vhc_lock); 8731 if (i_ddi_io_initialized() == 0) { 8732 if (vhc->vhc_path_discovery_boot > 0) { 8733 vhc->vhc_path_discovery_boot--; 8734 goto out; 8735 } 8736 } else { 8737 if (vhc->vhc_path_discovery_postboot > 0) { 8738 vhc->vhc_path_discovery_postboot--; 8739 goto out; 8740 } 8741 } 8742 8743 /* 8744 * Do full path discovery at most once per mdi_path_discovery_interval. 8745 * This is to avoid a series of full path discoveries when opening 8746 * stale /dev/[r]dsk links. 8747 */ 8748 if (mdi_path_discovery_interval != -1 && 8749 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8750 goto out; 8751 8752 rv = 0; 8753 out: 8754 mutex_exit(&vhc->vhc_lock); 8755 return (rv); 8756 } 8757 8758 /* 8759 * Discover all paths: 8760 * 8761 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8762 * driver instances. During this process all paths will be discovered. 8763 */ 8764 static int 8765 vhcache_discover_paths(mdi_vhci_t *vh) 8766 { 8767 mdi_vhci_config_t *vhc = vh->vh_config; 8768 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8769 int rv = 0; 8770 8771 single_threaded_vhconfig_enter(vhc); 8772 8773 if (vhcache_do_discovery(vhc)) { 8774 attach_phci_drivers(vh->vh_class); 8775 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8776 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 8777 8778 mutex_enter(&vhc->vhc_lock); 8779 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8780 mdi_path_discovery_interval * TICKS_PER_SECOND; 8781 mutex_exit(&vhc->vhc_lock); 8782 rv = 1; 8783 } 8784 8785 single_threaded_vhconfig_exit(vhc); 8786 return (rv); 8787 } 8788 8789 /* 8790 * Generic vhci bus config implementation: 8791 * 8792 * Parameters 8793 * vdip vhci dip 8794 * flags bus config flags 8795 * op bus config operation 8796 * The remaining parameters are bus config operation specific 8797 * 8798 * for BUS_CONFIG_ONE 8799 * arg pointer to name@addr 8800 * child upon successful return from this function, *child will be 8801 * set to the configured and held devinfo child node of vdip. 8802 * ct_addr pointer to client address (i.e. GUID) 8803 * 8804 * for BUS_CONFIG_DRIVER 8805 * arg major number of the driver 8806 * child and ct_addr parameters are ignored 8807 * 8808 * for BUS_CONFIG_ALL 8809 * arg, child, and ct_addr parameters are ignored 8810 * 8811 * Note that for the rest of the bus config operations, this function simply 8812 * calls the framework provided default bus config routine. 8813 */ 8814 int 8815 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8816 void *arg, dev_info_t **child, char *ct_addr) 8817 { 8818 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8819 mdi_vhci_config_t *vhc = vh->vh_config; 8820 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8821 int rv = 0; 8822 int params_valid = 0; 8823 char *cp; 8824 8825 /* 8826 * To bus config vhcis we relay operation, possibly using another 8827 * thread, to phcis. The phci driver then interacts with MDI to cause 8828 * vhci child nodes to be enumerated under the vhci node. Adding a 8829 * vhci child requires an ndi_devi_enter of the vhci. Since another 8830 * thread may be adding the child, to avoid deadlock we can't wait 8831 * for the relayed operations to complete if we have already entered 8832 * the vhci node. 8833 */ 8834 if (DEVI_BUSY_OWNED(vdip)) { 8835 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8836 "vhci dip is busy owned %p\n", (void *)vdip)); 8837 goto default_bus_config; 8838 } 8839 8840 rw_enter(&vhcache->vhcache_lock, RW_READER); 8841 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8842 rw_exit(&vhcache->vhcache_lock); 8843 rv = build_vhci_cache(vh); 8844 rw_enter(&vhcache->vhcache_lock, RW_READER); 8845 } 8846 8847 switch (op) { 8848 case BUS_CONFIG_ONE: 8849 if (arg != NULL && ct_addr != NULL) { 8850 /* extract node name */ 8851 cp = (char *)arg; 8852 while (*cp != '\0' && *cp != '@') 8853 cp++; 8854 if (*cp == '@') { 8855 params_valid = 1; 8856 *cp = '\0'; 8857 config_client_paths(vhc, (char *)arg, ct_addr); 8858 /* config_client_paths() releases cache_lock */ 8859 *cp = '@'; 8860 break; 8861 } 8862 } 8863 8864 rw_exit(&vhcache->vhcache_lock); 8865 break; 8866 8867 case BUS_CONFIG_DRIVER: 8868 rw_exit(&vhcache->vhcache_lock); 8869 if (rv == 0) 8870 st_bus_config_all_phcis(vhc, flags, op, 8871 (major_t)(uintptr_t)arg); 8872 break; 8873 8874 case BUS_CONFIG_ALL: 8875 rw_exit(&vhcache->vhcache_lock); 8876 if (rv == 0) 8877 st_bus_config_all_phcis(vhc, flags, op, -1); 8878 break; 8879 8880 default: 8881 rw_exit(&vhcache->vhcache_lock); 8882 break; 8883 } 8884 8885 8886 default_bus_config: 8887 /* 8888 * All requested child nodes are enumerated under the vhci. 8889 * Now configure them. 8890 */ 8891 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8892 NDI_SUCCESS) { 8893 return (MDI_SUCCESS); 8894 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8895 /* discover all paths and try configuring again */ 8896 if (vhcache_discover_paths(vh) && 8897 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8898 NDI_SUCCESS) 8899 return (MDI_SUCCESS); 8900 } 8901 8902 return (MDI_FAILURE); 8903 } 8904 8905 /* 8906 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8907 */ 8908 static nvlist_t * 8909 read_on_disk_vhci_cache(char *vhci_class) 8910 { 8911 nvlist_t *nvl; 8912 int err; 8913 char *filename; 8914 8915 filename = vhclass2vhcache_filename(vhci_class); 8916 8917 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8918 kmem_free(filename, strlen(filename) + 1); 8919 return (nvl); 8920 } else if (err == EIO) 8921 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8922 else if (err == EINVAL) 8923 cmn_err(CE_WARN, 8924 "%s: data file corrupted, will recreate\n", filename); 8925 8926 kmem_free(filename, strlen(filename) + 1); 8927 return (NULL); 8928 } 8929 8930 /* 8931 * Read on-disk vhci cache into nvlists for all vhci classes. 8932 * Called during booting by i_ddi_read_devices_files(). 8933 */ 8934 void 8935 mdi_read_devices_files(void) 8936 { 8937 int i; 8938 8939 for (i = 0; i < N_VHCI_CLASSES; i++) 8940 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8941 } 8942 8943 /* 8944 * Remove all stale entries from vhci cache. 8945 */ 8946 static void 8947 clean_vhcache(mdi_vhci_config_t *vhc) 8948 { 8949 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8950 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8951 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8952 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8953 8954 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8955 8956 cct_head = vhcache->vhcache_client_head; 8957 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8958 for (cct = cct_head; cct != NULL; cct = cct_next) { 8959 cct_next = cct->cct_next; 8960 8961 cpi_head = cct->cct_cpi_head; 8962 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8963 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8964 cpi_next = cpi->cpi_next; 8965 if (cpi->cpi_pip != NULL) { 8966 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8967 enqueue_tail_vhcache_pathinfo(cct, cpi); 8968 } else 8969 free_vhcache_pathinfo(cpi); 8970 } 8971 8972 if (cct->cct_cpi_head != NULL) 8973 enqueue_vhcache_client(vhcache, cct); 8974 else { 8975 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8976 (mod_hash_key_t)cct->cct_name_addr); 8977 free_vhcache_client(cct); 8978 } 8979 } 8980 8981 cphci_head = vhcache->vhcache_phci_head; 8982 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8983 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8984 cphci_next = cphci->cphci_next; 8985 if (cphci->cphci_phci != NULL) 8986 enqueue_vhcache_phci(vhcache, cphci); 8987 else 8988 free_vhcache_phci(cphci); 8989 } 8990 8991 vhcache->vhcache_clean_time = lbolt64; 8992 rw_exit(&vhcache->vhcache_lock); 8993 vhcache_dirty(vhc); 8994 } 8995 8996 /* 8997 * Remove all stale entries from vhci cache. 8998 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8999 */ 9000 void 9001 mdi_clean_vhcache(void) 9002 { 9003 mdi_vhci_t *vh; 9004 9005 mutex_enter(&mdi_mutex); 9006 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9007 vh->vh_refcnt++; 9008 mutex_exit(&mdi_mutex); 9009 clean_vhcache(vh->vh_config); 9010 mutex_enter(&mdi_mutex); 9011 vh->vh_refcnt--; 9012 } 9013 mutex_exit(&mdi_mutex); 9014 } 9015 9016 /* 9017 * mdi_vhci_walk_clients(): 9018 * Walker routine to traverse client dev_info nodes 9019 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9020 * below the client, including nexus devices, which we dont want. 9021 * So we just traverse the immediate siblings, starting from 1st client. 9022 */ 9023 void 9024 mdi_vhci_walk_clients(dev_info_t *vdip, 9025 int (*f)(dev_info_t *, void *), void *arg) 9026 { 9027 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9028 dev_info_t *cdip; 9029 mdi_client_t *ct; 9030 9031 MDI_VHCI_CLIENT_LOCK(vh); 9032 cdip = ddi_get_child(vdip); 9033 while (cdip) { 9034 ct = i_devi_get_client(cdip); 9035 MDI_CLIENT_LOCK(ct); 9036 9037 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9038 cdip = ddi_get_next_sibling(cdip); 9039 else 9040 cdip = NULL; 9041 9042 MDI_CLIENT_UNLOCK(ct); 9043 } 9044 MDI_VHCI_CLIENT_UNLOCK(vh); 9045 } 9046 9047 /* 9048 * mdi_vhci_walk_phcis(): 9049 * Walker routine to traverse phci dev_info nodes 9050 */ 9051 void 9052 mdi_vhci_walk_phcis(dev_info_t *vdip, 9053 int (*f)(dev_info_t *, void *), void *arg) 9054 { 9055 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9056 mdi_phci_t *ph, *next; 9057 9058 MDI_VHCI_PHCI_LOCK(vh); 9059 ph = vh->vh_phci_head; 9060 while (ph) { 9061 MDI_PHCI_LOCK(ph); 9062 9063 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9064 next = ph->ph_next; 9065 else 9066 next = NULL; 9067 9068 MDI_PHCI_UNLOCK(ph); 9069 ph = next; 9070 } 9071 MDI_VHCI_PHCI_UNLOCK(vh); 9072 } 9073 9074 9075 /* 9076 * mdi_walk_vhcis(): 9077 * Walker routine to traverse vhci dev_info nodes 9078 */ 9079 void 9080 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9081 { 9082 mdi_vhci_t *vh = NULL; 9083 9084 mutex_enter(&mdi_mutex); 9085 /* 9086 * Scan for already registered vhci 9087 */ 9088 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9089 vh->vh_refcnt++; 9090 mutex_exit(&mdi_mutex); 9091 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9092 mutex_enter(&mdi_mutex); 9093 vh->vh_refcnt--; 9094 break; 9095 } else { 9096 mutex_enter(&mdi_mutex); 9097 vh->vh_refcnt--; 9098 } 9099 } 9100 9101 mutex_exit(&mdi_mutex); 9102 } 9103 9104 /* 9105 * i_mdi_log_sysevent(): 9106 * Logs events for pickup by syseventd 9107 */ 9108 static void 9109 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9110 { 9111 char *path_name; 9112 nvlist_t *attr_list; 9113 9114 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9115 KM_SLEEP) != DDI_SUCCESS) { 9116 goto alloc_failed; 9117 } 9118 9119 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9120 (void) ddi_pathname(dip, path_name); 9121 9122 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9123 ddi_driver_name(dip)) != DDI_SUCCESS) { 9124 goto error; 9125 } 9126 9127 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9128 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9129 goto error; 9130 } 9131 9132 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9133 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9134 goto error; 9135 } 9136 9137 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9138 path_name) != DDI_SUCCESS) { 9139 goto error; 9140 } 9141 9142 if (nvlist_add_string(attr_list, DDI_CLASS, 9143 ph_vh_class) != DDI_SUCCESS) { 9144 goto error; 9145 } 9146 9147 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9148 attr_list, NULL, DDI_SLEEP); 9149 9150 error: 9151 kmem_free(path_name, MAXPATHLEN); 9152 nvlist_free(attr_list); 9153 return; 9154 9155 alloc_failed: 9156 MDI_DEBUG(1, (CE_WARN, dip, 9157 "!i_mdi_log_sysevent: Unable to send sysevent")); 9158 } 9159 9160 char ** 9161 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9162 { 9163 char **driver_list, **ret_driver_list = NULL; 9164 int *root_support_list; 9165 int cur_elements, max_elements; 9166 9167 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9168 &cur_elements, &max_elements); 9169 9170 9171 if (driver_list) { 9172 kmem_free(root_support_list, sizeof (int) * max_elements); 9173 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9174 * max_elements, sizeof (char *) * cur_elements); 9175 } 9176 *ndrivers = cur_elements; 9177 9178 return (ret_driver_list); 9179 9180 } 9181 9182 void 9183 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9184 { 9185 char **p; 9186 int i; 9187 9188 if (driver_list) { 9189 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9190 kmem_free(*p, strlen(*p) + 1); 9191 kmem_free(driver_list, sizeof (char *) * ndrivers); 9192 } 9193 } 9194