1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved. 24 * Copyright (c) 2018, Joyent, Inc. 25 */ 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a 29 * more detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 #include <sys/sysmacros.h> 72 73 #ifdef DEBUG 74 #include <sys/debug.h> 75 int mdi_debug = 1; 76 int mdi_debug_logonly = 0; 77 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 78 #define MDI_WARN CE_WARN, __func__ 79 #define MDI_NOTE CE_NOTE, __func__ 80 #define MDI_CONT CE_CONT, __func__ 81 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 82 #else /* !DEBUG */ 83 #define MDI_DEBUG(dbglevel, pargs) 84 #endif /* DEBUG */ 85 int mdi_debug_consoleonly = 0; 86 int mdi_delay = 3; 87 88 extern pri_t minclsyspri; 89 extern int modrootloaded; 90 91 /* 92 * Global mutex: 93 * Protects vHCI list and structure members. 
94 */ 95 kmutex_t mdi_mutex; 96 97 /* 98 * Registered vHCI class driver lists 99 */ 100 int mdi_vhci_count; 101 mdi_vhci_t *mdi_vhci_head; 102 mdi_vhci_t *mdi_vhci_tail; 103 104 /* 105 * Client Hash Table size 106 */ 107 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 108 109 /* 110 * taskq interface definitions 111 */ 112 #define MDI_TASKQ_N_THREADS 8 113 #define MDI_TASKQ_PRI minclsyspri 114 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 115 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 116 117 taskq_t *mdi_taskq; 118 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 119 120 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 121 122 /* 123 * The data should be "quiet" for this interval (in seconds) before the 124 * vhci cached data is flushed to the disk. 125 */ 126 static int mdi_vhcache_flush_delay = 10; 127 128 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 129 static int mdi_vhcache_flush_daemon_idle_time = 60; 130 131 /* 132 * MDI falls back to discovery of all paths when a bus_config_one fails. 133 * The following parameters can be used to tune this operation. 134 * 135 * mdi_path_discovery_boot 136 * Number of times path discovery will be attempted during early boot. 137 * Probably there is no reason to ever set this value to greater than one. 138 * 139 * mdi_path_discovery_postboot 140 * Number of times path discovery will be attempted after early boot. 141 * Set it to a minimum of two to allow for discovery of iscsi paths which 142 * may happen very late during booting. 143 * 144 * mdi_path_discovery_interval 145 * Minimum number of seconds MDI will wait between successive discovery 146 * of all paths. Set it to -1 to disable discovery of all paths. 
147 */ 148 static int mdi_path_discovery_boot = 1; 149 static int mdi_path_discovery_postboot = 2; 150 static int mdi_path_discovery_interval = 10; 151 152 /* 153 * number of seconds the asynchronous configuration thread will sleep idle 154 * before exiting. 155 */ 156 static int mdi_async_config_idle_time = 600; 157 158 static int mdi_bus_config_cache_hash_size = 256; 159 160 /* turns off multithreaded configuration for certain operations */ 161 static int mdi_mtc_off = 0; 162 163 /* 164 * The "path" to a pathinfo node is identical to the /devices path to a 165 * devinfo node had the device been enumerated under a pHCI instead of 166 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 167 * This association persists across create/delete of the pathinfo nodes, 168 * but not across reboot. 169 */ 170 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 171 static int mdi_pathmap_hash_size = 256; 172 static kmutex_t mdi_pathmap_mutex; 173 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 174 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 175 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */ 176 177 /* 178 * MDI component property name/value string definitions 179 */ 180 const char *mdi_component_prop = "mpxio-component"; 181 const char *mdi_component_prop_vhci = "vhci"; 182 const char *mdi_component_prop_phci = "phci"; 183 const char *mdi_component_prop_client = "client"; 184 185 /* 186 * MDI client global unique identifier property name 187 */ 188 const char *mdi_client_guid_prop = "client-guid"; 189 190 /* 191 * MDI client load balancing property name/value string definitions 192 */ 193 const char *mdi_load_balance = "load-balance"; 194 const char *mdi_load_balance_none = "none"; 195 const char *mdi_load_balance_rr = "round-robin"; 196 const char *mdi_load_balance_lba = "logical-block"; 197 198 /* 199 * Obsolete vHCI class definition; to be removed after Leadville update 200 */ 201 const char 
*mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 202 203 static char vhci_greeting[] = 204 "\tThere already exists one vHCI driver for class %s\n" 205 "\tOnly one vHCI driver for each class is allowed\n"; 206 207 /* 208 * Static function prototypes 209 */ 210 static int i_mdi_phci_offline(dev_info_t *, uint_t); 211 static int i_mdi_client_offline(dev_info_t *, uint_t); 212 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 213 static void i_mdi_phci_post_detach(dev_info_t *, 214 ddi_detach_cmd_t, int); 215 static int i_mdi_client_pre_detach(dev_info_t *, 216 ddi_detach_cmd_t); 217 static void i_mdi_client_post_detach(dev_info_t *, 218 ddi_detach_cmd_t, int); 219 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 220 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 221 static int i_mdi_lba_lb(mdi_client_t *ct, 222 mdi_pathinfo_t **ret_pip, struct buf *buf); 223 static void i_mdi_pm_hold_client(mdi_client_t *, int); 224 static void i_mdi_pm_rele_client(mdi_client_t *, int); 225 static void i_mdi_pm_reset_client(mdi_client_t *); 226 static int i_mdi_power_all_phci(mdi_client_t *); 227 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 228 229 230 /* 231 * Internal mdi_pathinfo node functions 232 */ 233 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 234 235 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 236 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 237 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 238 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 239 static void i_mdi_phci_unlock(mdi_phci_t *); 240 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 241 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 242 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 243 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 244 mdi_client_t *); 245 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 246 static void 
i_mdi_client_remove_path(mdi_client_t *, 247 mdi_pathinfo_t *); 248 249 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 250 mdi_pathinfo_state_t, int); 251 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 252 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 253 char **, int); 254 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 255 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 256 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 257 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 258 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 259 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 260 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 261 static void i_mdi_client_update_state(mdi_client_t *); 262 static int i_mdi_client_compute_state(mdi_client_t *, 263 mdi_phci_t *); 264 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 265 static void i_mdi_client_unlock(mdi_client_t *); 266 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 267 static mdi_client_t *i_devi_get_client(dev_info_t *); 268 /* 269 * NOTE: this will be removed once the NWS files are changed to use the new 270 * mdi_{enable,disable}_path interfaces 271 */ 272 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 273 int, int); 274 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 275 mdi_vhci_t *vh, int flags, int op); 276 /* 277 * Failover related function prototypes 278 */ 279 static int i_mdi_failover(void *); 280 281 /* 282 * misc internal functions 283 */ 284 static int i_mdi_get_hash_key(char *); 285 static int i_map_nvlist_error_to_mdi(int); 286 static void i_mdi_report_path_state(mdi_client_t *, 287 mdi_pathinfo_t *); 288 289 static void setup_vhci_cache(mdi_vhci_t *); 290 static int destroy_vhci_cache(mdi_vhci_t *); 291 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 
static boolean_t	stop_vhcache_flush_thread(void *, int);
static void		free_string_array(char **, int);
static void		free_vhcache_phci(mdi_vhcache_phci_t *);
static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void		free_vhcache_client(mdi_vhcache_client_t *);
static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_pi_add(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		vhcache_pi_remove(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		free_phclient_path_list(mdi_phys_path_t *);
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);

/*
 * called once when first vhci registers with mdi
 *
 * Sets up the global mdi_mutex, the shared taskq, and the three
 * path-instance <-> path-string maps.  The 'initialized' latch is a plain
 * static, not protected by any lock; NOTE(review): this relies on the first
 * vHCI registration being serialized by its parent's ndi_devi_enter --
 * confirm no two first-time callers can race here.
 */
static void
i_mdi_init()
{
	static int initialized = 0;

	/* Idempotent: all work happens only on the very first call. */
	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Create our taskq resources */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/* Allocate ['path_instance' <-> "path"] maps */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	mdi_pathmap_bypath = mod_hash_create_strhash(
	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
}

/*
 * mdi_get_component_type():
 *		Return mpxio component type
 * Return Values:
 *		MDI_COMPONENT_NONE
 *		MDI_COMPONENT_VHCI
 *		MDI_COMPONENT_PHCI
 *		MDI_COMPONENT_CLIENT
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed when clients migrate mdi_component_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	return (DEVI(dip)->devi_mdi_component);
}

/*
 * mdi_vhci_register():
 *		Register a vHCI module with the mpxio framework
 *		mdi_vhci_register() is called by vHCI drivers to register the
 *		'class_driver' vHCI driver and its MDI entrypoints with the
 *		mpxio framework.  The vHCI driver must call this interface as
 *		part of its attach(9e) handler.
 *		Competing threads may try to attach mdi_vhci_register() as
 *		the vHCI drivers are loaded and attached as a result of pHCI
 *		driver instance registration (mdi_phci_register()) with the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE (class already has a vHCI with ops registered)
 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	/* Registrant can't be older */
	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);

#ifdef DEBUG
	/*
	 * IB nexus driver is loaded only when IB hardware is present.
	 * In order to be able to do this there is a need to drive the loading
	 * and attaching of the IB nexus driver (especially when an IB hardware
	 * is dynamically plugged in) when an IB HCA driver (PHCI)
	 * is being attached. Unfortunately this gets into the limitations
	 * of devfs as there seems to be no clean way to drive configuration
	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
	 * for IB.
	 */
	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
#endif

	/* One-time framework initialization (idempotent). */
	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created.  Check for valid
			 * vHCI ops registration. We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				/* vhci_greeting is the format string */
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/*
		 * Load-balance policy defaults to round-robin and may be
		 * overridden by the LOAD_BALANCE_PROP property on the vHCI.
		 */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* Append to the global vHCI list (under mdi_mutex). */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_vhci_unregister():
 *		Unregister a vHCI module from mpxio framework
 *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *		of a vhci to unregister it from the framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE (not registered, still referenced, or cache
 *		destroy failed)
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/* Unlinked from the list; tear down and free outside mdi_mutex. */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}

/*
 * i_mdi_vhci_class2vhci():
 *		Look for a matching vHCI module given a vHCI class name
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*vh = NULL;

	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			break;
		}
	}
	mutex_exit(&mdi_mutex);
	return (vh);
}

/*
 * i_devi_get_vhci():
 *		Utility function to get the handle to a vHCI component
 * Return Values:
 *		Handle to a vHCI component
 *		NULL (dip is not marked as a vHCI)
 */
mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	mdi_vhci_t	*vh = NULL;

	if (MDI_VHCI(vdip)) {
		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
	}
	return (vh);
}

/*
 * mdi_phci_register():
 *		Register a pHCI
 *		module with mpxio framework
 *		mdi_phci_register() is called by pHCI drivers to register with
 *		the mpxio framework and a specific 'class_driver' vHCI.  The
 *		pHCI driver must call this interface as part of its attach(9e)
 *		handler.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE (mpxio disabled by .conf, or no vHCI registered
 *		for 'class')
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (MDI_CONT, pdip,
			    "?multipath capabilities disabled via %s.conf.",
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	/* Allocate and initialize the per-pHCI extension structure. */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	/* New pHCI starts out powered up. */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);

	/* Mark the devinfo node as a pHCI and back-reference our state. */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* Append to the vHCI's pHCI list under vh_phci_mutex. */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *		Unregister a pHCI module from mpxio framework
 *		mdi_phci_unregister() is called by the pHCI drivers from their
 *		detach(9E) handler to unregister their instances from the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE (pdip is not a registered pHCI)
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;
	mdi_pathinfo_t		*pip;

	/* See mdi_phci_register(): parent entered, possibly by a peer. */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	/* Defensive: survive a damaged ph_vhci on non-DEBUG kernels. */
	if (vh == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/* Unlink this pHCI from the vHCI's singly-linked list. */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
	MDI_PHCI_LOCK(ph);
	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
		MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);

	/* Log the sysevent before 'ph' is freed below. */
	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *		Utility function to return the phci extensions.
 *		Returns NULL if pdip is not marked as a pHCI.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;

	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * Single thread mdi entry into devinfo node for modifying its children.
 * If necessary we perform an ndi_devi_enter of the vHCI before doing
 * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 *
 * The combined value stored through *circular packs the vHCI circular
 * count in the upper 16 bits and the pHCI circular count in the lower
 * 16 bits; a vHCI value of -1 records that the vHCI was NOT entered by
 * this call (pHCI-detach case) so mdi_devi_exit() must not exit it.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a threads that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and the enter down the path
	 * to the pHCI. If pHCI is detaching then we piggyback this calls
	 * enter of the vHCI on frameworks vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/*
				 * Won the tryenter but pHCI is detaching:
				 * back out and piggyback on the framework's
				 * vHCI enter (vcircular = -1 marks this).
				 */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else if (servicing_interrupt()) {
			/*
			 * Don't delay an interrupt (and ensure adaptive
			 * mutex inversion support).
			 */
			ndi_devi_enter(vdip, &vcircular);
			break;
		} else {
			/* Busy: back off briefly and retry the tryenter. */
			delay_random(mdi_delay);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);
	/* Pack both circular values into the single out-parameter. */
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}

/*
 * Attempt to mdi_devi_enter.
 * Non-blocking: returns 1 with *circular set when both the vHCI and the
 * pHCI were entered, 0 (busy) otherwise with nothing held.
 */
int
mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	if (ndi_devi_tryenter(vdip, &vcircular)) {
		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
			return (1);	/* locked */
		}
		/* pHCI busy: drop the vHCI enter before failing. */
		ndi_devi_exit(vdip, vcircular);
	}
	return (0);			/* busy */
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
 * 'circular' is the packed value produced by mdi_devi_enter(); a vHCI
 * half of -1 means the vHCI was not entered by us and is left alone.
 */
void
mdi_devi_exit(dev_info_t *phci_dip, int circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/* extract two circular recursion values from single int */
	pcircular = (short)(circular & 0xFFFF);
	vcircular = (short)((circular >> 16) & 0xFFFF);

	ndi_devi_exit(phci_dip, pcircular);
	if (vcircular != -1)
		ndi_devi_exit(vdip, vcircular);
}

/*
 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 * with vHCI power management code during path online/offline.  Each
 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 * occur within the scope of an active mdi_devi_enter that establishes the
 * circular value.
 */
void
mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
	ndi_hold_devi(phci_dip);

	/* Only the pHCI (low 16 bits) is exited; the vHCI stays entered. */
	pcircular = (short)(circular & 0xFFFF);
	ndi_devi_exit(phci_dip, pcircular);
}

void
mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	ndi_devi_enter(phci_dip, &pcircular);

	/* Drop hold from mdi_devi_exit_phci. */
	ndi_rele_devi(phci_dip);

	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
}

/*
 * mdi_devi_get_vdip():
 *		given a pHCI dip return vHCI dip
 *		Returns NULL if pdip is not a pHCI or has no vHCI association.
 */
dev_info_t *
mdi_devi_get_vdip(dev_info_t *pdip)
{
	mdi_phci_t	*ph;

	ph = i_devi_get_phci(pdip);
	if (ph && ph->ph_vhci)
		return (ph->ph_vhci->vh_dip);
	return (NULL);
}

/*
 * mdi_devi_pdip_entered():
 *		Return 1 if we are vHCI and have done an ndi_devi_enter
 *		of a pHCI
 */
int
mdi_devi_pdip_entered(dev_info_t *vdip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;

	vh = i_devi_get_vhci(vdip);
	if (vh == NULL)
		return (0);

	/* Scan every pHCI on this vHCI for one busy-owned by us. */
	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph) {
		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
			MDI_VHCI_PHCI_UNLOCK(vh);
			return (1);
		}
		ph = ph->ph_next;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	return (0);
}

/*
 * mdi_phci_path2devinfo():
 *		Utility function to search for a valid phci device given
 *		the devfs pathname.
 */
dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	/* ran off the end of the list: no pHCI matched the pathname */
	if (ph == NULL) {
		pdip = NULL;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *		get number of path information nodes associated with a given
 *		pHCI device.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order. This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 *		When pip is non-NULL the caller already holds pi_mutex and
 *		wants ph_mutex; the pathinfo lock is dropped (with a hold to
 *		keep the node alive) and reacquired to avoid deadlock.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested.
		 */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Can't delay in interrupt context; take
				 * the locks in the correct order instead.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_PHCI_LOCK(ph);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *		create client device's devinfo node
 * Return Values:
 *		dev_info
 *		NULL
 * Notes:
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
	char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client %s@%s already exists",
			name ? name : "", guid ?
		    guid : "");
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* undo partial construction: strip properties and free the node */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *		Find a matching devinfo node for given client node name
 *		and its guid.
 * Return Values:
 *		Handle to a dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char			*data;
	dev_info_t		*cdip = NULL;
	dev_info_t		*ndip = NULL;
	int			circular;

	/* hold the vHCI busy while walking its children */
	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		/* node name matched; now compare the GUID property */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *		Remove a client device node
 * Return Values:
 *		MDI_SUCCESS / MDI_BUSY / MDI_FAILURE (mapped from NDI codes)
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;

	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
		if (rv != NDI_SUCCESS) {
			MDI_DEBUG(1, (MDI_NOTE, cdip,
			    "!failed: cdip %p", (void *)cdip));
		}
		/*
		 * Convert to MDI error code
		 */
		switch (rv) {
		case NDI_SUCCESS:
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	}
	return (rv);
}

/*
 * i_devi_get_client()
 *		Utility function to get mpxio component extensions
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
	mdi_client_t	*ct = NULL;

	if (MDI_CLIENT(cdip)) {
		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
	}
	return (ct);
}

/*
 * i_mdi_is_child_present():
 *		Search for the presence of client device dev_info node
 * Return Values:
 *		MDI_SUCCESS if cdip is a child of vdip, MDI_FAILURE otherwise
 */
static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
	int		rv = MDI_FAILURE;
	struct dev_info	*dip;
	int		circular;

	ndi_devi_enter(vdip, &circular);
	dip = DEVI(vdip)->devi_child;
	while (dip) {
		if (dip == DEVI(cdip)) {
			rv = MDI_SUCCESS;
			break;
		}
		dip = dip->devi_sibling;
	}
	ndi_devi_exit(vdip, circular);
	return (rv);
}


/*
 * i_mdi_client_lock():
 *		Grab client component lock
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order. This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested: caller holds pi_mutex and
		 * wants ct_mutex.  Drop/reacquire pi_mutex (with a hold to
		 * keep the pathinfo node alive) to avoid deadlock.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			if (servicing_interrupt()) {
				/* can't delay in interrupt context */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_CLIENT_LOCK(ct);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		MDI_CLIENT_LOCK(ct);
	}
}

/*
 * i_mdi_client_unlock():
 *		Unlock a client component
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_client_alloc():
 *		Allocate and initialize a client structure.  Caller should
 *		hold the vhci client lock.
 * Return Values:
 *		Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/*
	 * Allocate and initialize a component structure.
	 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_dip = NULL;
	ct->ct_vhci = vh;
	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;
	ct->ct_flags = 0;
	/* new client starts FAILED/OFFLINE until paths come online */
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	MDI_CLIENT_UNLOCK(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	ct->ct_unstable = 0;
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
	/* inherit the vHCI's load-balance policy and default region size */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);
	return (ct);
}

/*
 * i_mdi_client_enlist_table():
 *		Attach the client device to the client hash table. Caller
 *		should hold the vhci client lock.
 */
static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int 			index;
	struct client_hash	*head;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* insert at the head of the guid-hashed bucket */
	index = i_mdi_get_hash_key(ct->ct_guid);
	head = &vh->vh_client_table[index];
	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
	head->ct_hash_head = ct;
	head->ct_hash_count++;
	vh->vh_client_count++;
}

/*
 * i_mdi_client_delist_table():
 *		Detach the client device from the client hash table.
 *		Caller should hold the vhci client lock.
 */
static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	char			*guid;
	struct client_hash 	*head;
	mdi_client_t		*next;
	mdi_client_t		*last;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	guid = ct->ct_guid;
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* single-linked bucket walk, tracking the previous node */
	last = NULL;
	next = (mdi_client_t *)head->ct_hash_head;
	while (next != NULL) {
		if (next == ct) {
			break;
		}
		last = next;
		next = next->ct_hnext;
	}

	if (next) {
		head->ct_hash_count--;
		if (last == NULL) {
			head->ct_hash_head = ct->ct_hnext;
		} else {
			last->ct_hnext = ct->ct_hnext;
		}
		ct->ct_hnext = NULL;
		vh->vh_client_count--;
	}
}


/*
 * i_mdi_client_free():
 *		Free a client component
 * Return Values:
 *		MDI_SUCCESS (always; devinfo removal status is discarded)
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	/* strip the mdi client decoration from the devinfo node */
	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = NULL;

	/*
	 * Clear out back ref.
	 * to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	/*
	 * Drop the vhci client lock across the devinfo removal:
	 * ndi_devi_offline() can block and must not be called with it held.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);
	(void) i_mdi_devinfo_remove(vdip, cdip, flags);
	MDI_VHCI_CLIENT_LOCK(vh);

	return (rv);
}

/*
 * i_mdi_client_find():
 *		Find the client structure corresponding to a given guid
 *		Caller should hold the vhci client lock.
 * Return Values:
 *		Matching client, or NULL.  A NULL cname matches any driver
 *		name; otherwise both guid and drvname must match.
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
	int			index;
	struct client_hash	*head;
	mdi_client_t		*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	ct = head->ct_hash_head;
	while (ct != NULL) {
		if (strcmp(ct->ct_guid, guid) == 0 &&
		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
			break;
		}
		ct = ct->ct_hnext;
	}
	return (ct);
}

/*
 * i_mdi_client_update_state():
 *		Compute and update client device state
 * Notes:
 *		A client device can be in any of three possible states:
 *
 *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *		one online/standby paths. Can tolerate failures.
 *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *		no alternate paths available as standby. A failure on the online
 *		would result in loss of access to device data.
1528 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1529 * no paths available to access the device. 1530 */ 1531 static void 1532 i_mdi_client_update_state(mdi_client_t *ct) 1533 { 1534 int state; 1535 1536 ASSERT(MDI_CLIENT_LOCKED(ct)); 1537 state = i_mdi_client_compute_state(ct, NULL); 1538 MDI_CLIENT_SET_STATE(ct, state); 1539 } 1540 1541 /* 1542 * i_mdi_client_compute_state(): 1543 * Compute client device state 1544 * 1545 * mdi_phci_t * Pointer to pHCI structure which should 1546 * while computing the new value. Used by 1547 * i_mdi_phci_offline() to find the new 1548 * client state after DR of a pHCI. 1549 */ 1550 static int 1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1552 { 1553 int state; 1554 int online_count = 0; 1555 int standby_count = 0; 1556 mdi_pathinfo_t *pip, *next; 1557 1558 ASSERT(MDI_CLIENT_LOCKED(ct)); 1559 pip = ct->ct_path_head; 1560 while (pip != NULL) { 1561 MDI_PI_LOCK(pip); 1562 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1563 if (MDI_PI(pip)->pi_phci == ph) { 1564 MDI_PI_UNLOCK(pip); 1565 pip = next; 1566 continue; 1567 } 1568 1569 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1570 == MDI_PATHINFO_STATE_ONLINE) 1571 online_count++; 1572 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1573 == MDI_PATHINFO_STATE_STANDBY) 1574 standby_count++; 1575 MDI_PI_UNLOCK(pip); 1576 pip = next; 1577 } 1578 1579 if (online_count == 0) { 1580 if (standby_count == 0) { 1581 state = MDI_CLIENT_STATE_FAILED; 1582 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1583 "client state failed: ct = %p", (void *)ct)); 1584 } else if (standby_count == 1) { 1585 state = MDI_CLIENT_STATE_DEGRADED; 1586 } else { 1587 state = MDI_CLIENT_STATE_OPTIMAL; 1588 } 1589 } else if (online_count == 1) { 1590 if (standby_count == 0) { 1591 state = MDI_CLIENT_STATE_DEGRADED; 1592 } else { 1593 state = MDI_CLIENT_STATE_OPTIMAL; 1594 } 1595 } else { 1596 state = MDI_CLIENT_STATE_OPTIMAL; 1597 } 1598 return (state); 1599 } 1600 
1601 /* 1602 * i_mdi_client2devinfo(): 1603 * Utility function 1604 */ 1605 dev_info_t * 1606 i_mdi_client2devinfo(mdi_client_t *ct) 1607 { 1608 return (ct->ct_dip); 1609 } 1610 1611 /* 1612 * mdi_client_path2_devinfo(): 1613 * Given the parent devinfo and child devfs pathname, search for 1614 * a valid devfs node handle. 1615 */ 1616 dev_info_t * 1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1618 { 1619 dev_info_t *cdip = NULL; 1620 dev_info_t *ndip = NULL; 1621 char *temp_pathname; 1622 int circular; 1623 1624 /* 1625 * Allocate temp buffer 1626 */ 1627 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1628 1629 /* 1630 * Lock parent against changes 1631 */ 1632 ndi_devi_enter(vdip, &circular); 1633 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1634 while ((cdip = ndip) != NULL) { 1635 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1636 1637 *temp_pathname = '\0'; 1638 (void) ddi_pathname(cdip, temp_pathname); 1639 if (strcmp(temp_pathname, pathname) == 0) { 1640 break; 1641 } 1642 } 1643 /* 1644 * Release devinfo lock 1645 */ 1646 ndi_devi_exit(vdip, circular); 1647 1648 /* 1649 * Free the temp buffer 1650 */ 1651 kmem_free(temp_pathname, MAXPATHLEN); 1652 return (cdip); 1653 } 1654 1655 /* 1656 * mdi_client_get_path_count(): 1657 * Utility function to get number of path information nodes 1658 * associated with a given client device. 
1659 */ 1660 int 1661 mdi_client_get_path_count(dev_info_t *cdip) 1662 { 1663 mdi_client_t *ct; 1664 int count = 0; 1665 1666 ct = i_devi_get_client(cdip); 1667 if (ct != NULL) { 1668 count = ct->ct_path_count; 1669 } 1670 return (count); 1671 } 1672 1673 1674 /* 1675 * i_mdi_get_hash_key(): 1676 * Create a hash using strings as keys 1677 * 1678 */ 1679 static int 1680 i_mdi_get_hash_key(char *str) 1681 { 1682 uint32_t g, hash = 0; 1683 char *p; 1684 1685 for (p = str; *p != '\0'; p++) { 1686 g = *p; 1687 hash += g; 1688 } 1689 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1690 } 1691 1692 /* 1693 * mdi_get_lb_policy(): 1694 * Get current load balancing policy for a given client device 1695 */ 1696 client_lb_t 1697 mdi_get_lb_policy(dev_info_t *cdip) 1698 { 1699 client_lb_t lb = LOAD_BALANCE_NONE; 1700 mdi_client_t *ct; 1701 1702 ct = i_devi_get_client(cdip); 1703 if (ct != NULL) { 1704 lb = ct->ct_lb; 1705 } 1706 return (lb); 1707 } 1708 1709 /* 1710 * mdi_set_lb_region_size(): 1711 * Set current region size for the load-balance 1712 */ 1713 int 1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1715 { 1716 mdi_client_t *ct; 1717 int rv = MDI_FAILURE; 1718 1719 ct = i_devi_get_client(cdip); 1720 if (ct != NULL && ct->ct_lb_args != NULL) { 1721 ct->ct_lb_args->region_size = region_size; 1722 rv = MDI_SUCCESS; 1723 } 1724 return (rv); 1725 } 1726 1727 /* 1728 * mdi_Set_lb_policy(): 1729 * Set current load balancing policy for a given client device 1730 */ 1731 int 1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1733 { 1734 mdi_client_t *ct; 1735 int rv = MDI_FAILURE; 1736 1737 ct = i_devi_get_client(cdip); 1738 if (ct != NULL) { 1739 ct->ct_lb = lb; 1740 rv = MDI_SUCCESS; 1741 } 1742 return (rv); 1743 } 1744 1745 static void 1746 mdi_failover_cb(void *arg) 1747 { 1748 (void)i_mdi_failover(arg); 1749 } 1750 1751 /* 1752 * mdi_failover(): 1753 * failover function called by the vHCI drivers to initiate 1754 * a failover operation. 
 *		This is typically due to non-availability
 *		of online paths to route I/O requests.  Failover can be
 *		triggered through user application also.
 *
 *		The vHCI driver calls mdi_failover() to initiate a failover
 *		operation. mdi_failover() calls back into the vHCI driver's
 *		vo_failover() entry point to perform the actual failover
 *		operation. The reason for requiring the vHCI driver to
 *		initiate failover by calling mdi_failover(), instead of directly
 *		executing vo_failover() itself, is to ensure that the mdi
 *		framework can keep track of the client state properly.
 *		Additionally, mdi_failover() provides as a convenience the
 *		option of performing the failover operation synchronously or
 *		asynchronously
 *
 *		Upon successful completion of the failover operation, the
 *		paths that were previously ONLINE will be in the STANDBY state,
 *		and the newly activated paths will be in the ONLINE state.
 *
 *		The flags modifier determines whether the activation is done
 *		synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		MDI_ACCEPT (async request queued to the taskq)
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int			rv;
	mdi_client_t		*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* sync mode: block until the client stabilizes */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again (state may have changed while we waited above).
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(cdip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.
		 */
		(void) taskq_dispatch(mdi_taskq, mdi_failover_cb, ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode.  Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *		internal failover function. Invokes vHCI drivers failover
 *		callback function and process the failover status
 * Return Values:
 *		None
 *
 * Note: A client device in failover state can not be detached or freed.
1878 */ 1879 static int 1880 i_mdi_failover(void *arg) 1881 { 1882 int rv = MDI_SUCCESS; 1883 mdi_client_t *ct = (mdi_client_t *)arg; 1884 mdi_vhci_t *vh = ct->ct_vhci; 1885 1886 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1887 1888 if (vh->vh_ops->vo_failover != NULL) { 1889 /* 1890 * Call vHCI drivers callback routine 1891 */ 1892 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1893 ct->ct_failover_flags); 1894 } 1895 1896 MDI_CLIENT_LOCK(ct); 1897 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1898 1899 /* 1900 * Save the failover return status 1901 */ 1902 ct->ct_failover_status = rv; 1903 1904 /* 1905 * As a result of failover, client status would have been changed. 1906 * Update the client state and wake up anyone waiting on this client 1907 * device. 1908 */ 1909 i_mdi_client_update_state(ct); 1910 1911 cv_broadcast(&ct->ct_failover_cv); 1912 MDI_CLIENT_UNLOCK(ct); 1913 return (rv); 1914 } 1915 1916 /* 1917 * Load balancing is logical block. 1918 * IOs within the range described by region_size 1919 * would go on the same path. This would improve the 1920 * performance by cache-hit on some of the RAID devices. 1921 * Search only for online paths(At some point we 1922 * may want to balance across target ports). 1923 * If no paths are found then default to round-robin. 
1924 */ 1925 static int 1926 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1927 { 1928 int path_index = -1; 1929 int online_path_count = 0; 1930 int online_nonpref_path_count = 0; 1931 int region_size = ct->ct_lb_args->region_size; 1932 mdi_pathinfo_t *pip; 1933 mdi_pathinfo_t *next; 1934 int preferred, path_cnt; 1935 1936 pip = ct->ct_path_head; 1937 while (pip) { 1938 MDI_PI_LOCK(pip); 1939 if (MDI_PI(pip)->pi_state == 1940 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1941 online_path_count++; 1942 } else if (MDI_PI(pip)->pi_state == 1943 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1944 online_nonpref_path_count++; 1945 } 1946 next = (mdi_pathinfo_t *) 1947 MDI_PI(pip)->pi_client_link; 1948 MDI_PI_UNLOCK(pip); 1949 pip = next; 1950 } 1951 /* if found any online/preferred then use this type */ 1952 if (online_path_count > 0) { 1953 path_cnt = online_path_count; 1954 preferred = 1; 1955 } else if (online_nonpref_path_count > 0) { 1956 path_cnt = online_nonpref_path_count; 1957 preferred = 0; 1958 } else { 1959 path_cnt = 0; 1960 } 1961 if (path_cnt) { 1962 path_index = (bp->b_blkno >> region_size) % path_cnt; 1963 pip = ct->ct_path_head; 1964 while (pip && path_index != -1) { 1965 MDI_PI_LOCK(pip); 1966 if (path_index == 0 && 1967 (MDI_PI(pip)->pi_state == 1968 MDI_PATHINFO_STATE_ONLINE) && 1969 MDI_PI(pip)->pi_preferred == preferred) { 1970 MDI_PI_HOLD(pip); 1971 MDI_PI_UNLOCK(pip); 1972 *ret_pip = pip; 1973 return (MDI_SUCCESS); 1974 } 1975 path_index --; 1976 next = (mdi_pathinfo_t *) 1977 MDI_PI(pip)->pi_client_link; 1978 MDI_PI_UNLOCK(pip); 1979 pip = next; 1980 } 1981 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1982 "lba %llx: path %s %p", 1983 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1984 } 1985 return (MDI_FAILURE); 1986 } 1987 1988 /* 1989 * mdi_select_path(): 1990 * select a path to access a client device. 
1991 * 1992 * mdi_select_path() function is called by the vHCI drivers to 1993 * select a path to route the I/O request to. The caller passes 1994 * the block I/O data transfer structure ("buf") as one of the 1995 * parameters. The mpxio framework uses the buf structure 1996 * contents to maintain per path statistics (total I/O size / 1997 * count pending). If more than one online paths are available to 1998 * select, the framework automatically selects a suitable path 1999 * for routing I/O request. If a failover operation is active for 2000 * this client device the call shall be failed with MDI_BUSY error 2001 * code. 2002 * 2003 * By default this function returns a suitable path in online 2004 * state based on the current load balancing policy. Currently 2005 * we support LOAD_BALANCE_NONE (Previously selected online path 2006 * will continue to be used till the path is usable) and 2007 * LOAD_BALANCE_RR (Online paths will be selected in a round 2008 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 2009 * based on the logical block). The load balancing 2010 * through vHCI drivers configuration file (driver.conf). 2011 * 2012 * vHCI drivers may override this default behavior by specifying 2013 * appropriate flags. The meaning of the thrid argument depends 2014 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 2015 * then the argument is the "path instance" of the path to select. 2016 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 2017 * "start_pip". A non NULL "start_pip" is the starting point to 2018 * walk and find the next appropriate path. The following values 2019 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 2020 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 2021 * STANDBY path). 2022 * 2023 * The non-standard behavior is used by the scsi_vhci driver, 2024 * whenever it has to use a STANDBY/FAULTED path. Eg. 
during 2025 * attach of client devices (to avoid an unnecessary failover 2026 * when the STANDBY path comes up first), during failover 2027 * (to activate a STANDBY path as ONLINE). 2028 * 2029 * The selected path is returned in a a mdi_hold_path() state 2030 * (pi_ref_cnt). Caller should release the hold by calling 2031 * mdi_rele_path(). 2032 * 2033 * Return Values: 2034 * MDI_SUCCESS - Completed successfully 2035 * MDI_BUSY - Client device is busy failing over 2036 * MDI_NOPATH - Client device is online, but no valid path are 2037 * available to access this client device 2038 * MDI_FAILURE - Invalid client device or state 2039 * MDI_DEVI_ONLINING 2040 * - Client device (struct dev_info state) is in 2041 * onlining state. 2042 */ 2043 2044 /*ARGSUSED*/ 2045 int 2046 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2047 void *arg, mdi_pathinfo_t **ret_pip) 2048 { 2049 mdi_client_t *ct; 2050 mdi_pathinfo_t *pip; 2051 mdi_pathinfo_t *next; 2052 mdi_pathinfo_t *head; 2053 mdi_pathinfo_t *start; 2054 client_lb_t lbp; /* load balancing policy */ 2055 int sb = 1; /* standard behavior */ 2056 int preferred = 1; /* preferred path */ 2057 int cond, cont = 1; 2058 int retry = 0; 2059 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2060 int path_instance; /* request specific path instance */ 2061 2062 /* determine type of arg based on flags */ 2063 if (flags & MDI_SELECT_PATH_INSTANCE) { 2064 path_instance = (int)(intptr_t)arg; 2065 start_pip = NULL; 2066 } else { 2067 path_instance = 0; 2068 start_pip = (mdi_pathinfo_t *)arg; 2069 } 2070 2071 if (flags != 0) { 2072 /* 2073 * disable default behavior 2074 */ 2075 sb = 0; 2076 } 2077 2078 *ret_pip = NULL; 2079 ct = i_devi_get_client(cdip); 2080 if (ct == NULL) { 2081 /* mdi extensions are NULL, Nothing more to do */ 2082 return (MDI_FAILURE); 2083 } 2084 2085 MDI_CLIENT_LOCK(ct); 2086 2087 if (sb) { 2088 if (MDI_CLIENT_IS_FAILED(ct)) { 2089 /* 2090 * Client is not ready to accept any I/O requests. 
2091 * Fail this request. 2092 */ 2093 MDI_DEBUG(2, (MDI_NOTE, cdip, 2094 "client state offline ct = %p", (void *)ct)); 2095 MDI_CLIENT_UNLOCK(ct); 2096 return (MDI_FAILURE); 2097 } 2098 2099 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2100 /* 2101 * Check for Failover is in progress. If so tell the 2102 * caller that this device is busy. 2103 */ 2104 MDI_DEBUG(2, (MDI_NOTE, cdip, 2105 "client failover in progress ct = %p", 2106 (void *)ct)); 2107 MDI_CLIENT_UNLOCK(ct); 2108 return (MDI_BUSY); 2109 } 2110 2111 /* 2112 * Check to see whether the client device is attached. 2113 * If not so, let the vHCI driver manually select a path 2114 * (standby) and let the probe/attach process to continue. 2115 */ 2116 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2117 MDI_DEBUG(4, (MDI_NOTE, cdip, 2118 "devi is onlining ct = %p", (void *)ct)); 2119 MDI_CLIENT_UNLOCK(ct); 2120 return (MDI_DEVI_ONLINING); 2121 } 2122 } 2123 2124 /* 2125 * Cache in the client list head. If head of the list is NULL 2126 * return MDI_NOPATH 2127 */ 2128 head = ct->ct_path_head; 2129 if (head == NULL) { 2130 MDI_CLIENT_UNLOCK(ct); 2131 return (MDI_NOPATH); 2132 } 2133 2134 /* Caller is specifying a specific pathinfo path by path_instance */ 2135 if (path_instance) { 2136 /* search for pathinfo with correct path_instance */ 2137 for (pip = head; 2138 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2139 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2140 ; 2141 2142 /* If path can't be selected then MDI_NOPATH is returned. */ 2143 if (pip == NULL) { 2144 MDI_CLIENT_UNLOCK(ct); 2145 return (MDI_NOPATH); 2146 } 2147 2148 /* 2149 * Verify state of path. When asked to select a specific 2150 * path_instance, we select the requested path in any 2151 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2152 * We don't however select paths where the pHCI has detached. 
2153 * NOTE: last pathinfo node of an opened client device may 2154 * exist in an OFFLINE state after the pHCI associated with 2155 * that path has detached (but pi_phci will be NULL if that 2156 * has occurred). 2157 */ 2158 MDI_PI_LOCK(pip); 2159 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2160 (MDI_PI(pip)->pi_phci == NULL)) { 2161 MDI_PI_UNLOCK(pip); 2162 MDI_CLIENT_UNLOCK(ct); 2163 return (MDI_FAILURE); 2164 } 2165 2166 /* Return MDI_BUSY if we have a transient condition */ 2167 if (MDI_PI_IS_TRANSIENT(pip)) { 2168 MDI_PI_UNLOCK(pip); 2169 MDI_CLIENT_UNLOCK(ct); 2170 return (MDI_BUSY); 2171 } 2172 2173 /* 2174 * Return the path in hold state. Caller should release the 2175 * lock by calling mdi_rele_path() 2176 */ 2177 MDI_PI_HOLD(pip); 2178 MDI_PI_UNLOCK(pip); 2179 *ret_pip = pip; 2180 MDI_CLIENT_UNLOCK(ct); 2181 return (MDI_SUCCESS); 2182 } 2183 2184 /* 2185 * for non default behavior, bypass current 2186 * load balancing policy and always use LOAD_BALANCE_RR 2187 * except that the start point will be adjusted based 2188 * on the provided start_pip 2189 */ 2190 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2191 2192 switch (lbp) { 2193 case LOAD_BALANCE_NONE: 2194 /* 2195 * Load balancing is None or Alternate path mode 2196 * Start looking for a online mdi_pathinfo node starting from 2197 * last known selected path 2198 */ 2199 preferred = 1; 2200 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2201 if (pip == NULL) { 2202 pip = head; 2203 } 2204 start = pip; 2205 do { 2206 MDI_PI_LOCK(pip); 2207 /* 2208 * No need to explicitly check if the path is disabled. 2209 * Since we are checking for state == ONLINE and the 2210 * same variable is used for DISABLE/ENABLE information. 2211 */ 2212 if ((MDI_PI(pip)->pi_state == 2213 MDI_PATHINFO_STATE_ONLINE) && 2214 preferred == MDI_PI(pip)->pi_preferred) { 2215 /* 2216 * Return the path in hold state. 
Caller should 2217 * release the lock by calling mdi_rele_path() 2218 */ 2219 MDI_PI_HOLD(pip); 2220 MDI_PI_UNLOCK(pip); 2221 ct->ct_path_last = pip; 2222 *ret_pip = pip; 2223 MDI_CLIENT_UNLOCK(ct); 2224 return (MDI_SUCCESS); 2225 } 2226 2227 /* 2228 * Path is busy. 2229 */ 2230 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2231 MDI_PI_IS_TRANSIENT(pip)) 2232 retry = 1; 2233 /* 2234 * Keep looking for a next available online path 2235 */ 2236 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2237 if (next == NULL) { 2238 next = head; 2239 } 2240 MDI_PI_UNLOCK(pip); 2241 pip = next; 2242 if (start == pip && preferred) { 2243 preferred = 0; 2244 } else if (start == pip && !preferred) { 2245 cont = 0; 2246 } 2247 } while (cont); 2248 break; 2249 2250 case LOAD_BALANCE_LBA: 2251 /* 2252 * Make sure we are looking 2253 * for an online path. Otherwise, if it is for a STANDBY 2254 * path request, it will go through and fetch an ONLINE 2255 * path which is not desirable. 2256 */ 2257 if ((ct->ct_lb_args != NULL) && 2258 (ct->ct_lb_args->region_size) && bp && 2259 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2260 if (i_mdi_lba_lb(ct, ret_pip, bp) 2261 == MDI_SUCCESS) { 2262 MDI_CLIENT_UNLOCK(ct); 2263 return (MDI_SUCCESS); 2264 } 2265 } 2266 /* FALLTHROUGH */ 2267 case LOAD_BALANCE_RR: 2268 /* 2269 * Load balancing is Round Robin. Start looking for a online 2270 * mdi_pathinfo node starting from last known selected path 2271 * as the start point. If override flags are specified, 2272 * process accordingly. 2273 * If the search is already in effect(start_pip not null), 2274 * then lets just use the same path preference to continue the 2275 * traversal. 2276 */ 2277 2278 if (start_pip != NULL) { 2279 preferred = MDI_PI(start_pip)->pi_preferred; 2280 } else { 2281 preferred = 1; 2282 } 2283 2284 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2285 if (start == NULL) { 2286 pip = head; 2287 } else { 2288 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2289 if (pip == NULL) { 2290 if ( flags & MDI_SELECT_NO_PREFERRED) { 2291 /* 2292 * Return since we hit the end of list 2293 */ 2294 MDI_CLIENT_UNLOCK(ct); 2295 return (MDI_NOPATH); 2296 } 2297 2298 if (!sb) { 2299 if (preferred == 0) { 2300 /* 2301 * Looks like we have completed 2302 * the traversal as preferred 2303 * value is 0. Time to bail out. 2304 */ 2305 *ret_pip = NULL; 2306 MDI_CLIENT_UNLOCK(ct); 2307 return (MDI_NOPATH); 2308 } else { 2309 /* 2310 * Looks like we reached the 2311 * end of the list. Lets enable 2312 * traversal of non preferred 2313 * paths. 2314 */ 2315 preferred = 0; 2316 } 2317 } 2318 pip = head; 2319 } 2320 } 2321 start = pip; 2322 do { 2323 MDI_PI_LOCK(pip); 2324 if (sb) { 2325 cond = ((MDI_PI(pip)->pi_state == 2326 MDI_PATHINFO_STATE_ONLINE && 2327 MDI_PI(pip)->pi_preferred == 2328 preferred) ? 1 : 0); 2329 } else { 2330 if (flags == MDI_SELECT_ONLINE_PATH) { 2331 cond = ((MDI_PI(pip)->pi_state == 2332 MDI_PATHINFO_STATE_ONLINE && 2333 MDI_PI(pip)->pi_preferred == 2334 preferred) ? 1 : 0); 2335 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2336 cond = ((MDI_PI(pip)->pi_state == 2337 MDI_PATHINFO_STATE_STANDBY && 2338 MDI_PI(pip)->pi_preferred == 2339 preferred) ? 1 : 0); 2340 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2341 MDI_SELECT_STANDBY_PATH)) { 2342 cond = (((MDI_PI(pip)->pi_state == 2343 MDI_PATHINFO_STATE_ONLINE || 2344 (MDI_PI(pip)->pi_state == 2345 MDI_PATHINFO_STATE_STANDBY)) && 2346 MDI_PI(pip)->pi_preferred == 2347 preferred) ? 
1 : 0); 2348 } else if (flags == 2349 (MDI_SELECT_STANDBY_PATH | 2350 MDI_SELECT_ONLINE_PATH | 2351 MDI_SELECT_USER_DISABLE_PATH)) { 2352 cond = (((MDI_PI(pip)->pi_state == 2353 MDI_PATHINFO_STATE_ONLINE || 2354 (MDI_PI(pip)->pi_state == 2355 MDI_PATHINFO_STATE_STANDBY) || 2356 (MDI_PI(pip)->pi_state == 2357 (MDI_PATHINFO_STATE_ONLINE| 2358 MDI_PATHINFO_STATE_USER_DISABLE)) || 2359 (MDI_PI(pip)->pi_state == 2360 (MDI_PATHINFO_STATE_STANDBY | 2361 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2362 MDI_PI(pip)->pi_preferred == 2363 preferred) ? 1 : 0); 2364 } else if (flags == 2365 (MDI_SELECT_STANDBY_PATH | 2366 MDI_SELECT_ONLINE_PATH | 2367 MDI_SELECT_NO_PREFERRED)) { 2368 cond = (((MDI_PI(pip)->pi_state == 2369 MDI_PATHINFO_STATE_ONLINE) || 2370 (MDI_PI(pip)->pi_state == 2371 MDI_PATHINFO_STATE_STANDBY)) 2372 ? 1 : 0); 2373 } else { 2374 cond = 0; 2375 } 2376 } 2377 /* 2378 * No need to explicitly check if the path is disabled. 2379 * Since we are checking for state == ONLINE and the 2380 * same variable is used for DISABLE/ENABLE information. 2381 */ 2382 if (cond) { 2383 /* 2384 * Return the path in hold state. Caller should 2385 * release the lock by calling mdi_rele_path() 2386 */ 2387 MDI_PI_HOLD(pip); 2388 MDI_PI_UNLOCK(pip); 2389 if (sb) 2390 ct->ct_path_last = pip; 2391 *ret_pip = pip; 2392 MDI_CLIENT_UNLOCK(ct); 2393 return (MDI_SUCCESS); 2394 } 2395 /* 2396 * Path is busy. 2397 */ 2398 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2399 MDI_PI_IS_TRANSIENT(pip)) 2400 retry = 1; 2401 2402 /* 2403 * Keep looking for a next available online path 2404 */ 2405 do_again: 2406 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2407 if (next == NULL) { 2408 if ( flags & MDI_SELECT_NO_PREFERRED) { 2409 /* 2410 * Bail out since we hit the end of list 2411 */ 2412 MDI_PI_UNLOCK(pip); 2413 break; 2414 } 2415 2416 if (!sb) { 2417 if (preferred == 1) { 2418 /* 2419 * Looks like we reached the 2420 * end of the list. Lets enable 2421 * traversal of non preferred 2422 * paths. 
2423 */ 2424 preferred = 0; 2425 next = head; 2426 } else { 2427 /* 2428 * We have done both the passes 2429 * Preferred as well as for 2430 * Non-preferred. Bail out now. 2431 */ 2432 cont = 0; 2433 } 2434 } else { 2435 /* 2436 * Standard behavior case. 2437 */ 2438 next = head; 2439 } 2440 } 2441 MDI_PI_UNLOCK(pip); 2442 if (cont == 0) { 2443 break; 2444 } 2445 pip = next; 2446 2447 if (!sb) { 2448 /* 2449 * We need to handle the selection of 2450 * non-preferred path in the following 2451 * case: 2452 * 2453 * +------+ +------+ +------+ +-----+ 2454 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2455 * +------+ +------+ +------+ +-----+ 2456 * 2457 * If we start the search with B, we need to 2458 * skip beyond B to pick C which is non - 2459 * preferred in the second pass. The following 2460 * test, if true, will allow us to skip over 2461 * the 'start'(B in the example) to select 2462 * other non preferred elements. 2463 */ 2464 if ((start_pip != NULL) && (start_pip == pip) && 2465 (MDI_PI(start_pip)->pi_preferred 2466 != preferred)) { 2467 /* 2468 * try again after going past the start 2469 * pip 2470 */ 2471 MDI_PI_LOCK(pip); 2472 goto do_again; 2473 } 2474 } else { 2475 /* 2476 * Standard behavior case 2477 */ 2478 if (start == pip && preferred) { 2479 /* look for nonpreferred paths */ 2480 preferred = 0; 2481 } else if (start == pip && !preferred) { 2482 /* 2483 * Exit condition 2484 */ 2485 cont = 0; 2486 } 2487 } 2488 } while (cont); 2489 break; 2490 } 2491 2492 MDI_CLIENT_UNLOCK(ct); 2493 if (retry == 1) { 2494 return (MDI_BUSY); 2495 } else { 2496 return (MDI_NOPATH); 2497 } 2498 } 2499 2500 /* 2501 * For a client, return the next available path to any phci 2502 * 2503 * Note: 2504 * Caller should hold the branch's devinfo node to get a consistent 2505 * snap shot of the mdi_pathinfo nodes. 2506 * 2507 * Please note that even the list is stable the mdi_pathinfo 2508 * node state and properties are volatile. 
The caller should lock
 * and unlock the nodes by calling mdi_pi_lock() and
 * mdi_pi_unlock() functions to get a stable properties.
 *
 * If there is a need to use the nodes beyond the hold of the
 * devinfo node period (For ex. I/O), then mdi_pathinfo node
 * need to be held against unexpected removal by calling
 * mdi_hold_path() and should be released by calling
 * mdi_rele_path() on completion.
 */
mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	/* Only client devinfo nodes carry a client-side path list. */
	if (!MDI_CLIENT(ct_dip))
		return (NULL);

	/*
	 * Walk through client link
	 */
	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
	ASSERT(ct != NULL);

	/* pip == NULL means "start of list". */
	if (pip == NULL)
		return ((mdi_pathinfo_t *)ct->ct_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
}

/*
 * For a phci, return the next available path to any client
 * Note: ditto mdi_get_next_phci_path()
 */
mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
{
	mdi_phci_t	*ph;

	/* Only pHCI devinfo nodes carry a pHCI-side path list. */
	if (!MDI_PHCI(ph_dip))
		return (NULL);

	/*
	 * Walk through pHCI link
	 */
	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
	ASSERT(ph != NULL);

	/* pip == NULL means "start of list". */
	if (pip == NULL)
		return ((mdi_pathinfo_t *)ph->ph_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
}

/*
 * mdi_hold_path():
 *	Hold the mdi_pathinfo node against unwanted unexpected free.
 * Return Values:
 *	None
 */
void
mdi_hold_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
	}
}


/*
 * mdi_rele_path():
 *	Release the mdi_pathinfo node which was selected
 *	through mdi_select_path() mechanism or manually held by
 *	calling mdi_hold_path().
 * Return Values:
 *	None
 */
void
mdi_rele_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_RELE(pip);
		/* Wake waiters (e.g. mdi_pi_free) blocked on the last hold. */
		if (MDI_PI(pip)->pi_ref_cnt == 0) {
			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
		}
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_lock():
 *	Lock the mdi_pathinfo node.
 * Note:
 *	The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *	Unlock the mdi_pathinfo node.
 * Note:
 *	The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *	Search the list of mdi_pathinfo nodes attached to the
 *	pHCI/Client device node whose path address matches "paddr".
 *	Returns a pointer to the mdi_pathinfo node if a matching node is
 *	found.
 * Return Values:
 *	mdi_pathinfo node handle
 *	NULL
 * Notes:
 *	Caller need not hold any locks to call this function.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (MDI_WARN, pdip,
			    "offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "client not found for caddr @%s", caddr ? caddr : ""));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc():
 *	Allocate and initialize a new instance of a mdi_pathinfo node.
 *	The mdi_pathinfo node returned by this function identifies a
 *	unique device path is capable of having properties attached
 *	and passed to mdi_pi_online() to fully attach and online the
 *	path and client device node.
 *	The mdi_pathinfo node returned by this function must be
 *	destroyed using mdi_pi_free() if the path is no longer
 *	operational or if the caller fails to attach a client device
 *	node when calling mdi_pi_online(). The framework will not free
 *	the resources allocated.
 *	This function can be called from both interrupt and kernel
 *	contexts.  DDI_NOSLEEP flag should be used while calling
 *	from interrupt contexts.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;
	int		path_allocated = 0;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "cname %s: caddr@%s paddr@%s",
	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/*
	 * Mark the pHCI unstable for the duration of the allocation; it is
	 * restored to stable at the "fail:" epilogue below.
	 */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			(void) i_mdi_client_free(vh, ct);
			/* rv is still MDI_NOMEM here */
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* Look for an existing pathinfo with the same pHCI and unit address */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device.  Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *	Convenience wrapper around mdi_pi_alloc_compatible() with no
 *	"compatible" property list.  See mdi_pi_alloc_compatible() for
 *	argument and return value semantics.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *	Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *	mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
	char		*path_persistent;
	int		path_instance;
	mod_hash_val_t	hv;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit the pHCI's disable flags on the new path */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);

	/*
	 * We form the "path" to the pathinfo node, and see if we have
	 * already allocated a 'path_instance' for that "path".  If so,
	 * we use the already allocated 'path_instance'.  If not, we
	 * allocate a new 'path_instance' and associate it with a copy of
	 * the "path" string (which is never freed). The association
	 * between a 'path_instance' this "path" string persists until
	 * reboot.
	 */
	mutex_enter(&mdi_pathmap_mutex);
	(void) ddi_pathname(ph->ph_dip, path);
	(void) sprintf(path + strlen(path), "/%s@%s",
	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
	if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
		path_instance = (uint_t)(intptr_t)hv;
	} else {
		/* allocate a new 'path_instance' and persistent "path" */
		path_instance = mdi_pathmap_instance++;
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_bypath,
		    (mod_hash_key_t)path_persistent,
		    (mod_hash_val_t)(intptr_t)path_instance);
		(void) mod_hash_insert(mdi_pathmap_byinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);

		/* create shortpath name */
		(void) snprintf(path, sizeof (path), "%s%d/%s@%s",
		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);
	}
	mutex_exit(&mdi_pathmap_mutex);
	MDI_PI(pip)->pi_path_instance = path_instance;

	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	return (pip);
}

/*
 * mdi_pi_pathname_by_instance():
 *	Lookup of "path" by 'path_instance'. Return "path".
 *	NOTE: returned "path" remains valid forever (until reboot).
 */
char *
mdi_pi_pathname_by_instance(int path_instance)
{
	char		*path;
	mod_hash_val_t	hv;

	/* mdi_pathmap lookup of "path" by 'path_instance' */
	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_byinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
		path = (char *)hv;
	else
		path = NULL;
	mutex_exit(&mdi_pathmap_mutex);
	return (path);
}

/*
 * mdi_pi_spathname_by_instance():
 *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
 *	NOTE: returned "shortpath" remains valid forever (until reboot).
 */
char *
mdi_pi_spathname_by_instance(int path_instance)
{
	char		*path;
	mod_hash_val_t	hv;

	/* mdi_pathmap lookup of "shortpath" by 'path_instance' */
	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_sbyinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
		path = (char *)hv;
	else
		path = NULL;
	mutex_exit(&mdi_pathmap_mutex);
	return (path);
}


/*
 * i_mdi_phci_add_path():
 *	Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *	Caller should hold per-pHCI mutex
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Append at the tail of the pHCI's path list */
	MDI_PHCI_LOCK(ph);
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *	Add mdi_pathinfo node to client list
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Append at the tail of the client's path list */
	MDI_CLIENT_LOCK(ct);
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *	Free the mdi_pathinfo node and also client device node if this
 *	is the last path to the device
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid pHCI: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vHCI: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
		    "!%d cmds still pending on path: %s %p",
		    MDI_PI(pip)->pi_ref_cnt,
		    mdi_pi_spathname(pip), (void *)pip));
		/* Wait up to 60s for the last hold to be released */
		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
		    TR_CLOCK_TICK) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!Timeout reached on path %s %p without the cond",
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!%d cmds still pending on path %s %p",
			    MDI_PI(pip)->pi_ref_cnt,
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock before taking the vHCI client lock so the
	 * default lock order (vh_client_mutex before ct_mutex) is respected,
	 * then reacquire the client lock.
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	rv = MDI_SUCCESS;
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}

	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/* Undo the earlier vhcache removal if the uninit callback failed */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *	Free the mdi_pathinfo node
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *	Remove a mdi_pathinfo node from pHCI list.
3333 * Notes: 3334 * Caller should hold per-pHCI mutex 3335 */ 3336 static void 3337 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3338 { 3339 mdi_pathinfo_t *prev = NULL; 3340 mdi_pathinfo_t *path = NULL; 3341 3342 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3343 3344 MDI_PHCI_LOCK(ph); 3345 path = ph->ph_path_head; 3346 while (path != NULL) { 3347 if (path == pip) { 3348 break; 3349 } 3350 prev = path; 3351 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3352 } 3353 3354 if (path) { 3355 ph->ph_path_count--; 3356 if (prev) { 3357 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3358 } else { 3359 ph->ph_path_head = 3360 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3361 } 3362 if (ph->ph_path_tail == path) { 3363 ph->ph_path_tail = prev; 3364 } 3365 } 3366 3367 /* 3368 * Clear the pHCI link 3369 */ 3370 MDI_PI(pip)->pi_phci_link = NULL; 3371 MDI_PI(pip)->pi_phci = NULL; 3372 MDI_PHCI_UNLOCK(ph); 3373 } 3374 3375 /* 3376 * i_mdi_client_remove_path(): 3377 * Remove a mdi_pathinfo node from client path list. 
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Locate 'pip' in the client's singly-linked path list */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		/* Unlink it, fixing up head/tail/last as needed */
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* Reset the round-robin cursor if it pointed at 'pip' */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *	online a mdi_pathinfo node
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct = 
MDI_PI(pip)->pi_client; 3465 ASSERT(ct != NULL); 3466 if (ct == NULL) { 3467 /* 3468 * Invalid client device, fail the request 3469 */ 3470 MDI_PI_UNLOCK(pip); 3471 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3472 "!invalid client: pip %s %p", 3473 mdi_pi_spathname(pip), (void *)pip)); 3474 return (MDI_FAILURE); 3475 } 3476 3477 /* 3478 * If this path has not been initialized yet, Callback vHCI driver's 3479 * pathinfo node initialize entry point 3480 */ 3481 3482 if (MDI_PI_IS_INITING(pip)) { 3483 MDI_PI_UNLOCK(pip); 3484 f = vh->vh_ops->vo_pi_init; 3485 if (f != NULL) { 3486 rv = (*f)(vh->vh_dip, pip, 0); 3487 if (rv != MDI_SUCCESS) { 3488 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3489 "!vo_pi_init failed: vHCI %p, pip %s %p", 3490 (void *)vh, mdi_pi_spathname(pip), 3491 (void *)pip)); 3492 return (MDI_FAILURE); 3493 } 3494 } 3495 MDI_PI_LOCK(pip); 3496 MDI_PI_CLEAR_TRANSIENT(pip); 3497 } 3498 3499 /* 3500 * Do not allow state transition when pHCI is in offline/suspended 3501 * states 3502 */ 3503 i_mdi_phci_lock(ph, pip); 3504 if (MDI_PHCI_IS_READY(ph) == 0) { 3505 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3506 "!pHCI not ready, pHCI=%p", (void *)ph)); 3507 MDI_PI_UNLOCK(pip); 3508 i_mdi_phci_unlock(ph); 3509 return (MDI_BUSY); 3510 } 3511 MDI_PHCI_UNSTABLE(ph); 3512 i_mdi_phci_unlock(ph); 3513 3514 /* 3515 * Check if mdi_pathinfo state is in transient state. 3516 * If yes, offlining is in progress and wait till transient state is 3517 * cleared. 3518 */ 3519 if (MDI_PI_IS_TRANSIENT(pip)) { 3520 while (MDI_PI_IS_TRANSIENT(pip)) { 3521 cv_wait(&MDI_PI(pip)->pi_state_cv, 3522 &MDI_PI(pip)->pi_mutex); 3523 } 3524 } 3525 3526 /* 3527 * Grab the client lock in reverse order sequence and release the 3528 * mdi_pathinfo mutex. 
3529 */ 3530 i_mdi_client_lock(ct, pip); 3531 MDI_PI_UNLOCK(pip); 3532 3533 /* 3534 * Wait till failover state is cleared 3535 */ 3536 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3537 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3538 3539 /* 3540 * Mark the mdi_pathinfo node state as transient 3541 */ 3542 MDI_PI_LOCK(pip); 3543 switch (state) { 3544 case MDI_PATHINFO_STATE_ONLINE: 3545 MDI_PI_SET_ONLINING(pip); 3546 break; 3547 3548 case MDI_PATHINFO_STATE_STANDBY: 3549 MDI_PI_SET_STANDBYING(pip); 3550 break; 3551 3552 case MDI_PATHINFO_STATE_FAULT: 3553 /* 3554 * Mark the pathinfo state as FAULTED 3555 */ 3556 MDI_PI_SET_FAULTING(pip); 3557 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3558 break; 3559 3560 case MDI_PATHINFO_STATE_OFFLINE: 3561 /* 3562 * ndi_devi_offline() cannot hold pip or ct locks. 3563 */ 3564 MDI_PI_UNLOCK(pip); 3565 3566 /* 3567 * If this is a user initiated path online->offline operation 3568 * who's success would transition a client from DEGRADED to 3569 * FAILED then only proceed if we can offline the client first. 3570 */ 3571 cdip = ct->ct_dip; 3572 if ((flag & NDI_USER_REQ) && 3573 MDI_PI_IS_ONLINE(pip) && 3574 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3575 i_mdi_client_unlock(ct); 3576 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN); 3577 if (rv != NDI_SUCCESS) { 3578 /* 3579 * Convert to MDI error code 3580 */ 3581 switch (rv) { 3582 case NDI_BUSY: 3583 rv = MDI_BUSY; 3584 break; 3585 default: 3586 rv = MDI_FAILURE; 3587 break; 3588 } 3589 goto state_change_exit; 3590 } else { 3591 i_mdi_client_lock(ct, NULL); 3592 } 3593 } 3594 /* 3595 * Mark the mdi_pathinfo node state as transient 3596 */ 3597 MDI_PI_LOCK(pip); 3598 MDI_PI_SET_OFFLINING(pip); 3599 break; 3600 3601 case MDI_PATHINFO_STATE_INIT: 3602 /* 3603 * Callers are not allowed to ask us to change the state to the 3604 * initial state. 
3605 */ 3606 rv = MDI_FAILURE; 3607 MDI_PI_UNLOCK(pip); 3608 goto state_change_exit; 3609 3610 } 3611 MDI_PI_UNLOCK(pip); 3612 MDI_CLIENT_UNSTABLE(ct); 3613 i_mdi_client_unlock(ct); 3614 3615 f = vh->vh_ops->vo_pi_state_change; 3616 if (f != NULL) 3617 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3618 3619 MDI_CLIENT_LOCK(ct); 3620 MDI_PI_LOCK(pip); 3621 if (rv == MDI_NOT_SUPPORTED) { 3622 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3623 } 3624 if (rv != MDI_SUCCESS) { 3625 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip, 3626 "vo_pi_state_change failed: rv %x", rv)); 3627 } 3628 if (MDI_PI_IS_TRANSIENT(pip)) { 3629 if (rv == MDI_SUCCESS) { 3630 MDI_PI_CLEAR_TRANSIENT(pip); 3631 } else { 3632 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3633 } 3634 } 3635 3636 /* 3637 * Wake anyone waiting for this mdi_pathinfo node 3638 */ 3639 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3640 MDI_PI_UNLOCK(pip); 3641 3642 /* 3643 * Mark the client device as stable 3644 */ 3645 MDI_CLIENT_STABLE(ct); 3646 if (rv == MDI_SUCCESS) { 3647 if (ct->ct_unstable == 0) { 3648 cdip = ct->ct_dip; 3649 3650 /* 3651 * Onlining the mdi_pathinfo node will impact the 3652 * client state Update the client and dev_info node 3653 * state accordingly 3654 */ 3655 rv = NDI_SUCCESS; 3656 i_mdi_client_update_state(ct); 3657 switch (MDI_CLIENT_STATE(ct)) { 3658 case MDI_CLIENT_STATE_OPTIMAL: 3659 case MDI_CLIENT_STATE_DEGRADED: 3660 if (cdip && !i_ddi_devi_attached(cdip) && 3661 ((state == MDI_PATHINFO_STATE_ONLINE) || 3662 (state == MDI_PATHINFO_STATE_STANDBY))) { 3663 3664 /* 3665 * Must do ndi_devi_online() through 3666 * hotplug thread for deferred 3667 * attach mechanism to work 3668 */ 3669 MDI_CLIENT_UNLOCK(ct); 3670 rv = ndi_devi_online(cdip, 0); 3671 MDI_CLIENT_LOCK(ct); 3672 if ((rv != NDI_SUCCESS) && 3673 (MDI_CLIENT_STATE(ct) == 3674 MDI_CLIENT_STATE_DEGRADED)) { 3675 MDI_DEBUG(1, (MDI_WARN, cdip, 3676 "!ndi_devi_online failed " 3677 "error %x", rv)); 3678 } 3679 rv = NDI_SUCCESS; 3680 } 3681 break; 3682 3683 case 
MDI_CLIENT_STATE_FAILED: 3684 /* 3685 * This is the last path case for 3686 * non-user initiated events. 3687 */ 3688 if (((flag & NDI_USER_REQ) == 0) && 3689 cdip && (i_ddi_node_state(cdip) >= 3690 DS_INITIALIZED)) { 3691 MDI_CLIENT_UNLOCK(ct); 3692 rv = ndi_devi_offline(cdip, 3693 NDI_DEVFS_CLEAN); 3694 MDI_CLIENT_LOCK(ct); 3695 3696 if (rv != NDI_SUCCESS) { 3697 /* 3698 * ndi_devi_offline failed. 3699 * Reset client flags to 3700 * online as the path could not 3701 * be offlined. 3702 */ 3703 MDI_DEBUG(1, (MDI_WARN, cdip, 3704 "!ndi_devi_offline failed: " 3705 "error %x", rv)); 3706 MDI_CLIENT_SET_ONLINE(ct); 3707 } 3708 } 3709 break; 3710 } 3711 /* 3712 * Convert to MDI error code 3713 */ 3714 switch (rv) { 3715 case NDI_SUCCESS: 3716 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3717 i_mdi_report_path_state(ct, pip); 3718 rv = MDI_SUCCESS; 3719 break; 3720 case NDI_BUSY: 3721 rv = MDI_BUSY; 3722 break; 3723 default: 3724 rv = MDI_FAILURE; 3725 break; 3726 } 3727 } 3728 } 3729 MDI_CLIENT_UNLOCK(ct); 3730 3731 state_change_exit: 3732 /* 3733 * Mark the pHCI as stable again. 3734 */ 3735 MDI_PHCI_LOCK(ph); 3736 MDI_PHCI_STABLE(ph); 3737 MDI_PHCI_UNLOCK(ph); 3738 return (rv); 3739 } 3740 3741 /* 3742 * mdi_pi_online(): 3743 * Place the path_info node in the online state. The path is 3744 * now available to be selected by mdi_select_path() for 3745 * transporting I/O requests to client devices. 
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
	int		client_held = 0;
	int		rv;

	ASSERT(ct != NULL);
	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
	if (rv != MDI_SUCCESS)
		return (rv);

	/* Take a power-management hold on the path the first time through. */
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held == 0) {
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_pip %p", (void *)pip));
		i_mdi_pm_hold_pip(pip);
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	if (client_held) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_power_cnt == 0) {
			rv = i_mdi_power_all_phci(ct);
		}

		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_client %p", (void *)ct));
		i_mdi_pm_hold_client(ct, 1);
		MDI_CLIENT_UNLOCK(ct);
	}

	return (rv);
}

/*
 * mdi_pi_standby():
 *		Place the mdi_pathinfo node in standby state
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
}

/*
 * mdi_pi_fault():
 *		Place the mdi_pathinfo node in fault'ed state
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
}

/*
 * mdi_pi_offline():
 *		Offline a mdi_pathinfo node.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
{
	int	ret, client_held = 0;
	mdi_client_t	*ct;

	/*
	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
	 * used it to mean "user initiated operation" (i.e. devctl). Callers
	 * should now just use NDI_USER_REQ.
	 */
	if (flags & NDI_DEVI_REMOVE) {
		flags &= ~NDI_DEVI_REMOVE;
		flags |= NDI_USER_REQ;
	}

	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);

	if (ret == MDI_SUCCESS) {
		/* Drop the PM hold taken when the path was onlined. */
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_pm_held) {
			client_held = 1;
		}
		MDI_PI_UNLOCK(pip);

		if (client_held) {
			ct = MDI_PI(pip)->pi_client;
			MDI_CLIENT_LOCK(ct);
			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
			MDI_CLIENT_UNLOCK(ct);
		}
	}

	return (ret);
}

/*
 * i_mdi_pi_offline():
 *		Offline a mdi_pathinfo node and call the vHCI driver's callback
 */
static int
i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
{
	dev_info_t	*vdip = NULL;
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct = NULL;
	int		(*f)();
	int		rv;

	MDI_PI_LOCK(pip);
	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);

	/*
	 * Drain outstanding I/O: wait (in 60-second slices) for the path
	 * reference count to drop to zero before offlining.
	 */
	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
		    "!%d cmds still pending on path %s %p",
		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
		    (void *)pip));
		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
		    TR_CLOCK_TICK) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!Timeout reached on path %s %p without the cond",
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!%d cmds still pending on path %s %p",
			    MDI_PI(pip)->pi_ref_cnt,
			    mdi_pi_spathname(pip), (void *)pip));
		}
	}
	vh = ct->ct_vhci;
	vdip = vh->vh_dip;

	/*
	 * Notify vHCI that has registered this event
	 */
	ASSERT(vh->vh_ops);
	f = vh->vh_ops->vo_pi_state_change;

	rv = MDI_SUCCESS;
	if (f != NULL) {
		/* Callback runs without pi_mutex held. */
		MDI_PI_UNLOCK(pip);
		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
		    flags)) != MDI_SUCCESS) {
			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
			    ddi_driver_name(vdip), ddi_get_instance(vdip),
			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
		}
		MDI_PI_LOCK(pip);
	}

	/*
	 * Set the mdi_pathinfo node state and clear the transient condition
	 */
	MDI_PI_SET_OFFLINE(pip);
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	MDI_CLIENT_LOCK(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			dev_info_t	*cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			i_mdi_client_update_state(ct);
			rv = NDI_SUCCESS;
			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
				if (cdip &&
				    (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);
					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online.
						 */
						MDI_DEBUG(4, (MDI_WARN, cdip,
						    "ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
		i_mdi_report_path_state(ct, pip);
	}

	MDI_CLIENT_UNLOCK(ct);

	/*
	 * Change in the mdi_pathinfo node state will impact the client state
	 */
	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
	    "ct = %p pip = %p", (void *)ct, (void *)pip));
	return (rv);
}

/*
 * i_mdi_pi_online():
 *		Online a mdi_pathinfo node and call the vHCI driver's callback
 */
static int
i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct = NULL;
	mdi_phci_t	*ph;
	int		(*f)();
	int		rv;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	vh = ph->ph_vhci;
	ct = MDI_PI(pip)->pi_client;
	MDI_PI_SET_ONLINING(pip)
	MDI_PI_UNLOCK(pip);
	f = vh->vh_ops->vo_pi_state_change;
	rv = MDI_SUCCESS;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0, flags);
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);
	if (rv == MDI_SUCCESS) {
		dev_info_t	*cdip = ct->ct_dip;

		i_mdi_client_update_state(ct);
		if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
		    MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
			if (cdip && !i_ddi_devi_attached(cdip)) {
				MDI_CLIENT_UNLOCK(ct);
				rv = ndi_devi_online(cdip, 0);
				MDI_CLIENT_LOCK(ct);
				if ((rv != NDI_SUCCESS) &&
				    (MDI_CLIENT_STATE(ct) ==
				    MDI_CLIENT_STATE_DEGRADED)) {
					MDI_CLIENT_SET_OFFLINE(ct);
				}
				if (rv != NDI_SUCCESS) {
					/* Reset the path state */
					MDI_PI_LOCK(pip);
					MDI_PI(pip)->pi_state =
					    MDI_PI_OLD_STATE(pip);
					MDI_PI_UNLOCK(pip);
				}
			}
		}
		switch (rv) {
		case NDI_SUCCESS:
			MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
			i_mdi_report_path_state(ct, pip);
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	} else {
		/* Reset the path state */
		MDI_PI_LOCK(pip);
		MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		MDI_PI_UNLOCK(pip);
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_pi_get_node_name():
 *		Get the name associated with a mdi_pathinfo node.
 *		Since pathinfo nodes are not directly named, we
 *		return the node_name of the client.
 *
 * Return Values:
 *		char *
 */
char *
mdi_pi_get_node_name(mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	if (pip == NULL)
		return (NULL);
	ct = MDI_PI(pip)->pi_client;
	if ((ct == NULL) || (ct->ct_dip == NULL))
		return (NULL);
	return (ddi_node_name(ct->ct_dip));
}

/*
 * mdi_pi_get_addr():
 *		Get the unit address associated with a mdi_pathinfo node
 *
 * Return Values:
 *		char *
 */
char *
mdi_pi_get_addr(mdi_pathinfo_t *pip)
{
	if (pip == NULL)
		return (NULL);

	return (MDI_PI(pip)->pi_addr);
}

/*
 * mdi_pi_get_path_instance():
 *		Get the 'path_instance' of a mdi_pathinfo node
 *
 * Return Values:
 *		path_instance
 */
int
mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
{
	if (pip == NULL)
		return (0);

	return (MDI_PI(pip)->pi_path_instance);
}

/*
 * mdi_pi_pathname():
 *		Return pointer to path to pathinfo node.
4121 */ 4122 char * 4123 mdi_pi_pathname(mdi_pathinfo_t *pip) 4124 { 4125 if (pip == NULL) 4126 return (NULL); 4127 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4128 } 4129 4130 /* 4131 * mdi_pi_spathname(): 4132 * Return pointer to shortpath to pathinfo node. Used for debug 4133 * messages, so return "" instead of NULL when unknown. 4134 */ 4135 char * 4136 mdi_pi_spathname(mdi_pathinfo_t *pip) 4137 { 4138 char *spath = ""; 4139 4140 if (pip) { 4141 spath = mdi_pi_spathname_by_instance( 4142 mdi_pi_get_path_instance(pip)); 4143 if (spath == NULL) 4144 spath = ""; 4145 } 4146 return (spath); 4147 } 4148 4149 char * 4150 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4151 { 4152 char *obp_path = NULL; 4153 if ((pip == NULL) || (path == NULL)) 4154 return (NULL); 4155 4156 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4157 (void) strcpy(path, obp_path); 4158 (void) mdi_prop_free(obp_path); 4159 } else { 4160 path = NULL; 4161 } 4162 return (path); 4163 } 4164 4165 int 4166 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4167 { 4168 dev_info_t *pdip; 4169 char *obp_path = NULL; 4170 int rc = MDI_FAILURE; 4171 4172 if (pip == NULL) 4173 return (MDI_FAILURE); 4174 4175 pdip = mdi_pi_get_phci(pip); 4176 if (pdip == NULL) 4177 return (MDI_FAILURE); 4178 4179 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4180 4181 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4182 (void) ddi_pathname(pdip, obp_path); 4183 } 4184 4185 if (component) { 4186 (void) strncat(obp_path, "/", MAXPATHLEN); 4187 (void) strncat(obp_path, component, MAXPATHLEN); 4188 } 4189 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4190 4191 if (obp_path) 4192 kmem_free(obp_path, MAXPATHLEN); 4193 return (rc); 4194 } 4195 4196 /* 4197 * mdi_pi_get_client(): 4198 * Get the client devinfo associated with a mdi_pathinfo node 4199 * 4200 * Return Values: 4201 * Handle to client device dev_info node 4202 */ 4203 dev_info_t * 4204 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4205 { 4206 dev_info_t *dip = NULL; 4207 if (pip) { 4208 dip = MDI_PI(pip)->pi_client->ct_dip; 4209 } 4210 return (dip); 4211 } 4212 4213 /* 4214 * mdi_pi_get_phci(): 4215 * Get the pHCI devinfo associated with the mdi_pathinfo node 4216 * Return Values: 4217 * Handle to dev_info node 4218 */ 4219 dev_info_t * 4220 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4221 { 4222 dev_info_t *dip = NULL; 4223 mdi_phci_t *ph; 4224 4225 if (pip) { 4226 ph = MDI_PI(pip)->pi_phci; 4227 if (ph) 4228 dip = ph->ph_dip; 4229 } 4230 return (dip); 4231 } 4232 4233 /* 4234 * mdi_pi_get_client_private(): 4235 * Get the client private information associated with the 4236 * mdi_pathinfo node 4237 */ 4238 void * 4239 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4240 { 4241 void *cprivate = NULL; 4242 if (pip) { 4243 cprivate = MDI_PI(pip)->pi_cprivate; 4244 } 4245 return (cprivate); 4246 } 4247 4248 /* 4249 * mdi_pi_set_client_private(): 4250 * Set the client private information in the mdi_pathinfo node 4251 */ 4252 void 4253 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4254 { 4255 if (pip) { 4256 MDI_PI(pip)->pi_cprivate = priv; 4257 } 4258 } 4259 4260 /* 4261 * mdi_pi_get_phci_private(): 4262 * Get the pHCI private information associated with the 4263 * mdi_pathinfo node 4264 */ 4265 caddr_t 4266 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4267 { 4268 caddr_t pprivate = NULL; 4269 4270 if (pip) { 4271 pprivate = MDI_PI(pip)->pi_pprivate; 4272 } 4273 return (pprivate); 4274 } 4275 4276 /* 4277 * mdi_pi_set_phci_private(): 4278 * Set the pHCI private information in the mdi_pathinfo node 4279 */ 4280 void 4281 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4282 { 4283 if (pip) { 4284 MDI_PI(pip)->pi_pprivate = priv; 4285 } 4286 } 4287 4288 /* 4289 * mdi_pi_get_state(): 4290 * Get the mdi_pathinfo node state. 
Transient states are internal 4291 * and not provided to the users 4292 */ 4293 mdi_pathinfo_state_t 4294 mdi_pi_get_state(mdi_pathinfo_t *pip) 4295 { 4296 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4297 4298 if (pip) { 4299 if (MDI_PI_IS_TRANSIENT(pip)) { 4300 /* 4301 * mdi_pathinfo is in state transition. Return the 4302 * last good state. 4303 */ 4304 state = MDI_PI_OLD_STATE(pip); 4305 } else { 4306 state = MDI_PI_STATE(pip); 4307 } 4308 } 4309 return (state); 4310 } 4311 4312 /* 4313 * mdi_pi_get_flags(): 4314 * Get the mdi_pathinfo node flags. 4315 */ 4316 uint_t 4317 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4318 { 4319 return (pip ? MDI_PI(pip)->pi_flags : 0); 4320 } 4321 4322 /* 4323 * Note that the following function needs to be the new interface for 4324 * mdi_pi_get_state when mpxio gets integrated to ON. 4325 */ 4326 int 4327 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4328 uint32_t *ext_state) 4329 { 4330 *state = MDI_PATHINFO_STATE_INIT; 4331 4332 if (pip) { 4333 if (MDI_PI_IS_TRANSIENT(pip)) { 4334 /* 4335 * mdi_pathinfo is in state transition. Return the 4336 * last good state. 
4337 */ 4338 *state = MDI_PI_OLD_STATE(pip); 4339 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4340 } else { 4341 *state = MDI_PI_STATE(pip); 4342 *ext_state = MDI_PI_EXT_STATE(pip); 4343 } 4344 } 4345 return (MDI_SUCCESS); 4346 } 4347 4348 /* 4349 * mdi_pi_get_preferred: 4350 * Get the preferred path flag 4351 */ 4352 int 4353 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4354 { 4355 if (pip) { 4356 return (MDI_PI(pip)->pi_preferred); 4357 } 4358 return (0); 4359 } 4360 4361 /* 4362 * mdi_pi_set_preferred: 4363 * Set the preferred path flag 4364 */ 4365 void 4366 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4367 { 4368 if (pip) { 4369 MDI_PI(pip)->pi_preferred = preferred; 4370 } 4371 } 4372 4373 /* 4374 * mdi_pi_set_state(): 4375 * Set the mdi_pathinfo node state 4376 */ 4377 void 4378 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4379 { 4380 uint32_t ext_state; 4381 4382 if (pip) { 4383 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4384 MDI_PI(pip)->pi_state = state; 4385 MDI_PI(pip)->pi_state |= ext_state; 4386 4387 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4388 i_ddi_di_cache_invalidate(); 4389 } 4390 } 4391 4392 /* 4393 * Property functions: 4394 */ 4395 int 4396 i_map_nvlist_error_to_mdi(int val) 4397 { 4398 int rv; 4399 4400 switch (val) { 4401 case 0: 4402 rv = DDI_PROP_SUCCESS; 4403 break; 4404 case EINVAL: 4405 case ENOTSUP: 4406 rv = DDI_PROP_INVAL_ARG; 4407 break; 4408 case ENOMEM: 4409 rv = DDI_PROP_NO_MEMORY; 4410 break; 4411 default: 4412 rv = DDI_PROP_NOT_FOUND; 4413 break; 4414 } 4415 return (rv); 4416 } 4417 4418 /* 4419 * mdi_pi_get_next_prop(): 4420 * Property walk function. The caller should hold mdi_pi_lock() 4421 * and release by calling mdi_pi_unlock() at the end of walk to 4422 * get a consistent value. 
4423 */ 4424 nvpair_t * 4425 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4426 { 4427 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4428 return (NULL); 4429 } 4430 ASSERT(MDI_PI_LOCKED(pip)); 4431 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4432 } 4433 4434 /* 4435 * mdi_prop_remove(): 4436 * Remove the named property from the named list. 4437 */ 4438 int 4439 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4440 { 4441 if (pip == NULL) { 4442 return (DDI_PROP_NOT_FOUND); 4443 } 4444 ASSERT(!MDI_PI_LOCKED(pip)); 4445 MDI_PI_LOCK(pip); 4446 if (MDI_PI(pip)->pi_prop == NULL) { 4447 MDI_PI_UNLOCK(pip); 4448 return (DDI_PROP_NOT_FOUND); 4449 } 4450 if (name) { 4451 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4452 } else { 4453 char nvp_name[MAXNAMELEN]; 4454 nvpair_t *nvp; 4455 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4456 while (nvp) { 4457 nvpair_t *next; 4458 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4459 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4460 nvpair_name(nvp)); 4461 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4462 nvp_name); 4463 nvp = next; 4464 } 4465 } 4466 MDI_PI_UNLOCK(pip); 4467 return (DDI_PROP_SUCCESS); 4468 } 4469 4470 /* 4471 * mdi_prop_size(): 4472 * Get buffer size needed to pack the property data. 4473 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4474 * buffer size. 4475 */ 4476 int 4477 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4478 { 4479 int rv; 4480 size_t bufsize; 4481 4482 *buflenp = 0; 4483 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4484 return (DDI_PROP_NOT_FOUND); 4485 } 4486 ASSERT(MDI_PI_LOCKED(pip)); 4487 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4488 &bufsize, NV_ENCODE_NATIVE); 4489 *buflenp = bufsize; 4490 return (i_map_nvlist_error_to_mdi(rv)); 4491 } 4492 4493 /* 4494 * mdi_prop_pack(): 4495 * pack the property list. 
The caller should hold the 4496 * mdi_pathinfo_t node to get a consistent data 4497 */ 4498 int 4499 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4500 { 4501 int rv; 4502 size_t bufsize; 4503 4504 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4505 return (DDI_PROP_NOT_FOUND); 4506 } 4507 4508 ASSERT(MDI_PI_LOCKED(pip)); 4509 4510 bufsize = buflen; 4511 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4512 NV_ENCODE_NATIVE, KM_SLEEP); 4513 4514 return (i_map_nvlist_error_to_mdi(rv)); 4515 } 4516 4517 /* 4518 * mdi_prop_update_byte(): 4519 * Create/Update a byte property 4520 */ 4521 int 4522 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4523 { 4524 int rv; 4525 4526 if (pip == NULL) { 4527 return (DDI_PROP_INVAL_ARG); 4528 } 4529 ASSERT(!MDI_PI_LOCKED(pip)); 4530 MDI_PI_LOCK(pip); 4531 if (MDI_PI(pip)->pi_prop == NULL) { 4532 MDI_PI_UNLOCK(pip); 4533 return (DDI_PROP_NOT_FOUND); 4534 } 4535 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4536 MDI_PI_UNLOCK(pip); 4537 return (i_map_nvlist_error_to_mdi(rv)); 4538 } 4539 4540 /* 4541 * mdi_prop_update_byte_array(): 4542 * Create/Update a byte array property 4543 */ 4544 int 4545 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4546 uint_t nelements) 4547 { 4548 int rv; 4549 4550 if (pip == NULL) { 4551 return (DDI_PROP_INVAL_ARG); 4552 } 4553 ASSERT(!MDI_PI_LOCKED(pip)); 4554 MDI_PI_LOCK(pip); 4555 if (MDI_PI(pip)->pi_prop == NULL) { 4556 MDI_PI_UNLOCK(pip); 4557 return (DDI_PROP_NOT_FOUND); 4558 } 4559 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4560 MDI_PI_UNLOCK(pip); 4561 return (i_map_nvlist_error_to_mdi(rv)); 4562 } 4563 4564 /* 4565 * mdi_prop_update_int(): 4566 * Create/Update a 32 bit integer property 4567 */ 4568 int 4569 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4570 { 4571 int rv; 4572 4573 if (pip == NULL) { 4574 return (DDI_PROP_INVAL_ARG); 4575 } 4576 
ASSERT(!MDI_PI_LOCKED(pip)); 4577 MDI_PI_LOCK(pip); 4578 if (MDI_PI(pip)->pi_prop == NULL) { 4579 MDI_PI_UNLOCK(pip); 4580 return (DDI_PROP_NOT_FOUND); 4581 } 4582 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4583 MDI_PI_UNLOCK(pip); 4584 return (i_map_nvlist_error_to_mdi(rv)); 4585 } 4586 4587 /* 4588 * mdi_prop_update_int64(): 4589 * Create/Update a 64 bit integer property 4590 */ 4591 int 4592 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4593 { 4594 int rv; 4595 4596 if (pip == NULL) { 4597 return (DDI_PROP_INVAL_ARG); 4598 } 4599 ASSERT(!MDI_PI_LOCKED(pip)); 4600 MDI_PI_LOCK(pip); 4601 if (MDI_PI(pip)->pi_prop == NULL) { 4602 MDI_PI_UNLOCK(pip); 4603 return (DDI_PROP_NOT_FOUND); 4604 } 4605 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4606 MDI_PI_UNLOCK(pip); 4607 return (i_map_nvlist_error_to_mdi(rv)); 4608 } 4609 4610 /* 4611 * mdi_prop_update_int_array(): 4612 * Create/Update a int array property 4613 */ 4614 int 4615 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4616 uint_t nelements) 4617 { 4618 int rv; 4619 4620 if (pip == NULL) { 4621 return (DDI_PROP_INVAL_ARG); 4622 } 4623 ASSERT(!MDI_PI_LOCKED(pip)); 4624 MDI_PI_LOCK(pip); 4625 if (MDI_PI(pip)->pi_prop == NULL) { 4626 MDI_PI_UNLOCK(pip); 4627 return (DDI_PROP_NOT_FOUND); 4628 } 4629 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4630 nelements); 4631 MDI_PI_UNLOCK(pip); 4632 return (i_map_nvlist_error_to_mdi(rv)); 4633 } 4634 4635 /* 4636 * mdi_prop_update_string(): 4637 * Create/Update a string property 4638 */ 4639 int 4640 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4641 { 4642 int rv; 4643 4644 if (pip == NULL) { 4645 return (DDI_PROP_INVAL_ARG); 4646 } 4647 ASSERT(!MDI_PI_LOCKED(pip)); 4648 MDI_PI_LOCK(pip); 4649 if (MDI_PI(pip)->pi_prop == NULL) { 4650 MDI_PI_UNLOCK(pip); 4651 return (DDI_PROP_NOT_FOUND); 4652 } 4653 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4654 MDI_PI_UNLOCK(pip); 4655 return (i_map_nvlist_error_to_mdi(rv)); 4656 } 4657 4658 /* 4659 * mdi_prop_update_string_array(): 4660 * Create/Update a string array property 4661 */ 4662 int 4663 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4664 uint_t nelements) 4665 { 4666 int rv; 4667 4668 if (pip == NULL) { 4669 return (DDI_PROP_INVAL_ARG); 4670 } 4671 ASSERT(!MDI_PI_LOCKED(pip)); 4672 MDI_PI_LOCK(pip); 4673 if (MDI_PI(pip)->pi_prop == NULL) { 4674 MDI_PI_UNLOCK(pip); 4675 return (DDI_PROP_NOT_FOUND); 4676 } 4677 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4678 nelements); 4679 MDI_PI_UNLOCK(pip); 4680 return (i_map_nvlist_error_to_mdi(rv)); 4681 } 4682 4683 /* 4684 * mdi_prop_lookup_byte(): 4685 * Look for byte property identified by name. The data returned 4686 * is the actual property and valid as long as mdi_pathinfo_t node 4687 * is alive. 4688 */ 4689 int 4690 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4691 { 4692 int rv; 4693 4694 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4695 return (DDI_PROP_NOT_FOUND); 4696 } 4697 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4698 return (i_map_nvlist_error_to_mdi(rv)); 4699 } 4700 4701 4702 /* 4703 * mdi_prop_lookup_byte_array(): 4704 * Look for byte array property identified by name. The data 4705 * returned is the actual property and valid as long as 4706 * mdi_pathinfo_t node is alive. 4707 */ 4708 int 4709 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4710 uint_t *nelements) 4711 { 4712 int rv; 4713 4714 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4715 return (DDI_PROP_NOT_FOUND); 4716 } 4717 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4718 nelements); 4719 return (i_map_nvlist_error_to_mdi(rv)); 4720 } 4721 4722 /* 4723 * mdi_prop_lookup_int(): 4724 * Look for int property identified by name. 
The data returned 4725 * is the actual property and valid as long as mdi_pathinfo_t 4726 * node is alive. 4727 */ 4728 int 4729 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4730 { 4731 int rv; 4732 4733 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4734 return (DDI_PROP_NOT_FOUND); 4735 } 4736 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4737 return (i_map_nvlist_error_to_mdi(rv)); 4738 } 4739 4740 /* 4741 * mdi_prop_lookup_int64(): 4742 * Look for int64 property identified by name. The data returned 4743 * is the actual property and valid as long as mdi_pathinfo_t node 4744 * is alive. 4745 */ 4746 int 4747 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4748 { 4749 int rv; 4750 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4751 return (DDI_PROP_NOT_FOUND); 4752 } 4753 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4754 return (i_map_nvlist_error_to_mdi(rv)); 4755 } 4756 4757 /* 4758 * mdi_prop_lookup_int_array(): 4759 * Look for int array property identified by name. The data 4760 * returned is the actual property and valid as long as 4761 * mdi_pathinfo_t node is alive. 4762 */ 4763 int 4764 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4765 uint_t *nelements) 4766 { 4767 int rv; 4768 4769 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4770 return (DDI_PROP_NOT_FOUND); 4771 } 4772 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4773 (int32_t **)data, nelements); 4774 return (i_map_nvlist_error_to_mdi(rv)); 4775 } 4776 4777 /* 4778 * mdi_prop_lookup_string(): 4779 * Look for string property identified by name. The data 4780 * returned is the actual property and valid as long as 4781 * mdi_pathinfo_t node is alive. 
4782 */ 4783 int 4784 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4785 { 4786 int rv; 4787 4788 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4789 return (DDI_PROP_NOT_FOUND); 4790 } 4791 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4792 return (i_map_nvlist_error_to_mdi(rv)); 4793 } 4794 4795 /* 4796 * mdi_prop_lookup_string_array(): 4797 * Look for string array property identified by name. The data 4798 * returned is the actual property and valid as long as 4799 * mdi_pathinfo_t node is alive. 4800 */ 4801 int 4802 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4803 uint_t *nelements) 4804 { 4805 int rv; 4806 4807 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4808 return (DDI_PROP_NOT_FOUND); 4809 } 4810 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4811 nelements); 4812 return (i_map_nvlist_error_to_mdi(rv)); 4813 } 4814 4815 /* 4816 * mdi_prop_free(): 4817 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4818 * functions return the pointer to actual property data and not a 4819 * copy of it. So the data returned is valid as long as 4820 * mdi_pathinfo_t node is valid. 
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	/* Data is owned by the pathinfo node; nothing to release. */
	return (DDI_PROP_SUCCESS);
}

/*
 * i_mdi_report_path_state():
 *	Report (via cmn_err) the client's overall multipath status and
 *	the state of the given path, clearing the client's
 *	"report device needed" flag once the message has been emitted.
 *	Caller must hold the client lock (asserted below).
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char		*ct_path;
	char		*ct_status;
	char		*status;
	dev_info_t	*cdip = ct->ct_dip;
	char		lb_buf[64];
	int		report_lb_c = 0, report_lb_p = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Nothing to report without an attached, instanced client dip. */
	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
		/* load balance config is only reported when optimal */
		report_lb_c = 1;
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	lb_buf[0] = 0;		/* not interested in load balancing config */

	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
		status = "removed";
	} else if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
		/* load balance config is only reported for online paths */
		report_lb_p = 1;
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	if (cdip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		/*
		 * NOTE: Keeping "multipath status: %s" and
		 * "Load balancing: %s" format unchanged in case someone
		 * scrubs /var/adm/messages looking for these messages.
		 */
		if (report_lb_c && report_lb_p) {
			if (ct->ct_lb == LOAD_BALANCE_LBA) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s, region-size: %d", mdi_load_balance_lba,
				    ct->ct_lb_args->region_size);
			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s", mdi_load_balance_none);
			} else {
				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
				    mdi_load_balance_rr);
			}

			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s: Load balancing: %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status, lb_buf);
		} else {
			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status);
		}

		kmem_free(ct_path, MAXPATHLEN);
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef	DEBUG
/*
 * i_mdi_log():
 *	Utility function for error message management
 *
 *	NOTE: Implementation takes care of trailing \n for cmn_err,
 *	MDI_DEBUG should not terminate fmt strings with \n.
 *
 *	NOTE: If the level is >= 2, and there is no leading !?^
 *	then a leading ! is implied (but can be overriden via
 *	mdi_debug_consoleonly). If you are using kmdb on the console,
 *	consider setting mdi_debug_consoleonly to 1 as an aid.
 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[512];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	if (dip) {
		(void) snprintf(name, sizeof(name), "%s%d: ",
		    ddi_driver_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/*
	 * A leading !, ? or ^ in the formatted message selects the
	 * cmn_err destination (log, boot, console respectively); strip
	 * it here and re-apply it to the final prefixed message below.
	 */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		if (level >= 2)
			log_only = 1;		/* ! implied */
		bp = buf;
		break;
	}
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}
	if (mdi_debug_consoleonly) {
		/* force everything to the console at CE_NOTE */
		log_only = 0;
		boot_only = 0;
		console_only = 1;
		level = CE_NOTE;
		goto console;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
		/* CE_CONT does not append \n, so supply it ourselves */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
		}
		break;

	case CE_WARN:
	case CE_PANIC:
	console:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
		}
		break;
	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */

/*
 * i_mdi_client_online():
 *	Mark the client state online and (re)bind the client structure
 *	to its dev_info node; takes a pm hold on the client.
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification. Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
	    "i_mdi_pm_hold_client %p", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_phci_online():
 *	Mark the pHCI state online.
 */
void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *	Online notification from NDI framework on pHCI/client
 *	device online.
 * Return Values:
 *	NDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	/* A dip may be both a pHCI and a client; handle each role. */
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *	Offline notification from NDI framework on pHCI/Client device
 *	offline.
5086 * 5087 * Return Values: 5088 * NDI_SUCCESS 5089 * NDI_FAILURE 5090 */ 5091 /*ARGSUSED*/ 5092 int 5093 mdi_devi_offline(dev_info_t *dip, uint_t flags) 5094 { 5095 int rv = NDI_SUCCESS; 5096 5097 if (MDI_CLIENT(dip)) { 5098 rv = i_mdi_client_offline(dip, flags); 5099 if (rv != NDI_SUCCESS) 5100 return (rv); 5101 } 5102 5103 if (MDI_PHCI(dip)) { 5104 rv = i_mdi_phci_offline(dip, flags); 5105 5106 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 5107 /* set client back online */ 5108 i_mdi_client_online(dip); 5109 } 5110 } 5111 5112 return (rv); 5113 } 5114 5115 /*ARGSUSED*/ 5116 static int 5117 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 5118 { 5119 int rv = NDI_SUCCESS; 5120 mdi_phci_t *ph; 5121 mdi_client_t *ct; 5122 mdi_pathinfo_t *pip; 5123 mdi_pathinfo_t *next; 5124 mdi_pathinfo_t *failed_pip = NULL; 5125 dev_info_t *cdip; 5126 5127 /* 5128 * pHCI component offline notification 5129 * Make sure that this pHCI instance is free to be offlined. 5130 * If it is OK to proceed, Offline and remove all the child 5131 * mdi_pathinfo nodes. This process automatically offlines 5132 * corresponding client devices, for which this pHCI provides 5133 * critical services. 5134 */ 5135 ph = i_devi_get_phci(dip); 5136 MDI_DEBUG(2, (MDI_NOTE, dip, 5137 "called %p %p", (void *)dip, (void *)ph)); 5138 if (ph == NULL) { 5139 return (rv); 5140 } 5141 5142 MDI_PHCI_LOCK(ph); 5143 5144 if (MDI_PHCI_IS_OFFLINE(ph)) { 5145 MDI_DEBUG(1, (MDI_WARN, dip, 5146 "!pHCI already offlined: %p", (void *)dip)); 5147 MDI_PHCI_UNLOCK(ph); 5148 return (NDI_SUCCESS); 5149 } 5150 5151 /* 5152 * Check to see if the pHCI can be offlined 5153 */ 5154 if (ph->ph_unstable) { 5155 MDI_DEBUG(1, (MDI_WARN, dip, 5156 "!One or more target devices are in transient state. " 5157 "This device can not be removed at this moment. 
" 5158 "Please try again later.")); 5159 MDI_PHCI_UNLOCK(ph); 5160 return (NDI_BUSY); 5161 } 5162 5163 pip = ph->ph_path_head; 5164 while (pip != NULL) { 5165 MDI_PI_LOCK(pip); 5166 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5167 5168 /* 5169 * The mdi_pathinfo state is OK. Check the client state. 5170 * If failover in progress fail the pHCI from offlining 5171 */ 5172 ct = MDI_PI(pip)->pi_client; 5173 i_mdi_client_lock(ct, pip); 5174 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5175 (ct->ct_unstable)) { 5176 /* 5177 * Failover is in progress, Fail the DR 5178 */ 5179 MDI_DEBUG(1, (MDI_WARN, dip, 5180 "!pHCI device is busy. " 5181 "This device can not be removed at this moment. " 5182 "Please try again later.")); 5183 MDI_PI_UNLOCK(pip); 5184 i_mdi_client_unlock(ct); 5185 MDI_PHCI_UNLOCK(ph); 5186 return (NDI_BUSY); 5187 } 5188 MDI_PI_UNLOCK(pip); 5189 5190 /* 5191 * Check to see of we are removing the last path of this 5192 * client device... 5193 */ 5194 cdip = ct->ct_dip; 5195 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5196 (i_mdi_client_compute_state(ct, ph) == 5197 MDI_CLIENT_STATE_FAILED)) { 5198 i_mdi_client_unlock(ct); 5199 MDI_PHCI_UNLOCK(ph); 5200 if (ndi_devi_offline(cdip, 5201 NDI_DEVFS_CLEAN) != NDI_SUCCESS) { 5202 /* 5203 * ndi_devi_offline() failed. 5204 * This pHCI provides the critical path 5205 * to one or more client devices. 5206 * Return busy. 5207 */ 5208 MDI_PHCI_LOCK(ph); 5209 MDI_DEBUG(1, (MDI_WARN, dip, 5210 "!pHCI device is busy. " 5211 "This device can not be removed at this " 5212 "moment. 
Please try again later.")); 5213 failed_pip = pip; 5214 break; 5215 } else { 5216 MDI_PHCI_LOCK(ph); 5217 pip = next; 5218 } 5219 } else { 5220 i_mdi_client_unlock(ct); 5221 pip = next; 5222 } 5223 } 5224 5225 if (failed_pip) { 5226 pip = ph->ph_path_head; 5227 while (pip != failed_pip) { 5228 MDI_PI_LOCK(pip); 5229 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5230 ct = MDI_PI(pip)->pi_client; 5231 i_mdi_client_lock(ct, pip); 5232 cdip = ct->ct_dip; 5233 switch (MDI_CLIENT_STATE(ct)) { 5234 case MDI_CLIENT_STATE_OPTIMAL: 5235 case MDI_CLIENT_STATE_DEGRADED: 5236 if (cdip) { 5237 MDI_PI_UNLOCK(pip); 5238 i_mdi_client_unlock(ct); 5239 MDI_PHCI_UNLOCK(ph); 5240 (void) ndi_devi_online(cdip, 0); 5241 MDI_PHCI_LOCK(ph); 5242 pip = next; 5243 continue; 5244 } 5245 break; 5246 5247 case MDI_CLIENT_STATE_FAILED: 5248 if (cdip) { 5249 MDI_PI_UNLOCK(pip); 5250 i_mdi_client_unlock(ct); 5251 MDI_PHCI_UNLOCK(ph); 5252 (void) ndi_devi_offline(cdip, 5253 NDI_DEVFS_CLEAN); 5254 MDI_PHCI_LOCK(ph); 5255 pip = next; 5256 continue; 5257 } 5258 break; 5259 } 5260 MDI_PI_UNLOCK(pip); 5261 i_mdi_client_unlock(ct); 5262 pip = next; 5263 } 5264 MDI_PHCI_UNLOCK(ph); 5265 return (NDI_BUSY); 5266 } 5267 5268 /* 5269 * Mark the pHCI as offline 5270 */ 5271 MDI_PHCI_SET_OFFLINE(ph); 5272 5273 /* 5274 * Mark the child mdi_pathinfo nodes as transient 5275 */ 5276 pip = ph->ph_path_head; 5277 while (pip != NULL) { 5278 MDI_PI_LOCK(pip); 5279 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5280 MDI_PI_SET_OFFLINING(pip); 5281 MDI_PI_UNLOCK(pip); 5282 pip = next; 5283 } 5284 MDI_PHCI_UNLOCK(ph); 5285 /* 5286 * Give a chance for any pending commands to execute 5287 */ 5288 delay_random(mdi_delay); 5289 MDI_PHCI_LOCK(ph); 5290 pip = ph->ph_path_head; 5291 while (pip != NULL) { 5292 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5293 (void) i_mdi_pi_offline(pip, flags); 5294 MDI_PI_LOCK(pip); 5295 ct = MDI_PI(pip)->pi_client; 5296 if (!MDI_PI_IS_OFFLINE(pip)) { 5297 MDI_DEBUG(1, 
(MDI_WARN, dip, 5298 "!pHCI device is busy. " 5299 "This device can not be removed at this moment. " 5300 "Please try again later.")); 5301 MDI_PI_UNLOCK(pip); 5302 MDI_PHCI_SET_ONLINE(ph); 5303 MDI_PHCI_UNLOCK(ph); 5304 return (NDI_BUSY); 5305 } 5306 MDI_PI_UNLOCK(pip); 5307 pip = next; 5308 } 5309 MDI_PHCI_UNLOCK(ph); 5310 5311 return (rv); 5312 } 5313 5314 void 5315 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5316 { 5317 mdi_phci_t *ph; 5318 mdi_client_t *ct; 5319 mdi_pathinfo_t *pip; 5320 mdi_pathinfo_t *next; 5321 dev_info_t *cdip; 5322 5323 if (!MDI_PHCI(dip)) 5324 return; 5325 5326 ph = i_devi_get_phci(dip); 5327 if (ph == NULL) { 5328 return; 5329 } 5330 5331 MDI_PHCI_LOCK(ph); 5332 5333 if (MDI_PHCI_IS_OFFLINE(ph)) { 5334 /* has no last path */ 5335 MDI_PHCI_UNLOCK(ph); 5336 return; 5337 } 5338 5339 pip = ph->ph_path_head; 5340 while (pip != NULL) { 5341 MDI_PI_LOCK(pip); 5342 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5343 5344 ct = MDI_PI(pip)->pi_client; 5345 i_mdi_client_lock(ct, pip); 5346 MDI_PI_UNLOCK(pip); 5347 5348 cdip = ct->ct_dip; 5349 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5350 (i_mdi_client_compute_state(ct, ph) == 5351 MDI_CLIENT_STATE_FAILED)) { 5352 /* Last path. 
Mark client dip as retiring */ 5353 i_mdi_client_unlock(ct); 5354 MDI_PHCI_UNLOCK(ph); 5355 (void) e_ddi_mark_retiring(cdip, cons_array); 5356 MDI_PHCI_LOCK(ph); 5357 pip = next; 5358 } else { 5359 i_mdi_client_unlock(ct); 5360 pip = next; 5361 } 5362 } 5363 5364 MDI_PHCI_UNLOCK(ph); 5365 5366 return; 5367 } 5368 5369 void 5370 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5371 { 5372 mdi_phci_t *ph; 5373 mdi_client_t *ct; 5374 mdi_pathinfo_t *pip; 5375 mdi_pathinfo_t *next; 5376 dev_info_t *cdip; 5377 5378 if (!MDI_PHCI(dip)) 5379 return; 5380 5381 ph = i_devi_get_phci(dip); 5382 if (ph == NULL) 5383 return; 5384 5385 MDI_PHCI_LOCK(ph); 5386 5387 if (MDI_PHCI_IS_OFFLINE(ph)) { 5388 MDI_PHCI_UNLOCK(ph); 5389 /* not last path */ 5390 return; 5391 } 5392 5393 if (ph->ph_unstable) { 5394 MDI_PHCI_UNLOCK(ph); 5395 /* can't check for constraints */ 5396 *constraint = 0; 5397 return; 5398 } 5399 5400 pip = ph->ph_path_head; 5401 while (pip != NULL) { 5402 MDI_PI_LOCK(pip); 5403 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5404 5405 /* 5406 * The mdi_pathinfo state is OK. Check the client state. 5407 * If failover in progress fail the pHCI from offlining 5408 */ 5409 ct = MDI_PI(pip)->pi_client; 5410 i_mdi_client_lock(ct, pip); 5411 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5412 (ct->ct_unstable)) { 5413 /* 5414 * Failover is in progress, can't check for constraints 5415 */ 5416 MDI_PI_UNLOCK(pip); 5417 i_mdi_client_unlock(ct); 5418 MDI_PHCI_UNLOCK(ph); 5419 *constraint = 0; 5420 return; 5421 } 5422 MDI_PI_UNLOCK(pip); 5423 5424 /* 5425 * Check to see of we are retiring the last path of this 5426 * client device... 
5427 */ 5428 cdip = ct->ct_dip; 5429 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5430 (i_mdi_client_compute_state(ct, ph) == 5431 MDI_CLIENT_STATE_FAILED)) { 5432 i_mdi_client_unlock(ct); 5433 MDI_PHCI_UNLOCK(ph); 5434 (void) e_ddi_retire_notify(cdip, constraint); 5435 MDI_PHCI_LOCK(ph); 5436 pip = next; 5437 } else { 5438 i_mdi_client_unlock(ct); 5439 pip = next; 5440 } 5441 } 5442 5443 MDI_PHCI_UNLOCK(ph); 5444 5445 return; 5446 } 5447 5448 /* 5449 * offline the path(s) hanging off the pHCI. If the 5450 * last path to any client, check that constraints 5451 * have been applied. 5452 * 5453 * If constraint is 0, we aren't going to retire the 5454 * pHCI. However we still need to go through the paths 5455 * calling e_ddi_retire_finalize() to clear their 5456 * contract barriers. 5457 */ 5458 void 5459 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint) 5460 { 5461 mdi_phci_t *ph; 5462 mdi_client_t *ct; 5463 mdi_pathinfo_t *pip; 5464 mdi_pathinfo_t *next; 5465 dev_info_t *cdip; 5466 int unstable = 0; 5467 int tmp_constraint; 5468 5469 if (!MDI_PHCI(dip)) 5470 return; 5471 5472 ph = i_devi_get_phci(dip); 5473 if (ph == NULL) { 5474 /* no last path and no pips */ 5475 return; 5476 } 5477 5478 MDI_PHCI_LOCK(ph); 5479 5480 if (MDI_PHCI_IS_OFFLINE(ph)) { 5481 MDI_PHCI_UNLOCK(ph); 5482 /* no last path and no pips */ 5483 return; 5484 } 5485 5486 /* 5487 * Check to see if the pHCI can be offlined 5488 */ 5489 if (ph->ph_unstable) { 5490 unstable = 1; 5491 } 5492 5493 pip = ph->ph_path_head; 5494 while (pip != NULL) { 5495 MDI_PI_LOCK(pip); 5496 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5497 5498 /* 5499 * if failover in progress fail the pHCI from offlining 5500 */ 5501 ct = MDI_PI(pip)->pi_client; 5502 i_mdi_client_lock(ct, pip); 5503 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5504 (ct->ct_unstable)) { 5505 unstable = 1; 5506 } 5507 MDI_PI_UNLOCK(pip); 5508 5509 /* 5510 * Check to see of we are removing the last path of 
this 5511 * client device... 5512 */ 5513 cdip = ct->ct_dip; 5514 if (!phci_only && cdip && 5515 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5516 (i_mdi_client_compute_state(ct, ph) == 5517 MDI_CLIENT_STATE_FAILED)) { 5518 i_mdi_client_unlock(ct); 5519 MDI_PHCI_UNLOCK(ph); 5520 /* 5521 * This is the last path to this client. 5522 * 5523 * Constraint will only be set to 1 if this client can 5524 * be retired (as already determined by 5525 * mdi_phci_retire_notify). However we don't actually 5526 * need to retire the client (we just retire the last 5527 * path - MPXIO will then fail all I/Os to the client). 5528 * But we still need to call e_ddi_retire_finalize so 5529 * the contract barriers can be cleared. Therefore we 5530 * temporarily set constraint = 0 so that the client 5531 * dip is not retired. 5532 */ 5533 tmp_constraint = 0; 5534 (void) e_ddi_retire_finalize(cdip, &tmp_constraint); 5535 MDI_PHCI_LOCK(ph); 5536 pip = next; 5537 } else { 5538 i_mdi_client_unlock(ct); 5539 pip = next; 5540 } 5541 } 5542 5543 if (!phci_only && *((int *)constraint) == 0) { 5544 MDI_PHCI_UNLOCK(ph); 5545 return; 5546 } 5547 5548 /* 5549 * Cannot offline pip(s) 5550 */ 5551 if (unstable) { 5552 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: " 5553 "pHCI in transient state, cannot retire", 5554 ddi_driver_name(dip), ddi_get_instance(dip)); 5555 MDI_PHCI_UNLOCK(ph); 5556 return; 5557 } 5558 5559 /* 5560 * Mark the pHCI as offline 5561 */ 5562 MDI_PHCI_SET_OFFLINE(ph); 5563 5564 /* 5565 * Mark the child mdi_pathinfo nodes as transient 5566 */ 5567 pip = ph->ph_path_head; 5568 while (pip != NULL) { 5569 MDI_PI_LOCK(pip); 5570 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5571 MDI_PI_SET_OFFLINING(pip); 5572 MDI_PI_UNLOCK(pip); 5573 pip = next; 5574 } 5575 MDI_PHCI_UNLOCK(ph); 5576 /* 5577 * Give a chance for any pending commands to execute 5578 */ 5579 delay_random(mdi_delay); 5580 MDI_PHCI_LOCK(ph); 5581 pip = ph->ph_path_head; 5582 while (pip != NULL) { 5583 next = 
(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5584 (void) i_mdi_pi_offline(pip, 0); 5585 MDI_PI_LOCK(pip); 5586 ct = MDI_PI(pip)->pi_client; 5587 if (!MDI_PI_IS_OFFLINE(pip)) { 5588 cmn_err(CE_WARN, "mdi_phci_retire_finalize: " 5589 "path %d %s busy, cannot offline", 5590 mdi_pi_get_path_instance(pip), 5591 mdi_pi_spathname(pip)); 5592 MDI_PI_UNLOCK(pip); 5593 MDI_PHCI_SET_ONLINE(ph); 5594 MDI_PHCI_UNLOCK(ph); 5595 return; 5596 } 5597 MDI_PI_UNLOCK(pip); 5598 pip = next; 5599 } 5600 MDI_PHCI_UNLOCK(ph); 5601 5602 return; 5603 } 5604 5605 void 5606 mdi_phci_unretire(dev_info_t *dip) 5607 { 5608 mdi_phci_t *ph; 5609 mdi_pathinfo_t *pip; 5610 mdi_pathinfo_t *next; 5611 5612 ASSERT(MDI_PHCI(dip)); 5613 5614 /* 5615 * Online the phci 5616 */ 5617 i_mdi_phci_online(dip); 5618 5619 ph = i_devi_get_phci(dip); 5620 MDI_PHCI_LOCK(ph); 5621 pip = ph->ph_path_head; 5622 while (pip != NULL) { 5623 MDI_PI_LOCK(pip); 5624 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5625 MDI_PI_UNLOCK(pip); 5626 (void) i_mdi_pi_online(pip, 0); 5627 pip = next; 5628 } 5629 MDI_PHCI_UNLOCK(ph); 5630 } 5631 5632 /*ARGSUSED*/ 5633 static int 5634 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5635 { 5636 int rv = NDI_SUCCESS; 5637 mdi_client_t *ct; 5638 5639 /* 5640 * Client component to go offline. Make sure that we are 5641 * not in failing over state and update client state 5642 * accordingly 5643 */ 5644 ct = i_devi_get_client(dip); 5645 MDI_DEBUG(2, (MDI_NOTE, dip, 5646 "called %p %p", (void *)dip, (void *)ct)); 5647 if (ct != NULL) { 5648 MDI_CLIENT_LOCK(ct); 5649 if (ct->ct_unstable) { 5650 /* 5651 * One or more paths are in transient state, 5652 * Dont allow offline of a client device 5653 */ 5654 MDI_DEBUG(1, (MDI_WARN, dip, 5655 "!One or more paths to " 5656 "this device are in transient state. " 5657 "This device can not be removed at this moment. 
" 5658 "Please try again later.")); 5659 MDI_CLIENT_UNLOCK(ct); 5660 return (NDI_BUSY); 5661 } 5662 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5663 /* 5664 * Failover is in progress, Dont allow DR of 5665 * a client device 5666 */ 5667 MDI_DEBUG(1, (MDI_WARN, dip, 5668 "!Client device is Busy. " 5669 "This device can not be removed at this moment. " 5670 "Please try again later.")); 5671 MDI_CLIENT_UNLOCK(ct); 5672 return (NDI_BUSY); 5673 } 5674 MDI_CLIENT_SET_OFFLINE(ct); 5675 5676 /* 5677 * Unbind our relationship with the dev_info node 5678 */ 5679 if (flags & NDI_DEVI_REMOVE) { 5680 ct->ct_dip = NULL; 5681 } 5682 MDI_CLIENT_UNLOCK(ct); 5683 } 5684 return (rv); 5685 } 5686 5687 /* 5688 * mdi_pre_attach(): 5689 * Pre attach() notification handler 5690 */ 5691 /*ARGSUSED*/ 5692 int 5693 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5694 { 5695 /* don't support old DDI_PM_RESUME */ 5696 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5697 (cmd == DDI_PM_RESUME)) 5698 return (DDI_FAILURE); 5699 5700 return (DDI_SUCCESS); 5701 } 5702 5703 /* 5704 * mdi_post_attach(): 5705 * Post attach() notification handler 5706 */ 5707 /*ARGSUSED*/ 5708 void 5709 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5710 { 5711 mdi_phci_t *ph; 5712 mdi_client_t *ct; 5713 mdi_vhci_t *vh; 5714 5715 if (MDI_PHCI(dip)) { 5716 ph = i_devi_get_phci(dip); 5717 ASSERT(ph != NULL); 5718 5719 MDI_PHCI_LOCK(ph); 5720 switch (cmd) { 5721 case DDI_ATTACH: 5722 MDI_DEBUG(2, (MDI_NOTE, dip, 5723 "phci post_attach called %p", (void *)ph)); 5724 if (error == DDI_SUCCESS) { 5725 MDI_PHCI_SET_ATTACH(ph); 5726 } else { 5727 MDI_DEBUG(1, (MDI_NOTE, dip, 5728 "!pHCI post_attach failed: error %d", 5729 error)); 5730 MDI_PHCI_SET_DETACH(ph); 5731 } 5732 break; 5733 5734 case DDI_RESUME: 5735 case DDI_PM_RESUME: 5736 MDI_DEBUG(2, (MDI_NOTE, dip, 5737 "pHCI post_resume: called %p", (void *)ph)); 5738 if (error == DDI_SUCCESS) { 5739 MDI_PHCI_SET_RESUME(ph); 5740 } else { 
5741 MDI_DEBUG(1, (MDI_NOTE, dip, 5742 "!pHCI post_resume failed: error %d", 5743 error)); 5744 MDI_PHCI_SET_SUSPEND(ph); 5745 } 5746 break; 5747 } 5748 MDI_PHCI_UNLOCK(ph); 5749 } 5750 5751 if (MDI_CLIENT(dip)) { 5752 ct = i_devi_get_client(dip); 5753 ASSERT(ct != NULL); 5754 5755 MDI_CLIENT_LOCK(ct); 5756 switch (cmd) { 5757 case DDI_ATTACH: 5758 MDI_DEBUG(2, (MDI_NOTE, dip, 5759 "client post_attach called %p", (void *)ct)); 5760 if (error != DDI_SUCCESS) { 5761 MDI_DEBUG(1, (MDI_NOTE, dip, 5762 "!client post_attach failed: error %d", 5763 error)); 5764 MDI_CLIENT_SET_DETACH(ct); 5765 MDI_DEBUG(4, (MDI_WARN, dip, 5766 "i_mdi_pm_reset_client")); 5767 i_mdi_pm_reset_client(ct); 5768 break; 5769 } 5770 5771 /* 5772 * Client device has successfully attached, inform 5773 * the vhci. 5774 */ 5775 vh = ct->ct_vhci; 5776 if (vh->vh_ops->vo_client_attached) 5777 (*vh->vh_ops->vo_client_attached)(dip); 5778 5779 MDI_CLIENT_SET_ATTACH(ct); 5780 break; 5781 5782 case DDI_RESUME: 5783 case DDI_PM_RESUME: 5784 MDI_DEBUG(2, (MDI_NOTE, dip, 5785 "client post_attach: called %p", (void *)ct)); 5786 if (error == DDI_SUCCESS) { 5787 MDI_CLIENT_SET_RESUME(ct); 5788 } else { 5789 MDI_DEBUG(1, (MDI_NOTE, dip, 5790 "!client post_resume failed: error %d", 5791 error)); 5792 MDI_CLIENT_SET_SUSPEND(ct); 5793 } 5794 break; 5795 } 5796 MDI_CLIENT_UNLOCK(ct); 5797 } 5798 } 5799 5800 /* 5801 * mdi_pre_detach(): 5802 * Pre detach notification handler 5803 */ 5804 /*ARGSUSED*/ 5805 int 5806 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5807 { 5808 int rv = DDI_SUCCESS; 5809 5810 if (MDI_CLIENT(dip)) { 5811 (void) i_mdi_client_pre_detach(dip, cmd); 5812 } 5813 5814 if (MDI_PHCI(dip)) { 5815 rv = i_mdi_phci_pre_detach(dip, cmd); 5816 } 5817 5818 return (rv); 5819 } 5820 5821 /*ARGSUSED*/ 5822 static int 5823 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5824 { 5825 int rv = DDI_SUCCESS; 5826 mdi_phci_t *ph; 5827 mdi_client_t *ct; 5828 mdi_pathinfo_t *pip; 5829 mdi_pathinfo_t 
*failed_pip = NULL; 5830 mdi_pathinfo_t *next; 5831 5832 ph = i_devi_get_phci(dip); 5833 if (ph == NULL) { 5834 return (rv); 5835 } 5836 5837 MDI_PHCI_LOCK(ph); 5838 switch (cmd) { 5839 case DDI_DETACH: 5840 MDI_DEBUG(2, (MDI_NOTE, dip, 5841 "pHCI pre_detach: called %p", (void *)ph)); 5842 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5843 /* 5844 * mdi_pathinfo nodes are still attached to 5845 * this pHCI. Fail the detach for this pHCI. 5846 */ 5847 MDI_DEBUG(2, (MDI_WARN, dip, 5848 "pHCI pre_detach: paths are still attached %p", 5849 (void *)ph)); 5850 rv = DDI_FAILURE; 5851 break; 5852 } 5853 MDI_PHCI_SET_DETACH(ph); 5854 break; 5855 5856 case DDI_SUSPEND: 5857 /* 5858 * pHCI is getting suspended. Since mpxio client 5859 * devices may not be suspended at this point, to avoid 5860 * a potential stack overflow, it is important to suspend 5861 * client devices before pHCI can be suspended. 5862 */ 5863 5864 MDI_DEBUG(2, (MDI_NOTE, dip, 5865 "pHCI pre_suspend: called %p", (void *)ph)); 5866 /* 5867 * Suspend all the client devices accessible through this pHCI 5868 */ 5869 pip = ph->ph_path_head; 5870 while (pip != NULL && rv == DDI_SUCCESS) { 5871 dev_info_t *cdip; 5872 MDI_PI_LOCK(pip); 5873 next = 5874 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5875 ct = MDI_PI(pip)->pi_client; 5876 i_mdi_client_lock(ct, pip); 5877 cdip = ct->ct_dip; 5878 MDI_PI_UNLOCK(pip); 5879 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5880 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5881 i_mdi_client_unlock(ct); 5882 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5883 DDI_SUCCESS) { 5884 /* 5885 * Suspend of one of the client 5886 * device has failed. 5887 */ 5888 MDI_DEBUG(1, (MDI_WARN, dip, 5889 "!suspend of device (%s%d) failed.", 5890 ddi_driver_name(cdip), 5891 ddi_get_instance(cdip))); 5892 failed_pip = pip; 5893 break; 5894 } 5895 } else { 5896 i_mdi_client_unlock(ct); 5897 } 5898 pip = next; 5899 } 5900 5901 if (rv == DDI_SUCCESS) { 5902 /* 5903 * Suspend of client devices is complete. 
Proceed 5904 * with pHCI suspend. 5905 */ 5906 MDI_PHCI_SET_SUSPEND(ph); 5907 } else { 5908 /* 5909 * Revert back all the suspended client device states 5910 * to converse. 5911 */ 5912 pip = ph->ph_path_head; 5913 while (pip != failed_pip) { 5914 dev_info_t *cdip; 5915 MDI_PI_LOCK(pip); 5916 next = 5917 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5918 ct = MDI_PI(pip)->pi_client; 5919 i_mdi_client_lock(ct, pip); 5920 cdip = ct->ct_dip; 5921 MDI_PI_UNLOCK(pip); 5922 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5923 i_mdi_client_unlock(ct); 5924 (void) devi_attach(cdip, DDI_RESUME); 5925 } else { 5926 i_mdi_client_unlock(ct); 5927 } 5928 pip = next; 5929 } 5930 } 5931 break; 5932 5933 default: 5934 rv = DDI_FAILURE; 5935 break; 5936 } 5937 MDI_PHCI_UNLOCK(ph); 5938 return (rv); 5939 } 5940 5941 /*ARGSUSED*/ 5942 static int 5943 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5944 { 5945 int rv = DDI_SUCCESS; 5946 mdi_client_t *ct; 5947 5948 ct = i_devi_get_client(dip); 5949 if (ct == NULL) { 5950 return (rv); 5951 } 5952 5953 MDI_CLIENT_LOCK(ct); 5954 switch (cmd) { 5955 case DDI_DETACH: 5956 MDI_DEBUG(2, (MDI_NOTE, dip, 5957 "client pre_detach: called %p", 5958 (void *)ct)); 5959 MDI_CLIENT_SET_DETACH(ct); 5960 break; 5961 5962 case DDI_SUSPEND: 5963 MDI_DEBUG(2, (MDI_NOTE, dip, 5964 "client pre_suspend: called %p", 5965 (void *)ct)); 5966 MDI_CLIENT_SET_SUSPEND(ct); 5967 break; 5968 5969 default: 5970 rv = DDI_FAILURE; 5971 break; 5972 } 5973 MDI_CLIENT_UNLOCK(ct); 5974 return (rv); 5975 } 5976 5977 /* 5978 * mdi_post_detach(): 5979 * Post detach notification handler 5980 */ 5981 /*ARGSUSED*/ 5982 void 5983 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5984 { 5985 /* 5986 * Detach/Suspend of mpxio component failed. 
Update our state 5987 * too 5988 */ 5989 if (MDI_PHCI(dip)) 5990 i_mdi_phci_post_detach(dip, cmd, error); 5991 5992 if (MDI_CLIENT(dip)) 5993 i_mdi_client_post_detach(dip, cmd, error); 5994 } 5995 5996 /*ARGSUSED*/ 5997 static void 5998 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5999 { 6000 mdi_phci_t *ph; 6001 6002 /* 6003 * Detach/Suspend of phci component failed. Update our state 6004 * too 6005 */ 6006 ph = i_devi_get_phci(dip); 6007 if (ph == NULL) { 6008 return; 6009 } 6010 6011 MDI_PHCI_LOCK(ph); 6012 /* 6013 * Detach of pHCI failed. Restore back converse 6014 * state 6015 */ 6016 switch (cmd) { 6017 case DDI_DETACH: 6018 MDI_DEBUG(2, (MDI_NOTE, dip, 6019 "pHCI post_detach: called %p", 6020 (void *)ph)); 6021 if (error != DDI_SUCCESS) 6022 MDI_PHCI_SET_ATTACH(ph); 6023 break; 6024 6025 case DDI_SUSPEND: 6026 case DDI_PM_SUSPEND: 6027 MDI_DEBUG(2, (MDI_NOTE, dip, 6028 "pHCI post_suspend: called %p", 6029 (void *)ph)); 6030 if (error != DDI_SUCCESS) 6031 MDI_PHCI_SET_RESUME(ph); 6032 break; 6033 case DDI_HOTPLUG_DETACH: 6034 break; 6035 } 6036 MDI_PHCI_UNLOCK(ph); 6037 } 6038 6039 /*ARGSUSED*/ 6040 static void 6041 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 6042 { 6043 mdi_client_t *ct; 6044 6045 ct = i_devi_get_client(dip); 6046 if (ct == NULL) { 6047 return; 6048 } 6049 MDI_CLIENT_LOCK(ct); 6050 /* 6051 * Detach of Client failed. 
Restore back converse 6052 * state 6053 */ 6054 switch (cmd) { 6055 case DDI_DETACH: 6056 MDI_DEBUG(2, (MDI_NOTE, dip, 6057 "client post_detach: called %p", (void *)ct)); 6058 if (DEVI_IS_ATTACHING(dip)) { 6059 MDI_DEBUG(4, (MDI_NOTE, dip, 6060 "i_mdi_pm_rele_client\n")); 6061 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6062 } else { 6063 MDI_DEBUG(4, (MDI_NOTE, dip, 6064 "i_mdi_pm_reset_client\n")); 6065 i_mdi_pm_reset_client(ct); 6066 } 6067 if (error != DDI_SUCCESS) 6068 MDI_CLIENT_SET_ATTACH(ct); 6069 break; 6070 6071 case DDI_SUSPEND: 6072 case DDI_PM_SUSPEND: 6073 MDI_DEBUG(2, (MDI_NOTE, dip, 6074 "called %p", (void *)ct)); 6075 if (error != DDI_SUCCESS) 6076 MDI_CLIENT_SET_RESUME(ct); 6077 break; 6078 case DDI_HOTPLUG_DETACH: 6079 break; 6080 } 6081 MDI_CLIENT_UNLOCK(ct); 6082 } 6083 6084 int 6085 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 6086 { 6087 return (MDI_PI(pip)->pi_kstats ? 1 : 0); 6088 } 6089 6090 /* 6091 * create and install per-path (client - pHCI) statistics 6092 * I/O stats supported: nread, nwritten, reads, and writes 6093 * Error stats - hard errors, soft errors, & transport errors 6094 */ 6095 int 6096 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 6097 { 6098 kstat_t *kiosp, *kerrsp; 6099 struct pi_errs *nsp; 6100 struct mdi_pi_kstats *mdi_statp; 6101 6102 if (MDI_PI(pip)->pi_kstats != NULL) 6103 return (MDI_SUCCESS); 6104 6105 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 6106 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 6107 return (MDI_FAILURE); 6108 } 6109 6110 (void) strcat(ksname, ",err"); 6111 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 6112 KSTAT_TYPE_NAMED, 6113 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 6114 if (kerrsp == NULL) { 6115 kstat_delete(kiosp); 6116 return (MDI_FAILURE); 6117 } 6118 6119 nsp = (struct pi_errs *)kerrsp->ks_data; 6120 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 6121 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", 
KSTAT_DATA_UINT32); 6122 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 6123 KSTAT_DATA_UINT32); 6124 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 6125 KSTAT_DATA_UINT32); 6126 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 6127 KSTAT_DATA_UINT32); 6128 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 6129 KSTAT_DATA_UINT32); 6130 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 6131 KSTAT_DATA_UINT32); 6132 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 6133 KSTAT_DATA_UINT32); 6134 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 6135 KSTAT_DATA_UINT32); 6136 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 6137 6138 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 6139 mdi_statp->pi_kstat_ref = 1; 6140 mdi_statp->pi_kstat_iostats = kiosp; 6141 mdi_statp->pi_kstat_errstats = kerrsp; 6142 kstat_install(kiosp); 6143 kstat_install(kerrsp); 6144 MDI_PI(pip)->pi_kstats = mdi_statp; 6145 return (MDI_SUCCESS); 6146 } 6147 6148 /* 6149 * destroy per-path properties 6150 */ 6151 static void 6152 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 6153 { 6154 6155 struct mdi_pi_kstats *mdi_statp; 6156 6157 if (MDI_PI(pip)->pi_kstats == NULL) 6158 return; 6159 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 6160 return; 6161 6162 MDI_PI(pip)->pi_kstats = NULL; 6163 6164 /* 6165 * the kstat may be shared between multiple pathinfo nodes 6166 * decrement this pathinfo's usage, removing the kstats 6167 * themselves when the last pathinfo reference is removed. 
	 */
	ASSERT(mdi_statp->pi_kstat_ref > 0);
	if (--mdi_statp->pi_kstat_ref != 0)
		return;

	/* last reference dropped: remove and free the shared kstats */
	kstat_delete(mdi_statp->pi_kstat_iostats);
	kstat_delete(mdi_statp->pi_kstat_errstats);
	kmem_free(mdi_statp, sizeof (*mdi_statp));
}

/*
 * update I/O paths KSTATS
 */
void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
{
	kstat_t		*iostatp;
	size_t		xfer_cnt;

	ASSERT(pip != NULL);

	/*
	 * I/O can be driven across a path prior to having path
	 * statistics available, i.e. probe(9e).
	 */
	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
		/* bytes actually transferred, not merely requested */
		xfer_cnt = bp->b_bcount - bp->b_resid;
		if (bp->b_flags & B_READ) {
			KSTAT_IO_PTR(iostatp)->reads++;
			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
		} else {
			KSTAT_IO_PTR(iostatp)->writes++;
			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
		}
	}
}

/*
 * Enable the path(specific client/target/initiator)
 * Enabling a path means that MPxIO may select the enabled path for routing
 * future I/O requests, subject to other path state constraints.
 */
int
mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*ph;

	ph = MDI_PI(pip)->pi_phci;
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
		    "!failed: path %s %p: NULL ph",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
	    MDI_ENABLE_OP);
	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
	    "!returning success pip = %p. ph = %p",
	    (void *)pip, (void *)ph));
	return (MDI_SUCCESS);
}

/*
 * Disable the path (specific client/target/initiator)
 * Disabling a path means that MPxIO will not select the disabled path for
 * routing any new I/O requests.
 */
int
mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*ph;

	ph = MDI_PI(pip)->pi_phci;
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
		    "!failed: path %s %p: NULL ph",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	(void) i_mdi_enable_disable_path(pip,
	    ph->ph_vhci, flags, MDI_DISABLE_OP);
	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
	    "!returning success pip = %p. ph = %p",
	    (void *)pip, (void *)ph));
	return (MDI_SUCCESS);
}

/*
 * disable the path to a particular pHCI (pHCI specified in the phci_path
 * argument) for a particular client (specified in the client_path argument).
 * Disabling a path means that MPxIO will not select the disabled path for
 * routing any new I/O requests.
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
int
mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
{
	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
}

/*
 * Enable the path to a particular pHCI (pHCI specified in the phci_path
 * argument) for a particular client (specified in the client_path argument).
 * Enabling a path means that MPxIO may select the enabled path for routing
 * future I/O requests, subject to other path state constraints.
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
int
mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
{
	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
}

/*
 * Common routine for doing enable/disable.
6290 */ 6291 static mdi_pathinfo_t * 6292 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6293 int op) 6294 { 6295 int sync_flag = 0; 6296 int rv; 6297 mdi_pathinfo_t *next; 6298 int (*f)() = NULL; 6299 6300 /* 6301 * Check to make sure the path is not already in the 6302 * requested state. If it is just return the next path 6303 * as we have nothing to do here. 6304 */ 6305 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6306 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6307 MDI_PI_LOCK(pip); 6308 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6309 MDI_PI_UNLOCK(pip); 6310 return (next); 6311 } 6312 6313 f = vh->vh_ops->vo_pi_state_change; 6314 6315 sync_flag = (flags << 8) & 0xf00; 6316 6317 /* 6318 * Do a callback into the mdi consumer to let it 6319 * know that path is about to get enabled/disabled. 6320 */ 6321 rv = MDI_SUCCESS; 6322 if (f != NULL) { 6323 rv = (*f)(vh->vh_dip, pip, 0, 6324 MDI_PI_EXT_STATE(pip), 6325 MDI_EXT_STATE_CHANGE | sync_flag | 6326 op | MDI_BEFORE_STATE_CHANGE); 6327 if (rv != MDI_SUCCESS) { 6328 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6329 "vo_pi_state_change: failed rv = %x", rv)); 6330 } 6331 } 6332 MDI_PI_LOCK(pip); 6333 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6334 6335 switch (flags) { 6336 case USER_DISABLE: 6337 if (op == MDI_DISABLE_OP) { 6338 MDI_PI_SET_USER_DISABLE(pip); 6339 } else { 6340 MDI_PI_SET_USER_ENABLE(pip); 6341 } 6342 break; 6343 case DRIVER_DISABLE: 6344 if (op == MDI_DISABLE_OP) { 6345 MDI_PI_SET_DRV_DISABLE(pip); 6346 } else { 6347 MDI_PI_SET_DRV_ENABLE(pip); 6348 } 6349 break; 6350 case DRIVER_DISABLE_TRANSIENT: 6351 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6352 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6353 } else { 6354 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6355 } 6356 break; 6357 } 6358 MDI_PI_UNLOCK(pip); 6359 /* 6360 * Do a callback into the mdi consumer to let it 6361 * know that path is now enabled/disabled. 
6362 */ 6363 if (f != NULL) { 6364 rv = (*f)(vh->vh_dip, pip, 0, 6365 MDI_PI_EXT_STATE(pip), 6366 MDI_EXT_STATE_CHANGE | sync_flag | 6367 op | MDI_AFTER_STATE_CHANGE); 6368 if (rv != MDI_SUCCESS) { 6369 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6370 "vo_pi_state_change failed: rv = %x", rv)); 6371 } 6372 } 6373 return (next); 6374 } 6375 6376 /* 6377 * Common routine for doing enable/disable. 6378 * NOTE: this will be removed once the NWS files are changed to use the new 6379 * mdi_{enable,disable}_path has been putback 6380 */ 6381 int 6382 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6383 { 6384 6385 mdi_phci_t *ph; 6386 mdi_vhci_t *vh = NULL; 6387 mdi_client_t *ct; 6388 mdi_pathinfo_t *next, *pip; 6389 int found_it; 6390 6391 ph = i_devi_get_phci(pdip); 6392 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6393 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6394 (void *)cdip)); 6395 if (ph == NULL) { 6396 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6397 "!failed: operation %d: NULL ph", op)); 6398 return (MDI_FAILURE); 6399 } 6400 6401 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6402 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6403 "!failed: invalid operation %d", op)); 6404 return (MDI_FAILURE); 6405 } 6406 6407 vh = ph->ph_vhci; 6408 6409 if (cdip == NULL) { 6410 /* 6411 * Need to mark the Phci as enabled/disabled. 6412 */ 6413 MDI_DEBUG(4, (MDI_NOTE, cdip ? 
cdip : pdip, 6414 "op %d for the phci", op)); 6415 MDI_PHCI_LOCK(ph); 6416 switch (flags) { 6417 case USER_DISABLE: 6418 if (op == MDI_DISABLE_OP) { 6419 MDI_PHCI_SET_USER_DISABLE(ph); 6420 } else { 6421 MDI_PHCI_SET_USER_ENABLE(ph); 6422 } 6423 break; 6424 case DRIVER_DISABLE: 6425 if (op == MDI_DISABLE_OP) { 6426 MDI_PHCI_SET_DRV_DISABLE(ph); 6427 } else { 6428 MDI_PHCI_SET_DRV_ENABLE(ph); 6429 } 6430 break; 6431 case DRIVER_DISABLE_TRANSIENT: 6432 if (op == MDI_DISABLE_OP) { 6433 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6434 } else { 6435 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6436 } 6437 break; 6438 default: 6439 MDI_PHCI_UNLOCK(ph); 6440 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6441 "!invalid flag argument= %d", flags)); 6442 } 6443 6444 /* 6445 * Phci has been disabled. Now try to enable/disable 6446 * path info's to each client. 6447 */ 6448 pip = ph->ph_path_head; 6449 while (pip != NULL) { 6450 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6451 } 6452 MDI_PHCI_UNLOCK(ph); 6453 } else { 6454 6455 /* 6456 * Disable a specific client. 6457 */ 6458 ct = i_devi_get_client(cdip); 6459 if (ct == NULL) { 6460 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6461 "!failed: operation = %d: NULL ct", op)); 6462 return (MDI_FAILURE); 6463 } 6464 6465 MDI_CLIENT_LOCK(ct); 6466 pip = ct->ct_path_head; 6467 found_it = 0; 6468 while (pip != NULL) { 6469 MDI_PI_LOCK(pip); 6470 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6471 if (MDI_PI(pip)->pi_phci == ph) { 6472 MDI_PI_UNLOCK(pip); 6473 found_it = 1; 6474 break; 6475 } 6476 MDI_PI_UNLOCK(pip); 6477 pip = next; 6478 } 6479 6480 6481 MDI_CLIENT_UNLOCK(ct); 6482 if (found_it == 0) { 6483 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6484 "!failed. Could not find corresponding pip\n")); 6485 return (MDI_FAILURE); 6486 } 6487 6488 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6489 } 6490 6491 MDI_DEBUG(5, (MDI_NOTE, cdip ? 
	    cdip : pdip,
	    "!op %d returning success pdip = %p cdip = %p",
	    op, (void *)pdip, (void *)cdip));
	return (MDI_SUCCESS);
}

/*
 * Ensure phci powered up
 */
static void
i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* already holding power on the pHCI */
	if (MDI_PI(pip)->pi_pm_held) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
	if (ph_dip == NULL) {
		return;
	}

	/* drop pi_mutex across pm_hold_power(), which may block */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_hold_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
	if (DEVI(ph_dip)->devi_pm_info)
		MDI_PI(pip)->pi_pm_held = 1;
}

/*
 * Allow phci powered down
 */
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip = NULL;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* no hold to release */
	if (MDI_PI(pip)->pi_pm_held == 0) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	ASSERT(ph_dip != NULL);

	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));

	/* drop pi_mutex across pm_rele_power(), as in i_mdi_pm_hold_pip */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_rele_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	MDI_PI(pip)->pi_pm_held = 0;
}

/*
 * Bump the client's power hold count by 'incr'.
 * Caller must hold ct_mutex.
 */
static void
i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	ct->ct_power_cnt += incr;
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d incr = %d",
	    (void *)ct, ct->ct_power_cnt, incr));
	ASSERT(ct->ct_power_cnt >= 0);
}

/*
 * Release the pm hold on every pHCI path of the client.
 * Caller must hold ct_mutex.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		/*
		 * NOTE(review): pi_client_link is read after
		 * mdi_rele_path(); presumably ct_mutex keeps pip from
		 * being freed here -- confirm.
		 */
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * Drop 'decr' power holds on the client; once the count reaches zero,
 * release the pm holds on all of its pHCIs. Caller must hold ct_mutex.
 */
static void
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	/* only decrement for an attached client */
	if (i_ddi_devi_attached(ct->ct_dip)) {
		ct->ct_power_cnt -= decr;
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "%p ct_power_cnt = %d decr = %d",
		    (void *)ct, ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/*
 * Force the client's power accounting back to its initial state and
 * release all pHCI pm holds. Caller must hold ct_mutex.
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
	ASSERT(MDI_CLIENT_LOCKED(ct));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	ct->ct_powercnt_reset = 1;
}

/*
 * Hold and power up a single pHCI; on pm_powerup() failure the hold
 * is released again.
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
	    ddi_get_instance(ph_dip), (void *)pip));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
		    "pm_powerup FAILED for %s%d %p",
		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
		    (void *)pip));

		/* undo the hold taken above */
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * Power up every usable pHCI path of the client; succeeds if at least
 * one path powered up. Caller must hold ct_mutex (dropped/reacquired
 * around each power-up).
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		client can do power management
 *		2. Ensure phci powered up as client power managing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		/* any other op is passed straight to the framework */
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might have started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	default:
		/* defensive: other ops already returned in the switch above */
		dev_err(parent, CE_WARN, "!unhandled bus power operation: 0x%x",
		    op);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Power up the pHCIs of a single client and take a pre-config power
 * hold on it, unless the client is already configured or already held.
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config power handling for one named child, or for every child of
 * the vHCI when child is NULL.
 */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	dev_info_t	*cdip;
	int		circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
	return (ret);
}

/*
 * Power up the pHCIs of a single client and take a pre-unconfig power
 * hold; *held is set to 1 when a hold is in place.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(child)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/* don't auto-modunload a powered-down client */
	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * Pre-unconfig power handling for one named child, or for every child
 * of the vHCI when child is NULL.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	int		ret = MDI_SUCCESS;
	dev_info_t	*cdip;
	int		circ;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);

	/* any hold at all makes the overall operation a success */
	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/*
 * Release the pre-config power hold taken in i_mdi_pm_pre_config_one().
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while
	    (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* release one hold per currently-usable path */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-config power handling for one named child, or for every child
 * of the vHCI when child is NULL.
 */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * Release the pre-unconfig power hold taken in
 * i_mdi_pm_pre_unconfig_one().
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* release one hold per currently-usable path */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-unconfig power handling; a no-op unless a hold was taken
 * (held != 0) during pre-unconfig.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
		return;
	}

	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * mdi_power():
 *	vHCI-level dispatch for the MDI power-management operations:
 *	pre/post config, pre/post unconfig, and hold/release of client
 *	power.
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int		circ, ret = MDI_SUCCESS;
	dev_info_t	*client_dip = NULL;
	mdi_client_t	*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (MDI_NOTE, vdip,
	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		ASSERT(args);

		/* for these two ops 'args' is the client dip itself */
		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(client_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return
(ret);
}

/*
 * mdi_component_is_vhci():
 *	Return MDI_SUCCESS if the given devinfo node is a vHCI node.
 *	If mdi_class is non-NULL, also return the vHCI class name through it.
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_component_is_phci():
 *	Return MDI_SUCCESS if the given devinfo node is a pHCI node.
 *	If mdi_class is non-NULL, also return the class name of the vHCI
 *	this pHCI is registered with.
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t *phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_component_is_client():
 *	Return MDI_SUCCESS if the given devinfo node is an mdi client node.
 *	If mdi_class is non-NULL, also return the class name of the vHCI
 *	the client belongs to.
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t *client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class = client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_client_get_vhci_private():
 *	Get the vhci private data associated with the mdi client node.
 *	Returns NULL if dip is not an mdi client node; the runtime check
 *	mirrors the ASSERT so non-DEBUG kernels fail soft.
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * mdi_client_set_vhci_private():
 *	Set the vhci private data in the mdi client node.
 *	Silently does nothing if dip is not an mdi client node.
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}
/*
 * mdi_pi_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_pathinfo node
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *		Set the vhci private information in the mdi_pathinfo node
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/* Return non-zero if the path is marked hidden (excluded from snapshots) */
int
mdi_pi_ishidden(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
}

/* Return non-zero if the path's device has been marked removed */
int
mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
}

/* Return 1 if all client paths are device_removed */
static int
i_mdi_client_all_devices_removed(mdi_client_t *ct)
{
	mdi_pathinfo_t  *pip;
	int		all_devices_removed = 1;

	MDI_CLIENT_LOCK(ct);
	for (pip = ct->ct_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
		if (!mdi_pi_device_isremoved(pip)) {
			all_devices_removed = 0;
			break;
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	return (all_devices_removed);
}

/*
 * When processing path hotunplug, represent device removal.
7403 */ 7404 int 7405 mdi_pi_device_remove(mdi_pathinfo_t *pip) 7406 { 7407 mdi_client_t *ct; 7408 7409 MDI_PI_LOCK(pip); 7410 if (mdi_pi_device_isremoved(pip)) { 7411 MDI_PI_UNLOCK(pip); 7412 return (0); 7413 } 7414 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip); 7415 MDI_PI_FLAGS_SET_HIDDEN(pip); 7416 MDI_PI_UNLOCK(pip); 7417 7418 /* 7419 * If all paths associated with the client are now DEVICE_REMOVED, 7420 * reflect DEVICE_REMOVED in the client. 7421 */ 7422 ct = MDI_PI(pip)->pi_client; 7423 if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct)) 7424 (void) ndi_devi_device_remove(ct->ct_dip); 7425 else 7426 i_ddi_di_cache_invalidate(); 7427 7428 return (1); 7429 } 7430 7431 /* 7432 * When processing hotplug, if a path marked mdi_pi_device_isremoved() 7433 * is now accessible then this interfaces is used to represent device insertion. 7434 */ 7435 int 7436 mdi_pi_device_insert(mdi_pathinfo_t *pip) 7437 { 7438 MDI_PI_LOCK(pip); 7439 if (!mdi_pi_device_isremoved(pip)) { 7440 MDI_PI_UNLOCK(pip); 7441 return (0); 7442 } 7443 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip); 7444 MDI_PI_FLAGS_CLR_HIDDEN(pip); 7445 MDI_PI_UNLOCK(pip); 7446 7447 i_ddi_di_cache_invalidate(); 7448 7449 return (1); 7450 } 7451 7452 /* 7453 * List of vhci class names: 7454 * A vhci class name must be in this list only if the corresponding vhci 7455 * driver intends to use the mdi provided bus config implementation 7456 * (i.e., mdi_vhci_bus_config()). 7457 */ 7458 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7459 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7460 7461 /* 7462 * During boot time, the on-disk vhci cache for every vhci class is read 7463 * in the form of an nvlist and stored here. 
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function.
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/* the -1 below is to account for "%s" in the format string */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			/* take ownership of the boot-time nvlist, if any */
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else  {
			/* corrupt cache data is discarded, not fatal */
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* flush the cache before the filesystems go away at shutdown */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_next;

	/* async threads must be stopped before tearing down their state */
	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/* wait (polling) for the flush thread and all acc threads to exit */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		/* final synchronous flush; re-mark dirty on failure */
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}

/*
 * Stop vhci cache flush thread
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	/* wait (polling) for the flush thread to notice EXIT and stop */
	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		/* force flag set: flush even if I/O is not yet initialized */
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}

/*
 * Enqueue the vhcache phci (cphci) at the tail of the list
 */
static void
enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
{
	cphci->cphci_next = NULL;
	if (vhcache->vhcache_phci_head == NULL)
		vhcache->vhcache_phci_head = cphci;
	else
		vhcache->vhcache_phci_tail->cphci_next = cphci;
	vhcache->vhcache_phci_tail = cphci;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
 */
static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *cpi)
{
	cpi->cpi_next = NULL;
	if (cct->cct_cpi_head == NULL)
		cct->cct_cpi_head = cpi;
	else
		cct->cct_cpi_tail->cpi_next = cpi;
	cct->cct_cpi_tail = cpi;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *newcpi)
{
	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;

	if (cct->cct_cpi_head == NULL ||
	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
		enqueue_tail_vhcache_pathinfo(cct, newcpi);
	else {
		/* advance to the first PATH_DOES_NOT_EXIST entry, if any */
		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
		    prev_cpi = cpi, cpi = cpi->cpi_next)
			;

		if (prev_cpi == NULL)
			cct->cct_cpi_head = newcpi;
		else
			prev_cpi->cpi_next = newcpi;

		newcpi->cpi_next = cpi;

		if (cpi == NULL)
			cct->cct_cpi_tail = newcpi;
	}
}

/*
 * Enqueue the vhcache client (cct) at the tail of the list
 */
static void
enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
    mdi_vhcache_client_t *cct)
{
	cct->cct_next = NULL;
	if (vhcache->vhcache_client_head == NULL)
		vhcache->vhcache_client_head = cct;
	else
		vhcache->vhcache_client_tail->cct_next = cct;
	vhcache->vhcache_client_tail = cct;
}

/* Free a NUL-terminated-string array of nelem entries and the array itself */
static void
free_string_array(char **str, int nelem)
{
	int i;

	if (str) {
		for (i = 0; i < nelem; i++) {
			if (str[i])
				kmem_free(str[i], strlen(str[i]) + 1);
		}
		kmem_free(str, sizeof (char *) * nelem);
	}
}

/* Free a vhcache phci entry and its pathname string */
static void
free_vhcache_phci(mdi_vhcache_phci_t *cphci)
{
	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
	kmem_free(cphci, sizeof (*cphci));
}

/* Free a vhcache pathinfo entry and its address string */
static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
{
	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
	kmem_free(cpi, sizeof (*cpi));
}

/* Free a vhcache client entry and its name@addr string */
static void
free_vhcache_client(mdi_vhcache_client_t *cct)
{
	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
	kmem_free(cct, sizeof (*cct));
}

static char * 7800 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7801 { 7802 char *name_addr; 7803 int len; 7804 7805 len = strlen(ct_name) + strlen(ct_addr) + 2; 7806 name_addr = kmem_alloc(len, KM_SLEEP); 7807 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7808 7809 if (ret_len) 7810 *ret_len = len; 7811 return (name_addr); 7812 } 7813 7814 /* 7815 * Copy the contents of paddrnvl to vhci cache. 7816 * paddrnvl nvlist contains path information for a vhci client. 7817 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7818 */ 7819 static void 7820 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7821 mdi_vhcache_client_t *cct) 7822 { 7823 nvpair_t *nvp = NULL; 7824 mdi_vhcache_pathinfo_t *cpi; 7825 uint_t nelem; 7826 uint32_t *val; 7827 7828 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7829 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7830 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7831 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7832 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7833 ASSERT(nelem == 2); 7834 cpi->cpi_cphci = cphci_list[val[0]]; 7835 cpi->cpi_flags = val[1]; 7836 enqueue_tail_vhcache_pathinfo(cct, cpi); 7837 } 7838 } 7839 7840 /* 7841 * Copy the contents of caddrmapnvl to vhci cache. 7842 * caddrmapnvl nvlist contains vhci client address to phci client address 7843 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7844 * this nvlist. 
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
	nvpair_t *nvp = NULL;
	nvlist_t *paddrnvl;
	mdi_vhcache_client_t *cct;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		/* nvpair name is the "<clientname>@<clientaddress>" key */
		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_nvlist(nvp, &paddrnvl);
		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
		/* the client must contain at least one path */
		ASSERT(cct->cct_cpi_head != NULL);

		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
	}
}

/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify pHCIs to which the
 * the bus specific address belongs to. These integers are used as an index
 * into to the phcis string array in the main nvlist to get the pHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char **phcis, **phci_namep;
	uint_t nphcis;
	mdi_vhcache_phci_t *cphci, **cphci_list;
	nvlist_t *caddrmapnvl;
	int32_t ver;
	int i;
	size_t cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* reject missing or mismatched cache versions */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* a version-only nvlist (no phcis yet) is a valid, empty cache */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		/* index in this list == phci-id used by paddrs entries */
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnvl.
 * Returns 0 on success, errno on failure.
7950 */ 7951 static int 7952 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7953 nvlist_t *caddrmapnvl) 7954 { 7955 mdi_vhcache_pathinfo_t *cpi; 7956 nvlist_t *nvl; 7957 int err; 7958 uint32_t val[2]; 7959 7960 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7961 7962 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7963 return (err); 7964 7965 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7966 val[0] = cpi->cpi_cphci->cphci_id; 7967 val[1] = cpi->cpi_flags; 7968 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7969 != 0) 7970 goto out; 7971 } 7972 7973 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7974 out: 7975 nvlist_free(nvl); 7976 return (err); 7977 } 7978 7979 /* 7980 * Build caddrmapnvl using the information in the vhci cache 7981 * and add it to the mainnvl. 7982 * Returns 0 on success, errno on failure. 7983 */ 7984 static int 7985 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7986 { 7987 mdi_vhcache_client_t *cct; 7988 nvlist_t *nvl; 7989 int err; 7990 7991 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7992 7993 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7994 return (err); 7995 7996 for (cct = vhcache->vhcache_client_head; cct != NULL; 7997 cct = cct->cct_next) { 7998 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7999 goto out; 8000 } 8001 8002 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 8003 out: 8004 nvlist_free(nvl); 8005 return (err); 8006 } 8007 8008 /* 8009 * Build nvlist using the information in the vhci cache. 8010 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 8011 * Returns nvl on success, NULL on failure. 
8012 */ 8013 static nvlist_t * 8014 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 8015 { 8016 mdi_vhcache_phci_t *cphci; 8017 uint_t phci_count; 8018 char **phcis; 8019 nvlist_t *nvl; 8020 int err, i; 8021 8022 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 8023 nvl = NULL; 8024 goto out; 8025 } 8026 8027 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 8028 MDI_VHCI_CACHE_VERSION)) != 0) 8029 goto out; 8030 8031 rw_enter(&vhcache->vhcache_lock, RW_READER); 8032 if (vhcache->vhcache_phci_head == NULL) { 8033 rw_exit(&vhcache->vhcache_lock); 8034 return (nvl); 8035 } 8036 8037 phci_count = 0; 8038 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8039 cphci = cphci->cphci_next) 8040 cphci->cphci_id = phci_count++; 8041 8042 /* build phci pathname list */ 8043 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 8044 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 8045 cphci = cphci->cphci_next, i++) 8046 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 8047 8048 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 8049 phci_count); 8050 free_string_array(phcis, phci_count); 8051 8052 if (err == 0 && 8053 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 8054 rw_exit(&vhcache->vhcache_lock); 8055 return (nvl); 8056 } 8057 8058 rw_exit(&vhcache->vhcache_lock); 8059 out: 8060 nvlist_free(nvl); 8061 return (NULL); 8062 } 8063 8064 /* 8065 * Lookup vhcache phci structure for the specified phci path. 8066 */ 8067 static mdi_vhcache_phci_t * 8068 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 8069 { 8070 mdi_vhcache_phci_t *cphci; 8071 8072 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8073 8074 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8075 cphci = cphci->cphci_next) { 8076 if (strcmp(cphci->cphci_path, phci_path) == 0) 8077 return (cphci); 8078 } 8079 8080 return (NULL); 8081 } 8082 8083 /* 8084 * Lookup vhcache phci structure for the specified phci. 
8085 */ 8086 static mdi_vhcache_phci_t * 8087 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 8088 { 8089 mdi_vhcache_phci_t *cphci; 8090 8091 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8092 8093 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8094 cphci = cphci->cphci_next) { 8095 if (cphci->cphci_phci == ph) 8096 return (cphci); 8097 } 8098 8099 return (NULL); 8100 } 8101 8102 /* 8103 * Add the specified phci to the vhci cache if not already present. 8104 */ 8105 static void 8106 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 8107 { 8108 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8109 mdi_vhcache_phci_t *cphci; 8110 char *pathname; 8111 int cache_updated; 8112 8113 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8114 8115 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 8116 (void) ddi_pathname(ph->ph_dip, pathname); 8117 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 8118 != NULL) { 8119 cphci->cphci_phci = ph; 8120 cache_updated = 0; 8121 } else { 8122 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 8123 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 8124 cphci->cphci_phci = ph; 8125 enqueue_vhcache_phci(vhcache, cphci); 8126 cache_updated = 1; 8127 } 8128 8129 rw_exit(&vhcache->vhcache_lock); 8130 8131 /* 8132 * Since a new phci has been added, reset 8133 * vhc_path_discovery_cutoff_time to allow for discovery of paths 8134 * during next vhcache_discover_paths(). 8135 */ 8136 mutex_enter(&vhc->vhc_lock); 8137 vhc->vhc_path_discovery_cutoff_time = 0; 8138 mutex_exit(&vhc->vhc_lock); 8139 8140 kmem_free(pathname, MAXPATHLEN); 8141 if (cache_updated) 8142 vhcache_dirty(vhc); 8143 } 8144 8145 /* 8146 * Remove the reference to the specified phci from the vhci cache. 
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
		/* do not remove the actual mdi_vhcache_phci structure */
		cphci->cphci_phci = NULL;
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Initialize a lookup token, optionally copying the state of a prior
 * token so a cached cct lookup can be reused (see lookup_vhcache_client()).
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
	if (src == NULL) {
		dst->lt_cct = NULL;
		dst->lt_cct_lookup_time = 0;
	} else {
		dst->lt_cct = src->lt_cct;
		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
	}
}

/*
 * Look up vhcache client for the specified client.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mod_hash_val_t hv;
	char *name_addr;
	int len;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)name_addr, &hv) == 0) {
		if (token) {
			/* remember this hit, timestamped for staleness check */
			token->lt_cct = (mdi_vhcache_client_t *)hv;
			token->lt_cct_lookup_time = ddi_get_lbolt64();
		}
	} else {
		if (token) {
			token->lt_cct = NULL;
			token->lt_cct_lookup_time = 0;
		}
		hv = NULL;
	}
	kmem_free(name_addr, len);
	return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phci_t *ph = pip->pi_phci;
	mdi_client_t *ct = pip->pi_client;
	int cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/* bind the pip to an existing cached path entry, if one matches */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				/* path exists again; restore list ordering */
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	if (cpi == NULL) {
		/* no cached entry matched; create a new one */
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_client_t *ct = pip->pi_client;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) != NULL) {
		for (cpi = cct->cct_cpi_head; cpi != NULL;
		    cpi = cpi->cpi_next) {
			if (cpi->cpi_pip == pip) {
				/* unbind only; the cache entry is retained */
				cpi->cpi_pip = NULL;
				break;
			}
		}
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			/* read-only root: stop retrying, drop dirty state */
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* warn only on the first failure of a failure run */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
8354 */ 8355 static void 8356 vhcache_flush_thread(void *arg) 8357 { 8358 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8359 clock_t idle_time, quit_at_ticks; 8360 callb_cpr_t cprinfo; 8361 8362 /* number of seconds to sleep idle before exiting */ 8363 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8364 8365 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8366 "mdi_vhcache_flush"); 8367 mutex_enter(&vhc->vhc_lock); 8368 for (; ; ) { 8369 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8370 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8371 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8372 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8373 (void) cv_timedwait(&vhc->vhc_cv, 8374 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8375 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8376 } else { 8377 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8378 mutex_exit(&vhc->vhc_lock); 8379 8380 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8381 vhcache_dirty(vhc); 8382 8383 mutex_enter(&vhc->vhc_lock); 8384 } 8385 } 8386 8387 quit_at_ticks = ddi_get_lbolt() + idle_time; 8388 8389 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8390 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8391 ddi_get_lbolt() < quit_at_ticks) { 8392 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8393 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8394 quit_at_ticks); 8395 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8396 } 8397 8398 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8399 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8400 goto out; 8401 } 8402 8403 out: 8404 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8405 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8406 CALLB_CPR_EXIT(&cprinfo); 8407 } 8408 8409 /* 8410 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
 */
static void
vhcache_dirty(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int create_thread;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* do not flush cache until the cache is fully built */
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}
	rw_exit(&vhcache->vhcache_lock);

	mutex_enter(&vhc->vhc_lock);
	/* no point scheduling a flush when the filesystem is read-only */
	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
		mutex_exit(&vhc->vhc_lock);
		return;
	}

	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		/* flush thread already running; just wake it up */
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;			/* phci device path */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;

/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;		/* protects vhbc_thr_count */
	kcondvar_t vhbc_cv;		/* signalled when last thread exits */
	int vhbc_thr_count;		/* # of outstanding phci threads */
} mdi_vhci_bus_config_t;

/*
 * bus config the specified phci
 */
static void
bus_config_phci(void *arg)
{
	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
	dev_info_t *ph_dip;

	/*
	 * first configure all path components upto phci and then configure
	 * the phci children.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
	    != NULL) {
		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
			(void) ndi_devi_config_driver(ph_dip,
			    vhbc->vhbc_op_flags,
			    vhbc->vhbc_op_major);
		} else
			(void) ndi_devi_config(ph_dip,
			    vhbc->vhbc_op_flags);

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* this work item is complete; free it */
	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
	kmem_free(phbc, sizeof (*phbc));

	/* wake up the waiter when the last phci thread finishes */
	mutex_enter(&vhbc->vhbc_lock);
	vhbc->vhbc_thr_count--;
	if (vhbc->vhbc_thr_count == 0)
		cv_broadcast(&vhbc->vhbc_cv);
	mutex_exit(&vhbc->vhbc_lock);
}

/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	/* build one work item per cached phci */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		phbc_next = phbc->phbc_next;
		if (mdi_mtc_off)
			/* multi-threaded config disabled; run inline */
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}

/*
 * Single
threaded version of bus_config_all_phcis()
 */
static void
st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	/* serialize against other vhci config operations */
	single_threaded_vhconfig_enter(vhc);
	bus_config_all_phcis(vhcache, flags, op, maj);
	single_threaded_vhconfig_exit(vhc);
}

/*
 * Perform BUS_CONFIG_ONE on the specified child of the phci.
 * The path includes the child component in addition to the phci path.
 */
static int
bus_config_one_phci_child(char *path)
{
	dev_info_t *ph_dip, *child;
	char *devnm;
	int rv = MDI_FAILURE;

	/* extract the child component of the phci */
	devnm = strrchr(path, '/');
	*devnm++ = '\0';

	/*
	 * first configure all path components upto phci and then
	 * configure the phci child.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
		    NDI_SUCCESS) {
			/*
			 * release the hold that ndi_devi_config_one() placed
			 */
			ndi_rele_devi(child);
			rv = MDI_SUCCESS;
		}

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* restore the '/' replaced above so the caller's path is unmodified */
	devnm--;
	*devnm = '/';
	return (rv);
}

/*
 * Build a list of phci client paths for the specified vhci client.
 * The list includes only those phci client paths which aren't configured yet.
 */
static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
{
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
	int config_path, len;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/*
		 * include only those paths that aren't configured.
		 */
		config_path = 0;
		if (cpi->cpi_pip == NULL)
			config_path = 1;
		else {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (MDI_PI_IS_INIT(cpi->cpi_pip))
				config_path = 1;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}

		if (config_path) {
			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
			/* room for '/', '@' and the terminating NUL */
			len = strlen(cpi->cpi_cphci->cphci_path) +
			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
			pp->phys_path = kmem_alloc(len, KM_SLEEP);
			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
			    cpi->cpi_cphci->cphci_path, ct_name,
			    cpi->cpi_addr);
			pp->phys_path_next = NULL;

			/* append to the tail to preserve cpi order */
			if (pp_head == NULL)
				pp_head = pp;
			else
				pp_tail->phys_path_next = pp;
			pp_tail = pp;
		}
	}

	return (pp_head);
}

/*
 * Free the memory allocated for phci client path list.
 */
static void
free_phclient_path_list(mdi_phys_path_t *pp_head)
{
	mdi_phys_path_t *pp, *pp_next;

	for (pp = pp_head; pp != NULL; pp = pp_next) {
		pp_next = pp->phys_path_next;
		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
		kmem_free(pp, sizeof (*pp));
	}
}

/*
 * Allocate async client structure and initialize with the specified values.
 * Takes over ownership of pp_head; freed via free_async_client_config().
 */
static mdi_async_client_config_t *
alloc_async_client_config(char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc;

	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
	acc->acc_phclient_path_list_head = pp_head;
	init_vhcache_lookup_token(&acc->acc_token, tok);
	acc->acc_next = NULL;
	return (acc);
}

/*
 * Free the memory allocated for the async client structure and their members.
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	if (acc->acc_phclient_path_list_head)
		free_phclient_path_list(acc->acc_phclient_path_list_head);
	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;

	/* detach the list, then re-enqueue each element in sorted position */
	cpi_head = cct->cct_cpi_head;
	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
		cpi_next = cpi->cpi_next;
		enqueue_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * Upgrade to a writer lock. If the upgrade fails the lock was
	 * dropped in the process, so the client must be looked up again.
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	/* the hint reflects whether a pathinfo currently exists */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	/* the hint is persisted on disk, so mark the cache dirty */
	vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t *pp;

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
		(void) bus_config_one_phci_child(pp->phys_path);
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* wait for work, exiting after the idle timeout */
		mutex_enter(&vhc->vhc_lock);
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the head of the async client config list */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* configure the client's paths with vhc_lock dropped */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc, *newacc;
	int create_thread;

	if (pp_head == NULL)
		return;

	if (mdi_mtc_off) {
		/* multi-threaded config is disabled; configure synchronously */
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(newacc);

	/* drop the request if this client is already queued */
	mutex_enter(&vhc->vhc_lock);
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
			free_async_client_config(newacc);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
	}

	/* append the new request to the tail of the list */
	if (vhc->vhc_acc_list_head == NULL)
		vhc->vhc_acc_list_head = newacc;
	else
		vhc->vhc_acc_list_tail->acc_next = newacc;
	vhc->vhc_acc_list_tail = newacc;
	vhc->vhc_acc_count++;
	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
		/* enough worker threads already exist; wake one up */
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_acc_thrcount++;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * Return number of online paths for the specified client.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Called with vhcache_lock held; releases it on all return paths.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/* configure synchronously until one path comes online */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			/*
			 * a path came online; hand the remaining paths over
			 * to the async config machinery.
			 */
			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Acquire the single-threaded vhci config "lock"; blocks until no other
 * thread holds MDI_VHC_SINGLE_THREADED.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Release the single-threaded vhci config "lock" and wake up waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

typedef struct mdi_phci_driver_info {
	char *phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
};

static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };

/*
 * kmem-based realloc: allocate a new (zeroed) buffer of new_size, copy the
 * old contents over, and free the old buffer. old_ptr may be NULL.
 */
static void *
mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
{
	void *new_ptr;

	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
	if (old_ptr) {
		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
		kmem_free(old_ptr, old_size);
	}
	return (new_ptr);
}

/*
 * Append a (driver name, root-support) pair to the parallel arrays,
 * growing both arrays in chunks of 10 elements when they are full.
 * The driver name is duplicated; the caller owns the copy.
 */
static void
add_to_phci_list(char ***driver_list, int **root_support_list,
    int *cur_elements, int *max_elements, char *driver_name, int root_support)
{
	ASSERT(*cur_elements <= *max_elements);
	if (*cur_elements == *max_elements) {
		*max_elements += 10;
		*driver_list = mdi_realloc(*driver_list,
		    sizeof (char *) * (*cur_elements),
		    sizeof (char *) * (*max_elements));
		*root_support_list = mdi_realloc(*root_support_list,
		    sizeof (int) * (*cur_elements),
		    sizeof (int) * (*max_elements));
	}
	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
	(*root_support_list)[*cur_elements] = root_support;
	(*cur_elements)++;
}

/*
 * Build the list of phci drivers associated with the specified vhci class:
 * first from driver.conf properties (ddi-vhci-class / ddi-no-root-support),
 * then from the built-in tables above. The caller owns the returned arrays
 * and the strings within them (see attach_phci_drivers() for the free).
 */
static void
get_phci_driver_list(char *vhci_class, char ***driver_list,
    int **root_support_list, int *cur_elements, int *max_elements)
{
	mdi_phci_driver_info_t *st_driver_list, *p;
	int st_ndrivers, root_support, i, j, driver_conf_count;
	major_t m;
	struct devnames *dnp;
	ddi_prop_t *propp;

	*driver_list = NULL;
	*root_support_list = NULL;
	*cur_elements = 0;
	*max_elements = 0;

	/* add the phci drivers derived from the phci driver.conf files */
	for (m = 0; m < devcnt; m++) {
		dnp = &devnamesp[m];

		if (dnp->dn_flags & DN_PHCI_DRIVER) {
			LOCK_DEV_OPS(&dnp->dn_lock);
			if (dnp->dn_global_prop_ptr != NULL &&
			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
			    strcmp(propp->prop_val, vhci_class) == 0) {

				/* root support unless ddi-no-root-support */
				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
				    &dnp->dn_global_prop_ptr->prop_list)
				    == NULL) ? 1 : 0;

				add_to_phci_list(driver_list, root_support_list,
				    cur_elements, max_elements, dnp->dn_name,
				    root_support);

				UNLOCK_DEV_OPS(&dnp->dn_lock);
			} else
				UNLOCK_DEV_OPS(&dnp->dn_lock);
		}
	}

	driver_conf_count = *cur_elements;

	/* add the phci drivers specified in the built-in tables */
	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
		st_driver_list = scsi_phci_driver_list;
		st_ndrivers = sizeof (scsi_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
		st_driver_list = ib_phci_driver_list;
		st_ndrivers = sizeof (ib_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else {
		st_driver_list = NULL;
		st_ndrivers = 0;
	}

	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
		/* add this phci driver if not already added before */
		for (j = 0; j < driver_conf_count; j++) {
			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
				break;
		}
		if (j == driver_conf_count) {
			add_to_phci_list(driver_list, root_support_list,
			    cur_elements, max_elements, p->phdriver_name,
			    p->phdriver_root_support);
		}
	}
}

/*
 * Attach the phci driver instances associated with the specified vhci class.
 * If root is mounted attach all phci driver instances.
 * If root is not mounted, attach the instances of only those phci
 * drivers that have the root support.
 */
static void
attach_phci_drivers(char *vhci_class)
{
	char **driver_list, **p;
	int *root_support_list;
	int cur_elements, max_elements, i;
	major_t m;

	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
	    &cur_elements, &max_elements);

	for (i = 0; i < cur_elements; i++) {
		if (modrootloaded || root_support_list[i]) {
			m = ddi_name_to_major(driver_list[i]);
			if (m != DDI_MAJOR_T_NONE &&
			    ddi_hold_installed_driver(m))
				/* attach done; drop the hold again */
				ddi_rele_driver(m);
		}
	}

	/* free the list built by get_phci_driver_list() */
	if (driver_list) {
		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
			kmem_free(*p, strlen(*p) + 1);
		kmem_free(driver_list, sizeof (char *) * max_elements);
		kmem_free(root_support_list, sizeof (int) * max_elements);
	}
}

/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
 *
 * Cache is built fully if the root is mounted.
 * If the root is not mounted, phci drivers that do not have root support
 * are not attached. As a result the cache is built partially. The entries
 * in the cache reflect only those phci drivers that have root support.
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
		/* some other thread already built the cache */
		rw_exit(&vhcache->vhcache_lock);
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}
	rw_exit(&vhcache->vhcache_lock);

	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	vhcache_dirty(vhc);
	/* 1 indicates the cache was built by this call */
	return (1);
}

/*
 * Determine if discovery of paths is needed.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int rv = 1;

	mutex_enter(&vhc->vhc_lock);
	/* allow a limited number of discoveries during/after boot */
	if (i_ddi_io_initialized() == 0) {
		if (vhc->vhc_path_discovery_boot > 0) {
			vhc->vhc_path_discovery_boot--;
			goto out;
		}
	} else {
		if (vhc->vhc_path_discovery_postboot > 0) {
			vhc->vhc_path_discovery_postboot--;
			goto out;
		}
	}

	/*
	 * Do full path discovery at most once per mdi_path_discovery_interval.
	 * This is to avoid a series of full path discoveries when opening
	 * stale /dev/[r]dsk links.
	 */
	if (mdi_path_discovery_interval != -1 &&
	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
		goto out;

	rv = 0;
out:
	mutex_exit(&vhc->vhc_lock);
	return (rv);
}

/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
 */
static int
vhcache_discover_paths(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;

	single_threaded_vhconfig_enter(vhc);

	if (vhcache_do_discovery(vhc)) {
		attach_phci_drivers(vh->vh_class);
		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

		/* rate-limit future full discoveries */
		mutex_enter(&vhc->vhc_lock);
		vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
		    mdi_path_discovery_interval * TICKS_PER_SECOND;
		mutex_exit(&vhc->vhc_lock);
		rv = 1;
	}

	single_threaded_vhconfig_exit(vhc);
	return (rv);
}

/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *	vdip	vhci dip
 *	flags	bus config flags
 *	op	bus config operation
 *	The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *	arg	pointer to name@addr
 *	child	upon successful return from this function, *child will be
 *		set to the configured and held devinfo child node of vdip.
 *	ct_addr	pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *	arg	major number of the driver
 *	child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *	arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node. Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (MDI_NOTE, vdip,
		    "vhci dip is busy owned %p", (void *)vdip));
		goto default_bus_config;
	}

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		/* rv == 1 means the cache was just built by this thread */
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		/* NOTE(review): -1 here presumably equals DDI_MAJOR_T_NONE */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
 * Returns NULL on read failure; EIO/EINVAL failures are reported so the
 * cache file can be recreated later.
 */
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
	nvlist_t *nvl;
	int err;
	char *filename;

	filename = vhclass2vhcache_filename(vhci_class);

	if ((err = fread_nvlist(filename, &nvl)) == 0) {
		kmem_free(filename, strlen(filename) + 1);
		return (nvl);
	} else if (err == EIO)
		cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
	else if (err == EINVAL)
		cmn_err(CE_WARN,
		    "%s: data file corrupted, will recreate", filename);

	kmem_free(filename, strlen(filename) + 1);
	return (NULL);
}

/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 */
void
mdi_read_devices_files(void)
{
	int i;

	for (i = 0; i < N_VHCI_CLASSES; i++)
		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
}

/*
 * Remove all stale entries from vhci cache.
 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *phci, *nxt_phci;
	mdi_vhcache_client_t *client, *nxt_client;
	mdi_vhcache_pathinfo_t *path, *nxt_path;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/*
	 * Detach the client list, then re-enqueue only those clients that
	 * still have at least one valid path.
	 */
	client = vhcache->vhcache_client_head;
	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
	for ( ; client != NULL; client = nxt_client) {
		nxt_client = client->cct_next;

		path = client->cct_cpi_head;
		client->cct_cpi_head = client->cct_cpi_tail = NULL;
		for ( ; path != NULL; path = nxt_path) {
			nxt_path = path->cpi_next;
			if ((path->cpi_cphci->cphci_phci != NULL) &&
			    (path->cpi_pip != NULL)) {
				enqueue_tail_vhcache_pathinfo(client, path);
			} else if (path->cpi_pip != NULL) {
				/* Not valid to have a path without a phci. */
				free_vhcache_pathinfo(path);
			}
			/*
			 * NOTE(review): paths with cpi_pip == NULL are
			 * unlinked here without free_vhcache_pathinfo() --
			 * confirm this is intentional and not a leak.
			 */
		}

		if (client->cct_cpi_head != NULL)
			enqueue_vhcache_client(vhcache, client);
		else {
			/* no paths left; drop the client entirely */
			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
			    (mod_hash_key_t)client->cct_name_addr);
			free_vhcache_client(client);
		}
	}

	/* likewise, keep only those phcis that are still present */
	phci = vhcache->vhcache_phci_head;
	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
	for ( ; phci != NULL; phci = nxt_phci) {

		nxt_phci = phci->cphci_next;
		if (phci->cphci_phci != NULL)
			enqueue_vhcache_phci(vhcache, phci);
		else
			free_vhcache_phci(phci);
	}

	vhcache->vhcache_clean_time = ddi_get_lbolt64();
	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}

/*
 * Remove all stale entries from vhci cache.
9501 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9502 */ 9503 void 9504 mdi_clean_vhcache(void) 9505 { 9506 mdi_vhci_t *vh; 9507 9508 mutex_enter(&mdi_mutex); 9509 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9510 vh->vh_refcnt++; 9511 mutex_exit(&mdi_mutex); 9512 clean_vhcache(vh->vh_config); 9513 mutex_enter(&mdi_mutex); 9514 vh->vh_refcnt--; 9515 } 9516 mutex_exit(&mdi_mutex); 9517 } 9518 9519 /* 9520 * mdi_vhci_walk_clients(): 9521 * Walker routine to traverse client dev_info nodes 9522 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9523 * below the client, including nexus devices, which we dont want. 9524 * So we just traverse the immediate siblings, starting from 1st client. 9525 */ 9526 void 9527 mdi_vhci_walk_clients(dev_info_t *vdip, 9528 int (*f)(dev_info_t *, void *), void *arg) 9529 { 9530 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9531 dev_info_t *cdip; 9532 mdi_client_t *ct; 9533 9534 MDI_VHCI_CLIENT_LOCK(vh); 9535 cdip = ddi_get_child(vdip); 9536 while (cdip) { 9537 ct = i_devi_get_client(cdip); 9538 MDI_CLIENT_LOCK(ct); 9539 9540 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9541 cdip = ddi_get_next_sibling(cdip); 9542 else 9543 cdip = NULL; 9544 9545 MDI_CLIENT_UNLOCK(ct); 9546 } 9547 MDI_VHCI_CLIENT_UNLOCK(vh); 9548 } 9549 9550 /* 9551 * mdi_vhci_walk_phcis(): 9552 * Walker routine to traverse phci dev_info nodes 9553 */ 9554 void 9555 mdi_vhci_walk_phcis(dev_info_t *vdip, 9556 int (*f)(dev_info_t *, void *), void *arg) 9557 { 9558 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9559 mdi_phci_t *ph, *next; 9560 9561 MDI_VHCI_PHCI_LOCK(vh); 9562 ph = vh->vh_phci_head; 9563 while (ph) { 9564 MDI_PHCI_LOCK(ph); 9565 9566 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9567 next = ph->ph_next; 9568 else 9569 next = NULL; 9570 9571 MDI_PHCI_UNLOCK(ph); 9572 ph = next; 9573 } 9574 MDI_VHCI_PHCI_UNLOCK(vh); 9575 } 9576 9577 9578 /* 9579 * mdi_walk_vhcis(): 9580 * Walker routine to traverse vhci 
dev_info nodes 9581 */ 9582 void 9583 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9584 { 9585 mdi_vhci_t *vh = NULL; 9586 9587 mutex_enter(&mdi_mutex); 9588 /* 9589 * Scan for already registered vhci 9590 */ 9591 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9592 vh->vh_refcnt++; 9593 mutex_exit(&mdi_mutex); 9594 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9595 mutex_enter(&mdi_mutex); 9596 vh->vh_refcnt--; 9597 break; 9598 } else { 9599 mutex_enter(&mdi_mutex); 9600 vh->vh_refcnt--; 9601 } 9602 } 9603 9604 mutex_exit(&mdi_mutex); 9605 } 9606 9607 /* 9608 * i_mdi_log_sysevent(): 9609 * Logs events for pickup by syseventd 9610 */ 9611 static void 9612 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9613 { 9614 char *path_name; 9615 nvlist_t *attr_list; 9616 9617 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9618 KM_SLEEP) != DDI_SUCCESS) { 9619 goto alloc_failed; 9620 } 9621 9622 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9623 (void) ddi_pathname(dip, path_name); 9624 9625 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9626 ddi_driver_name(dip)) != DDI_SUCCESS) { 9627 goto error; 9628 } 9629 9630 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9631 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9632 goto error; 9633 } 9634 9635 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9636 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9637 goto error; 9638 } 9639 9640 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9641 path_name) != DDI_SUCCESS) { 9642 goto error; 9643 } 9644 9645 if (nvlist_add_string(attr_list, DDI_CLASS, 9646 ph_vh_class) != DDI_SUCCESS) { 9647 goto error; 9648 } 9649 9650 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9651 attr_list, NULL, DDI_SLEEP); 9652 9653 error: 9654 kmem_free(path_name, MAXPATHLEN); 9655 nvlist_free(attr_list); 9656 return; 9657 9658 alloc_failed: 9659 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9660 } 9661 9662 char ** 9663 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9664 { 9665 char **driver_list, **ret_driver_list = NULL; 9666 int *root_support_list; 9667 int cur_elements, max_elements; 9668 9669 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9670 &cur_elements, &max_elements); 9671 9672 9673 if (driver_list) { 9674 kmem_free(root_support_list, sizeof (int) * max_elements); 9675 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9676 * max_elements, sizeof (char *) * cur_elements); 9677 } 9678 *ndrivers = cur_elements; 9679 9680 return (ret_driver_list); 9681 9682 } 9683 9684 void 9685 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9686 { 9687 char **p; 9688 int i; 9689 9690 if (driver_list) { 9691 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9692 kmem_free(*p, strlen(*p) + 1); 9693 kmem_free(driver_list, sizeof (char *) * ndrivers); 9694 } 9695 } 9696 9697 /* 9698 * mdi_is_dev_supported(): 9699 * function called by pHCI bus config operation to determine if a 9700 * device should be represented as a child of the vHCI or the 9701 * pHCI. This decision is made by the vHCI, using cinfo idenity 9702 * information passed by the pHCI - specifics of the cinfo 9703 * representation are by agreement between the pHCI and vHCI. 9704 * Return Values: 9705 * MDI_SUCCESS 9706 * MDI_FAILURE 9707 */ 9708 int 9709 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9710 { 9711 mdi_vhci_t *vh; 9712 9713 ASSERT(class && pdip); 9714 9715 /* 9716 * For dev_supported, mdi_phci_register() must have established pdip as 9717 * a pHCI. 9718 * 9719 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9720 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9721 */ 9722 if (!MDI_PHCI(pdip)) 9723 return (MDI_FAILURE); 9724 9725 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9726 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9727 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9728 return (MDI_FAILURE); 9729 } 9730 9731 /* Return vHCI answer */ 9732 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9733 } 9734 9735 int 9736 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9737 { 9738 uint_t devstate = 0; 9739 dev_info_t *cdip; 9740 9741 if ((pip == NULL) || (dcp == NULL)) 9742 return (MDI_FAILURE); 9743 9744 cdip = mdi_pi_get_client(pip); 9745 9746 switch (mdi_pi_get_state(pip)) { 9747 case MDI_PATHINFO_STATE_INIT: 9748 devstate = DEVICE_DOWN; 9749 break; 9750 case MDI_PATHINFO_STATE_ONLINE: 9751 devstate = DEVICE_ONLINE; 9752 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9753 devstate |= DEVICE_BUSY; 9754 break; 9755 case MDI_PATHINFO_STATE_STANDBY: 9756 devstate = DEVICE_ONLINE; 9757 break; 9758 case MDI_PATHINFO_STATE_FAULT: 9759 devstate = DEVICE_DOWN; 9760 break; 9761 case MDI_PATHINFO_STATE_OFFLINE: 9762 devstate = DEVICE_OFFLINE; 9763 break; 9764 default: 9765 ASSERT(MDI_PI(pip)->pi_state); 9766 } 9767 9768 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9769 return (MDI_FAILURE); 9770 9771 return (MDI_SUCCESS); 9772 } 9773