1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved. 24 * Copyright (c) 2018, Joyent, Inc. 25 * Copyright 2023 Oxide Computer Company 26 */ 27 28 /* 29 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a 30 * more detailed discussion of the overall mpxio architecture. 
31 * 32 * Default locking order: 33 * 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 35 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 37 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 40 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 41 */ 42 43 #include <sys/note.h> 44 #include <sys/types.h> 45 #include <sys/varargs.h> 46 #include <sys/param.h> 47 #include <sys/errno.h> 48 #include <sys/uio.h> 49 #include <sys/buf.h> 50 #include <sys/modctl.h> 51 #include <sys/open.h> 52 #include <sys/kmem.h> 53 #include <sys/poll.h> 54 #include <sys/conf.h> 55 #include <sys/bootconf.h> 56 #include <sys/cmn_err.h> 57 #include <sys/stat.h> 58 #include <sys/ddi.h> 59 #include <sys/sunddi.h> 60 #include <sys/ddipropdefs.h> 61 #include <sys/sunndi.h> 62 #include <sys/ndi_impldefs.h> 63 #include <sys/promif.h> 64 #include <sys/sunmdi.h> 65 #include <sys/mdi_impldefs.h> 66 #include <sys/taskq.h> 67 #include <sys/epm.h> 68 #include <sys/sunpm.h> 69 #include <sys/modhash.h> 70 #include <sys/disp.h> 71 #include <sys/autoconf.h> 72 #include <sys/sysmacros.h> 73 74 #ifdef DEBUG 75 #include <sys/debug.h> 76 int mdi_debug = 1; 77 int mdi_debug_logonly = 0; 78 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 79 #define MDI_WARN CE_WARN, __func__ 80 #define MDI_NOTE CE_NOTE, __func__ 81 #define MDI_CONT CE_CONT, __func__ 82 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 83 #else /* !DEBUG */ 84 #define MDI_DEBUG(dbglevel, pargs) 85 #endif /* DEBUG */ 86 int mdi_debug_consoleonly = 0; 87 int mdi_delay = 3; 88 89 extern pri_t minclsyspri; 90 extern int modrootloaded; 91 92 /* 93 * Global mutex: 94 * Protects vHCI list and structure members. 
95 */ 96 kmutex_t mdi_mutex; 97 98 /* 99 * Registered vHCI class driver lists 100 */ 101 int mdi_vhci_count; 102 mdi_vhci_t *mdi_vhci_head; 103 mdi_vhci_t *mdi_vhci_tail; 104 105 /* 106 * Client Hash Table size 107 */ 108 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 109 110 /* 111 * taskq interface definitions 112 */ 113 #define MDI_TASKQ_N_THREADS 8 114 #define MDI_TASKQ_PRI minclsyspri 115 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 116 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 117 118 taskq_t *mdi_taskq; 119 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 120 121 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 122 123 /* 124 * The data should be "quiet" for this interval (in seconds) before the 125 * vhci cached data is flushed to the disk. 126 */ 127 static int mdi_vhcache_flush_delay = 10; 128 129 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 130 static int mdi_vhcache_flush_daemon_idle_time = 60; 131 132 /* 133 * MDI falls back to discovery of all paths when a bus_config_one fails. 134 * The following parameters can be used to tune this operation. 135 * 136 * mdi_path_discovery_boot 137 * Number of times path discovery will be attempted during early boot. 138 * Probably there is no reason to ever set this value to greater than one. 139 * 140 * mdi_path_discovery_postboot 141 * Number of times path discovery will be attempted after early boot. 142 * Set it to a minimum of two to allow for discovery of iscsi paths which 143 * may happen very late during booting. 144 * 145 * mdi_path_discovery_interval 146 * Minimum number of seconds MDI will wait between successive discovery 147 * of all paths. Set it to -1 to disable discovery of all paths. 
148 */ 149 static int mdi_path_discovery_boot = 1; 150 static int mdi_path_discovery_postboot = 2; 151 static int mdi_path_discovery_interval = 10; 152 153 /* 154 * number of seconds the asynchronous configuration thread will sleep idle 155 * before exiting. 156 */ 157 static int mdi_async_config_idle_time = 600; 158 159 static int mdi_bus_config_cache_hash_size = 256; 160 161 /* turns off multithreaded configuration for certain operations */ 162 static int mdi_mtc_off = 0; 163 164 /* 165 * The "path" to a pathinfo node is identical to the /devices path to a 166 * devinfo node had the device been enumerated under a pHCI instead of 167 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 168 * This association persists across create/delete of the pathinfo nodes, 169 * but not across reboot. 170 */ 171 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 172 static int mdi_pathmap_hash_size = 256; 173 static kmutex_t mdi_pathmap_mutex; 174 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 175 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 176 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */ 177 178 /* 179 * MDI component property name/value string definitions 180 */ 181 const char *mdi_component_prop = "mpxio-component"; 182 const char *mdi_component_prop_vhci = "vhci"; 183 const char *mdi_component_prop_phci = "phci"; 184 const char *mdi_component_prop_client = "client"; 185 186 /* 187 * MDI client global unique identifier property name 188 */ 189 const char *mdi_client_guid_prop = "client-guid"; 190 191 /* 192 * MDI client load balancing property name/value string definitions 193 */ 194 const char *mdi_load_balance = "load-balance"; 195 const char *mdi_load_balance_none = "none"; 196 const char *mdi_load_balance_rr = "round-robin"; 197 const char *mdi_load_balance_lba = "logical-block"; 198 199 /* 200 * Obsolete vHCI class definition; to be removed after Leadville update 201 */ 202 const char 
*mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 203 204 static char vhci_greeting[] = 205 "\tThere already exists one vHCI driver for class %s\n" 206 "\tOnly one vHCI driver for each class is allowed\n"; 207 208 /* 209 * Static function prototypes 210 */ 211 static int i_mdi_phci_offline(dev_info_t *, uint_t); 212 static int i_mdi_client_offline(dev_info_t *, uint_t); 213 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 214 static void i_mdi_phci_post_detach(dev_info_t *, 215 ddi_detach_cmd_t, int); 216 static int i_mdi_client_pre_detach(dev_info_t *, 217 ddi_detach_cmd_t); 218 static void i_mdi_client_post_detach(dev_info_t *, 219 ddi_detach_cmd_t, int); 220 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 221 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 222 static int i_mdi_lba_lb(mdi_client_t *ct, 223 mdi_pathinfo_t **ret_pip, struct buf *buf); 224 static void i_mdi_pm_hold_client(mdi_client_t *, int); 225 static void i_mdi_pm_rele_client(mdi_client_t *, int); 226 static void i_mdi_pm_reset_client(mdi_client_t *); 227 static int i_mdi_power_all_phci(mdi_client_t *); 228 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 229 230 231 /* 232 * Internal mdi_pathinfo node functions 233 */ 234 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 235 236 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 237 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 238 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 239 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 240 static void i_mdi_phci_unlock(mdi_phci_t *); 241 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 242 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 243 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 244 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 245 mdi_client_t *); 246 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 247 static void 
i_mdi_client_remove_path(mdi_client_t *, 248 mdi_pathinfo_t *); 249 250 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 251 mdi_pathinfo_state_t, int); 252 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 253 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 254 char **, int); 255 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 256 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 257 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 258 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 259 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 260 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 261 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 262 static void i_mdi_client_update_state(mdi_client_t *); 263 static int i_mdi_client_compute_state(mdi_client_t *, 264 mdi_phci_t *); 265 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 266 static void i_mdi_client_unlock(mdi_client_t *); 267 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 268 static mdi_client_t *i_devi_get_client(dev_info_t *); 269 /* 270 * NOTE: this will be removed once the NWS files are changed to use the new 271 * mdi_{enable,disable}_path interfaces 272 */ 273 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 274 int, int); 275 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 276 mdi_vhci_t *vh, int flags, int op); 277 /* 278 * Failover related function prototypes 279 */ 280 static int i_mdi_failover(void *); 281 282 /* 283 * misc internal functions 284 */ 285 static int i_mdi_get_hash_key(char *); 286 static int i_map_nvlist_error_to_mdi(int); 287 static void i_mdi_report_path_state(mdi_client_t *, 288 mdi_pathinfo_t *); 289 290 static void setup_vhci_cache(mdi_vhci_t *); 291 static int destroy_vhci_cache(mdi_vhci_t *); 292 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 
static boolean_t stop_vhcache_flush_thread(void *, int);
static void free_string_array(char **, int);
static void free_vhcache_phci(mdi_vhcache_phci_t *);
static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void free_vhcache_client(mdi_vhcache_client_t *);
static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void vhcache_pi_add(mdi_vhci_config_t *,
    struct mdi_pathinfo *);
static void vhcache_pi_remove(mdi_vhci_config_t *,
    struct mdi_pathinfo *);
static void free_phclient_path_list(mdi_phys_path_t *);
static void sort_vhcache_paths(mdi_vhcache_client_t *);
static int flush_vhcache(mdi_vhci_config_t *, int);
static void vhcache_dirty(mdi_vhci_config_t *);
static void free_async_client_config(mdi_async_client_config_t *);
static void single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t *read_on_disk_vhci_cache(char *);
extern int fread_nvlist(char *, nvlist_t **);
extern int fwrite_nvlist(char *, nvlist_t *);

/*
 * called once when first vhci registers with mdi
 *
 * Sets up global framework state: the global mdi_mutex, the shared
 * taskq, and the three path-instance <-> path-name hashes.  Guarded by
 * the 'initialized' latch so later registrations are no-ops.  There is
 * no lock around the latch; callers serialize via attach context —
 * NOTE(review): presumably mdi_vhci_register callers cannot race here,
 * confirm against the DDI attach framework.
 */
static void
i_mdi_init()
{
	static int initialized = 0;

	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Create our taskq resources */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/* Allocate ['path_instance' <-> "path"] maps */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	mdi_pathmap_bypath = mod_hash_create_strhash(
	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
}

/*
 * mdi_get_component_type():
 *	Return mpxio component type
 * Return Values:
 *	MDI_COMPONENT_NONE
 *	MDI_COMPONENT_VHCI
 *	MDI_COMPONENT_PHCI
 *	MDI_COMPONENT_CLIENT
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed when clients migrate mdi_component_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	return (DEVI(dip)->devi_mdi_component);
}

/*
 * mdi_vhci_register():
 *	Register a vHCI module with the mpxio framework
 *	mdi_vhci_register() is called by vHCI drivers to register the
 *	'class_driver' vHCI driver and its MDI entrypoints with the
 *	mpxio framework.  The vHCI driver must call this interface as
 *	part of its attach(9e) handler.
 *	Competing threads may try to attach mdi_vhci_register() as
 *	the vHCI drivers are loaded and attached as a result of pHCI
 *	driver instance registration (mdi_phci_register()) with the
 *	framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	/* Registrant can't be older */
	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);

#ifdef DEBUG
	/*
	 * IB nexus driver is loaded only when IB hardware is present.
	 * In order to be able to do this there is a need to drive the loading
	 * and attaching of the IB nexus driver (especially when an IB hardware
	 * is dynamically plugged in) when an IB HCA driver (PHCI)
	 * is being attached. Unfortunately this gets into the limitations
	 * of devfs as there seems to be no clean way to drive configuration
	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
	 * for IB.
	 */
	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
#endif

	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created.  Check for valid
			 * vHCI ops registration.  We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/*
		 * Default load-balancing policy is round-robin; the
		 * driver.conf "load-balance" property may override it
		 * to "none" or "logical-block".
		 */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* Append to the tail of the global vHCI list */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_vhci_unregister():
 *	Unregister a vHCI module from mpxio framework
 *	mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *	of a vhci to unregister it from the framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match; 'prev' tracks
	 * the node before 'vh' so it can be unlinked below.
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/*
	 * vh is now unreachable from the global list; tear it down
	 * outside mdi_mutex.
	 */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}

/*
 * i_mdi_vhci_class2vhci():
 *	Look for a matching vHCI module given a vHCI class name
 * Return Values:
 *	Handle to a vHCI component
 *	NULL
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*vh = NULL;

	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			break;
		}
	}
	mutex_exit(&mdi_mutex);
	return (vh);
}

/*
 * i_devi_get_vhci():
 *	Utility function to get the handle to a vHCI component
 * Return Values:
 *	Handle to a vHCI component
 *	NULL (when vdip is not marked as a vHCI component)
 */
mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	mdi_vhci_t	*vh = NULL;

	if (MDI_VHCI(vdip)) {
		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
	}
	return (vh);
}

/*
 * mdi_phci_register():
 *	Register a pHCI
module with mpxio framework
 *	mdi_phci_register() is called by pHCI drivers to register with
 *	the mpxio framework and a specific 'class_driver' vHCI.  The
 *	pHCI driver must call this interface as part of its attach(9e)
 *	handler.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (MDI_CONT, pdip,
			    "?multipath capabilities disabled via %s.conf.",
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	/* Allocate and initialize the pHCI extension structure */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);

	/* Mark the devinfo node as a pHCI component and back-link to ph */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* Append the new pHCI to the tail of the vHCI's pHCI list */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *	Unregister a pHCI module from mpxio framework
 *	mdi_phci_unregister() is called by the pHCI drivers from their
 *	detach(9E) handler to unregister their instances from the
 *	framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;
	mdi_pathinfo_t		*pip;

	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/*
	 * Unlink ph from the vHCI's pHCI list; 'prev' is left pointing
	 * at the node before ph (or NULL if ph is the head).
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
	MDI_PHCI_LOCK(ph);
	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
		MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);

	/* Log the unregistration, then tear down the pHCI extension */
	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *	Utility function to return the phci extensions.
 *	Returns NULL when pdip is not marked as a pHCI component.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;

	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * Single thread mdi entry into devinfo node for modifying its children.
 * If necessary we perform an ndi_devi_enter of the vHCI before doing
 * an ndi_devi_enter of 'dip'.  If we enter the vHCI, we set *enteredvp
 * to true, otherwise it is unconditionally set to false.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, boolean_t *enteredvp)
{
	dev_info_t	*vdip;

	/* Verify calling context */
	ASSERT3P(enteredvp, !=, NULL);
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT3P(vdip, !=, NULL);	/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a thread that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and then enter down the path
	 * to the pHCI.  If pHCI is detaching then we piggyback this call's
	 * enter of the vHCI on the framework's vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should check DEVI_IS_DETACHING under an enter of the parent to
	 * avoid a race with detach, but we can't because the framework has
	 * already entered the parent, so we have this complexity instead.
	 */
	*enteredvp = B_FALSE;
	for (;;) {
		/* On panic, give up rather than spin on the busy vHCI */
		if (panicstr != NULL)
			return;

		if (ndi_devi_tryenter(vdip)) {
			*enteredvp = B_TRUE;
			if (DEVI_IS_DETACHING(phci_dip)) {
				/* piggyback on the framework's vHCI enter */
				ndi_devi_exit(vdip);
				*enteredvp = B_FALSE;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			*enteredvp = B_FALSE;
			break;
		} else if (servicing_interrupt()) {
			/*
			 * Don't delay an interrupt (and ensure adaptive
			 * mutex inversion support).
			 */
			ndi_devi_enter(vdip);
			*enteredvp = B_TRUE;
			break;
		} else {
			/* vHCI busy; back off briefly and retry */
			delay_random(mdi_delay);
		}
	}

	ndi_devi_enter(phci_dip);
}

/*
 * Attempt to mdi_devi_enter.
 *	Returns 1 with both vHCI and pHCI entered, 0 (busy) with
 *	neither held.
 */
int
mdi_devi_tryenter(dev_info_t *phci_dip, boolean_t *enteredvp)
{
	dev_info_t	*vdip;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT3P(vdip, !=, NULL);	/* A pHCI always has a vHCI */

	*enteredvp = B_FALSE;
	if (ndi_devi_tryenter(vdip)) {
		if (ndi_devi_tryenter(phci_dip)) {
			*enteredvp = B_TRUE;
			return (1);	/* locked */
		}
		/* couldn't get the pHCI; drop the vHCI again */
		ndi_devi_exit(vdip);
	}
	return (0);	/* busy */
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
 *	Exits in the reverse order of entry: pHCI first, then the
 *	vHCI if it was entered.
 */
void
mdi_devi_exit(dev_info_t *phci_dip, boolean_t enteredv)
{
	dev_info_t	*vdip;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT3P(vdip, !=, NULL);	/* A pHCI always has a vHCI */

	ndi_devi_exit(phci_dip);
	if (enteredv)
		ndi_devi_exit(vdip);
}

/*
 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 * with vHCI power management code during path online/offline.
Each 900 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 901 * occur within the scope of an active mdi_devi_enter that establishes the 902 * circular value. 903 */ 904 void 905 mdi_devi_exit_phci(dev_info_t *phci_dip) 906 { 907 /* Verify calling context */ 908 ASSERT(MDI_PHCI(phci_dip)); 909 910 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 911 ndi_hold_devi(phci_dip); 912 913 ndi_devi_exit(phci_dip); 914 } 915 916 void 917 mdi_devi_enter_phci(dev_info_t *phci_dip) 918 { 919 /* Verify calling context */ 920 ASSERT(MDI_PHCI(phci_dip)); 921 922 ndi_devi_enter(phci_dip); 923 924 /* Drop hold from mdi_devi_exit_phci. */ 925 ndi_rele_devi(phci_dip); 926 } 927 928 /* 929 * mdi_devi_get_vdip(): 930 * given a pHCI dip return vHCI dip 931 * Returns: 932 * the vHCI dip if it exists 933 * else NULL 934 */ 935 dev_info_t * 936 mdi_devi_get_vdip(dev_info_t *pdip) 937 { 938 mdi_phci_t *ph; 939 940 ph = i_devi_get_phci(pdip); 941 if (ph && ph->ph_vhci) 942 return (ph->ph_vhci->vh_dip); 943 return (NULL); 944 } 945 946 /* 947 * mdi_devi_pdip_entered(): 948 * Return 1 if we are vHCI and have done an ndi_devi_enter 949 * of a pHCI 950 */ 951 int 952 mdi_devi_pdip_entered(dev_info_t *vdip) 953 { 954 mdi_vhci_t *vh; 955 mdi_phci_t *ph; 956 957 vh = i_devi_get_vhci(vdip); 958 if (vh == NULL) 959 return (0); 960 961 MDI_VHCI_PHCI_LOCK(vh); 962 ph = vh->vh_phci_head; 963 while (ph) { 964 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 965 MDI_VHCI_PHCI_UNLOCK(vh); 966 return (1); 967 } 968 ph = ph->ph_next; 969 } 970 MDI_VHCI_PHCI_UNLOCK(vh); 971 return (0); 972 } 973 974 /* 975 * mdi_phci_path2devinfo(): 976 * Utility function to search for a valid phci device given 977 * the devfs pathname. 
978 */ 979 dev_info_t * 980 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 981 { 982 char *temp_pathname; 983 mdi_vhci_t *vh; 984 mdi_phci_t *ph; 985 dev_info_t *pdip = NULL; 986 987 vh = i_devi_get_vhci(vdip); 988 ASSERT(vh != NULL); 989 990 if (vh == NULL) { 991 /* 992 * Invalid vHCI component, return failure 993 */ 994 return (NULL); 995 } 996 997 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 998 MDI_VHCI_PHCI_LOCK(vh); 999 ph = vh->vh_phci_head; 1000 while (ph != NULL) { 1001 pdip = ph->ph_dip; 1002 ASSERT(pdip != NULL); 1003 *temp_pathname = '\0'; 1004 (void) ddi_pathname(pdip, temp_pathname); 1005 if (strcmp(temp_pathname, pathname) == 0) { 1006 break; 1007 } 1008 ph = ph->ph_next; 1009 } 1010 if (ph == NULL) { 1011 pdip = NULL; 1012 } 1013 MDI_VHCI_PHCI_UNLOCK(vh); 1014 kmem_free(temp_pathname, MAXPATHLEN); 1015 return (pdip); 1016 } 1017 1018 /* 1019 * mdi_phci_get_path_count(): 1020 * get number of path information nodes associated with a given 1021 * pHCI device. 1022 */ 1023 int 1024 mdi_phci_get_path_count(dev_info_t *pdip) 1025 { 1026 mdi_phci_t *ph; 1027 int count = 0; 1028 1029 ph = i_devi_get_phci(pdip); 1030 if (ph != NULL) { 1031 count = ph->ph_path_count; 1032 } 1033 return (count); 1034 } 1035 1036 /* 1037 * i_mdi_phci_lock(): 1038 * Lock a pHCI device 1039 * Return Values: 1040 * None 1041 * Note: 1042 * The default locking order is: 1043 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1044 * But there are number of situations where locks need to be 1045 * grabbed in reverse order. This routine implements try and lock 1046 * mechanism depending on the requested parameter option. 1047 */ 1048 static void 1049 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1050 { 1051 if (pip) { 1052 /* Reverse locking is requested. 
 */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Cannot delay in interrupt context: take a
				 * hold on pip, drop its lock, block on the
				 * pHCI lock, then reacquire in default order.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_PHCI_LOCK(ph);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *	Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *	create client device's devinfo node
 * Return Values:
 *	dev_info
 *	NULL
 * Notes:
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
    char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		/*
		 * NOTE(review): on a non-DEBUG kernel this only warns and
		 * still falls through to allocate a second node — confirm
		 * callers guarantee uniqueness.
		 */
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client %s@%s already exists",
		    name ? name : "", guid ? guid : "");
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* Undo property/node allocation on any failure path. */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *	Find a matching devinfo node for given client node name
 *	and its guid.
 * Return Values:
 *	Handle to a dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char		*data;
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;

	/* Hold the vHCI while walking its child list. */
	ndi_devi_enter(vh->vh_dip);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		/* Node name and guid both match. */
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *	Remove a client device node
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;

	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
		if (rv != NDI_SUCCESS) {
			MDI_DEBUG(1, (MDI_NOTE, cdip,
			    "!failed: cdip %p", (void *)cdip));
		}
		/*
		 * Convert to MDI error code
		 */
		switch (rv) {
		case NDI_SUCCESS:
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	}
	return (rv);
}

/*
 * i_devi_get_client()
 *	Utility function to get mpxio component extensions
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
	mdi_client_t	*ct = NULL;

	if (MDI_CLIENT(cdip)) {
		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
	}
	return (ct);
}

/*
 * i_mdi_is_child_present():
 *	Search for the presence of client device dev_info node
 */
static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
	int		rv = MDI_FAILURE;
	struct dev_info	*dip;

	ndi_devi_enter(vdip);
	dip = DEVI(vdip)->devi_child;
	while (dip) {
		if (dip == DEVI(cdip)) {
			rv = MDI_SUCCESS;
			break;
		}
		dip = dip->devi_sibling;
	}
	ndi_devi_exit(vdip);
	return (rv);
}


/*
 * i_mdi_client_lock():
 *	Grab client component lock
 * Return Values:
 *	None
 * Note:
 *	The default locking order is:
 *	_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *	But there are number of situations where locks need to be
 *	grabbed in reverse order. This routine implements try and lock
 *	mechanism depending on the requested parameter option.
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Cannot delay in interrupt context: take a
				 * hold on pip, drop its lock, block on the
				 * client lock, then reacquire in default
				 * order.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_CLIENT_LOCK(ct);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		MDI_CLIENT_LOCK(ct);
	}
}

/*
 * i_mdi_client_unlock():
 *	Unlock a client component
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_client_alloc():
 *	Allocate and initialize a client structure. Caller should
 *	hold the vhci client lock.
 * Return Values:
 *	Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/*
	 * Allocate and initialize a component structure.
 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_dip = NULL;
	ct->ct_vhci = vh;
	/* Private copies of the driver name and guid strings. */
	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;
	ct->ct_flags = 0;
	/* New clients start failed/offline until a path comes online. */
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	MDI_CLIENT_UNLOCK(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	ct->ct_unstable = 0;
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
	/* Inherit the vHCI's load-balance policy by default. */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);
	return (ct);
}

/*
 * i_mdi_client_enlist_table():
 *	Attach the client device to the client hash table. Caller
 *	should hold the vhci client lock.
 */
static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	struct client_hash	*head;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	index = i_mdi_get_hash_key(ct->ct_guid);
	head = &vh->vh_client_table[index];
	/* Push onto the head of the hash chain. */
	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
	head->ct_hash_head = ct;
	head->ct_hash_count++;
	vh->vh_client_count++;
}

/*
 * i_mdi_client_delist_table():
 *	Detach the client device from the client hash table.
 *	Caller should hold the vhci client lock.
 */
static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	char			*guid;
	struct client_hash	*head;
	mdi_client_t		*next;
	mdi_client_t		*last;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	guid = ct->ct_guid;
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* Locate ct on its hash chain, remembering the previous node. */
	last = NULL;
	next = (mdi_client_t *)head->ct_hash_head;
	while (next != NULL) {
		if (next == ct) {
			break;
		}
		last = next;
		next = next->ct_hnext;
	}

	if (next) {
		head->ct_hash_count--;
		if (last == NULL) {
			head->ct_hash_head = ct->ct_hnext;
		} else {
			last->ct_hnext = ct->ct_hnext;
		}
		ct->ct_hnext = NULL;
		vh->vh_client_count--;
	}
}


/*
 * i_mdi_client_free():
 *	Free a client component
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = NULL;

	/*
	 * Clear out back ref. to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	/*
	 * Drop the vhci client lock across the devinfo node removal
	 * (it may block); reacquire it for the caller.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);
	(void) i_mdi_devinfo_remove(vdip, cdip, flags);
	MDI_VHCI_CLIENT_LOCK(vh);

	return (rv);
}

/*
 * i_mdi_client_find():
 *	Find the client structure corresponding to a given guid
 *	Caller should hold the vhci client lock.
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
	int			index;
	struct client_hash	*head;
	mdi_client_t		*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* A NULL cname matches any driver name with the given guid. */
	ct = head->ct_hash_head;
	while (ct != NULL) {
		if (strcmp(ct->ct_guid, guid) == 0 &&
		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
			break;
		}
		ct = ct->ct_hnext;
	}
	return (ct);
}

/*
 * i_mdi_client_update_state():
 *	Compute and update client device state
 * Notes:
 *	A client device can be in any of three possible states:
 *
 *	MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *	than one online/standby paths. Can tolerate failures.
 *	MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *	no alternate paths available as standby. A failure on the online
 *	would result in loss of access to device data.
 *	MDI_CLIENT_STATE_FAILED - Client device in failed state with
 *	no paths available to access the device.
 */
static void
i_mdi_client_update_state(mdi_client_t *ct)
{
	int	state;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	state = i_mdi_client_compute_state(ct, NULL);
	MDI_CLIENT_SET_STATE(ct, state);
}

/*
 * i_mdi_client_compute_state():
 *	Compute client device state
 *
 *	mdi_phci_t *	Pointer to pHCI structure which should be skipped
 *			while computing the new value. Used by
 *			i_mdi_phci_offline() to find the new
 *			client state after DR of a pHCI.
 */
static int
i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
{
	int		state;
	int		online_count = 0;
	int		standby_count = 0;
	mdi_pathinfo_t	*pip, *next;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Count online and standby paths, excluding those through ph. */
	pip = ct->ct_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		if (MDI_PI(pip)->pi_phci == ph) {
			MDI_PI_UNLOCK(pip);
			pip = next;
			continue;
		}

		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_ONLINE)
			online_count++;
		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_STANDBY)
			standby_count++;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	/* Derive the client state from the path counts (see header note). */
	if (online_count == 0) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_FAILED;
			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
			    "client state failed: ct = %p", (void *)ct));
		} else if (standby_count == 1) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else if (online_count == 1) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else {
		state = MDI_CLIENT_STATE_OPTIMAL;
	}
	return (state);
}

/*
 * i_mdi_client2devinfo():
 *	Utility function: return the devinfo node of a client component.
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	return (ct->ct_dip);
}

/*
 * mdi_client_path2devinfo():
 *	Given the parent devinfo and child devfs pathname, search for
 *	a valid devfs node handle.
 */
dev_info_t *
mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
{
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	char		*temp_pathname;

	/*
	 * Allocate temp buffer
	 */
	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	/*
	 * Lock parent against changes
	 */
	ndi_devi_enter(vdip);
	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		*temp_pathname = '\0';
		(void) ddi_pathname(cdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
	}
	/*
	 * Release devinfo lock
	 */
	ndi_devi_exit(vdip);

	/*
	 * Free the temp buffer
	 */
	kmem_free(temp_pathname, MAXPATHLEN);
	return (cdip);
}

/*
 * mdi_client_get_path_count():
 *	Utility function to get number of path information nodes
 *	associated with a given client device.
 */
int
mdi_client_get_path_count(dev_info_t *cdip)
{
	mdi_client_t	*ct;
	int		count = 0;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		count = ct->ct_path_count;
	}
	return (count);
}


/*
 * i_mdi_get_hash_key():
 *	Create a hash using strings as keys
 *
 */
static int
i_mdi_get_hash_key(char *str)
{
	uint32_t	g, hash = 0;
	char		*p;

	/* Simple additive hash over the bytes of the string. */
	for (p = str; *p != '\0'; p++) {
		g = *p;
		hash += g;
	}
	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
}

/*
 * mdi_get_lb_policy():
 *	Get current load balancing policy for a given client device
 */
client_lb_t
mdi_get_lb_policy(dev_info_t *cdip)
{
	client_lb_t	lb = LOAD_BALANCE_NONE;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		lb = ct->ct_lb;
	}
	return (lb);
}

/*
 * mdi_set_lb_region_size():
 *	Set current region size for the load-balance
 */
int
mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
{
	mdi_client_t	*ct;
	int		rv = MDI_FAILURE;

	ct = i_devi_get_client(cdip);
	if (ct != NULL && ct->ct_lb_args != NULL) {
		ct->ct_lb_args->region_size = region_size;
		rv = MDI_SUCCESS;
	}
	return (rv);
}

/*
 * mdi_set_lb_policy():
 *	Set current load balancing policy for a given client device
 */
int
mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
{
	mdi_client_t	*ct;
	int		rv = MDI_FAILURE;

	ct = i_devi_get_client(cdip);
	if (ct != NULL) {
		ct->ct_lb = lb;
		rv = MDI_SUCCESS;
	}
	return (rv);
}

/* Taskq callback: run i_mdi_failover asynchronously, discarding status. */
static void
mdi_failover_cb(void *arg)
{
	(void)i_mdi_failover(arg);
}

/*
 * mdi_failover():
 *	failover function called by the vHCI drivers to initiate
 *	a failover operation. 
This is typically due to non-availability
 * of online paths to route I/O requests. Failover can be
 * triggered through user application also.
 *
 * The vHCI driver calls mdi_failover() to initiate a failover
 * operation. mdi_failover() calls back into the vHCI driver's
 * vo_failover() entry point to perform the actual failover
 * operation. The reason for requiring the vHCI driver to
 * initiate failover by calling mdi_failover(), instead of directly
 * executing vo_failover() itself, is to ensure that the mdi
 * framework can keep track of the client state properly.
 * Additionally, mdi_failover() provides as a convenience the
 * option of performing the failover operation synchronously or
 * asynchronously
 *
 * Upon successful completion of the failover operation, the
 * paths that were previously ONLINE will be in the STANDBY state,
 * and the newly activated paths will be in the ONLINE state.
 *
 * The flags modifier determines whether the activation is done
 * synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int		rv;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* Synchronous: wait for path state to settle. */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again.
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(cdip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.
		 */
		(void) taskq_dispatch(mdi_taskq, mdi_failover_cb, ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode. Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *	internal failover function. Invokes vHCI drivers failover
 *	callback function and process the failover status
 * Return Values:
 *	None
 *
 * Note: A client device in failover state can not be detached or freed.
 */
static int
i_mdi_failover(void *arg)
{
	int		rv = MDI_SUCCESS;
	mdi_client_t	*ct = (mdi_client_t *)arg;
	mdi_vhci_t	*vh = ct->ct_vhci;

	ASSERT(!MDI_CLIENT_LOCKED(ct));

	if (vh->vh_ops->vo_failover != NULL) {
		/*
		 * Call vHCI drivers callback routine
		 */
		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
		    ct->ct_failover_flags);
	}

	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);

	/*
	 * Save the failover return status
	 */
	ct->ct_failover_status = rv;

	/*
	 * As a result of failover, client status would have been changed.
	 * Update the client state and wake up anyone waiting on this client
	 * device.
	 */
	i_mdi_client_update_state(ct);

	cv_broadcast(&ct->ct_failover_cv);
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * Load balancing is logical block.
 * IOs within the range described by region_size
 * would go on the same path. This would improve the
 * performance by cache-hit on some of the RAID devices.
 * Search only for online paths(At some point we
 * may want to balance across target ports).
 * If no paths are found then default to round-robin.
 */
static int
i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
{
	int		path_index = -1;
	int		online_path_count = 0;
	int		online_nonpref_path_count = 0;
	int		region_size = ct->ct_lb_args->region_size;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	int		preferred, path_cnt;

	/* First pass: count online preferred and non-preferred paths. */
	pip = ct->ct_path_head;
	while (pip) {
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
			online_path_count++;
		} else if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
			online_nonpref_path_count++;
		}
		next = (mdi_pathinfo_t *)
		    MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	/* if found any online/preferred then use this type */
	if (online_path_count > 0) {
		path_cnt = online_path_count;
		preferred = 1;
	} else if (online_nonpref_path_count > 0) {
		path_cnt = online_nonpref_path_count;
		preferred = 0;
	} else {
		path_cnt = 0;
	}
	if (path_cnt) {
		/*
		 * Map the starting block into a path index so that I/Os
		 * within the same region land on the same path.
		 */
		path_index = (bp->b_blkno >> region_size) % path_cnt;
		pip = ct->ct_path_head;
		while (pip && path_index != -1) {
			MDI_PI_LOCK(pip);
			if (path_index == 0 &&
			    (MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    MDI_PI(pip)->pi_preferred == preferred) {
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				*ret_pip = pip;
				return (MDI_SUCCESS);
			}
			path_index --;
			next = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		/*
		 * NOTE(review): pip may be NULL here when the list was
		 * exhausted; confirm mdi_pi_spathname() tolerates NULL
		 * before relying on this DEBUG message.
		 */
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "lba %llx: path %s %p",
		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
	}
	return (MDI_FAILURE);
}

/*
 * mdi_select_path():
 *	select a path to access a client device. 
1983 * 1984 * mdi_select_path() function is called by the vHCI drivers to 1985 * select a path to route the I/O request to. The caller passes 1986 * the block I/O data transfer structure ("buf") as one of the 1987 * parameters. The mpxio framework uses the buf structure 1988 * contents to maintain per path statistics (total I/O size / 1989 * count pending). If more than one online paths are available to 1990 * select, the framework automatically selects a suitable path 1991 * for routing I/O request. If a failover operation is active for 1992 * this client device the call shall be failed with MDI_BUSY error 1993 * code. 1994 * 1995 * By default this function returns a suitable path in online 1996 * state based on the current load balancing policy. Currently 1997 * we support LOAD_BALANCE_NONE (Previously selected online path 1998 * will continue to be used till the path is usable) and 1999 * LOAD_BALANCE_RR (Online paths will be selected in a round 2000 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 2001 * based on the logical block). The load balancing 2002 * through vHCI drivers configuration file (driver.conf). 2003 * 2004 * vHCI drivers may override this default behavior by specifying 2005 * appropriate flags. The meaning of the thrid argument depends 2006 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 2007 * then the argument is the "path instance" of the path to select. 2008 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 2009 * "start_pip". A non NULL "start_pip" is the starting point to 2010 * walk and find the next appropriate path. The following values 2011 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 2012 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 2013 * STANDBY path). 2014 * 2015 * The non-standard behavior is used by the scsi_vhci driver, 2016 * whenever it has to use a STANDBY/FAULTED path. Eg. 
during 2017 * attach of client devices (to avoid an unnecessary failover 2018 * when the STANDBY path comes up first), during failover 2019 * (to activate a STANDBY path as ONLINE). 2020 * 2021 * The selected path is returned in a a mdi_hold_path() state 2022 * (pi_ref_cnt). Caller should release the hold by calling 2023 * mdi_rele_path(). 2024 * 2025 * Return Values: 2026 * MDI_SUCCESS - Completed successfully 2027 * MDI_BUSY - Client device is busy failing over 2028 * MDI_NOPATH - Client device is online, but no valid path are 2029 * available to access this client device 2030 * MDI_FAILURE - Invalid client device or state 2031 * MDI_DEVI_ONLINING 2032 * - Client device (struct dev_info state) is in 2033 * onlining state. 2034 */ 2035 2036 /*ARGSUSED*/ 2037 int 2038 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2039 void *arg, mdi_pathinfo_t **ret_pip) 2040 { 2041 mdi_client_t *ct; 2042 mdi_pathinfo_t *pip; 2043 mdi_pathinfo_t *next; 2044 mdi_pathinfo_t *head; 2045 mdi_pathinfo_t *start; 2046 client_lb_t lbp; /* load balancing policy */ 2047 int sb = 1; /* standard behavior */ 2048 int preferred = 1; /* preferred path */ 2049 int cond, cont = 1; 2050 int retry = 0; 2051 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2052 int path_instance; /* request specific path instance */ 2053 2054 /* determine type of arg based on flags */ 2055 if (flags & MDI_SELECT_PATH_INSTANCE) { 2056 path_instance = (int)(intptr_t)arg; 2057 start_pip = NULL; 2058 } else { 2059 path_instance = 0; 2060 start_pip = (mdi_pathinfo_t *)arg; 2061 } 2062 2063 if (flags != 0) { 2064 /* 2065 * disable default behavior 2066 */ 2067 sb = 0; 2068 } 2069 2070 *ret_pip = NULL; 2071 ct = i_devi_get_client(cdip); 2072 if (ct == NULL) { 2073 /* mdi extensions are NULL, Nothing more to do */ 2074 return (MDI_FAILURE); 2075 } 2076 2077 MDI_CLIENT_LOCK(ct); 2078 2079 if (sb) { 2080 if (MDI_CLIENT_IS_FAILED(ct)) { 2081 /* 2082 * Client is not ready to accept any I/O requests. 
2083 * Fail this request. 2084 */ 2085 MDI_DEBUG(2, (MDI_NOTE, cdip, 2086 "client state offline ct = %p", (void *)ct)); 2087 MDI_CLIENT_UNLOCK(ct); 2088 return (MDI_FAILURE); 2089 } 2090 2091 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2092 /* 2093 * Check for Failover is in progress. If so tell the 2094 * caller that this device is busy. 2095 */ 2096 MDI_DEBUG(2, (MDI_NOTE, cdip, 2097 "client failover in progress ct = %p", 2098 (void *)ct)); 2099 MDI_CLIENT_UNLOCK(ct); 2100 return (MDI_BUSY); 2101 } 2102 2103 /* 2104 * Check to see whether the client device is attached. 2105 * If not so, let the vHCI driver manually select a path 2106 * (standby) and let the probe/attach process to continue. 2107 */ 2108 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2109 MDI_DEBUG(4, (MDI_NOTE, cdip, 2110 "devi is onlining ct = %p", (void *)ct)); 2111 MDI_CLIENT_UNLOCK(ct); 2112 return (MDI_DEVI_ONLINING); 2113 } 2114 } 2115 2116 /* 2117 * Cache in the client list head. If head of the list is NULL 2118 * return MDI_NOPATH 2119 */ 2120 head = ct->ct_path_head; 2121 if (head == NULL) { 2122 MDI_CLIENT_UNLOCK(ct); 2123 return (MDI_NOPATH); 2124 } 2125 2126 /* Caller is specifying a specific pathinfo path by path_instance */ 2127 if (path_instance) { 2128 /* search for pathinfo with correct path_instance */ 2129 for (pip = head; 2130 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2131 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2132 ; 2133 2134 /* If path can't be selected then MDI_NOPATH is returned. */ 2135 if (pip == NULL) { 2136 MDI_CLIENT_UNLOCK(ct); 2137 return (MDI_NOPATH); 2138 } 2139 2140 /* 2141 * Verify state of path. When asked to select a specific 2142 * path_instance, we select the requested path in any 2143 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2144 * We don't however select paths where the pHCI has detached. 
2145 * NOTE: last pathinfo node of an opened client device may 2146 * exist in an OFFLINE state after the pHCI associated with 2147 * that path has detached (but pi_phci will be NULL if that 2148 * has occurred). 2149 */ 2150 MDI_PI_LOCK(pip); 2151 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2152 (MDI_PI(pip)->pi_phci == NULL)) { 2153 MDI_PI_UNLOCK(pip); 2154 MDI_CLIENT_UNLOCK(ct); 2155 return (MDI_FAILURE); 2156 } 2157 2158 /* Return MDI_BUSY if we have a transient condition */ 2159 if (MDI_PI_IS_TRANSIENT(pip)) { 2160 MDI_PI_UNLOCK(pip); 2161 MDI_CLIENT_UNLOCK(ct); 2162 return (MDI_BUSY); 2163 } 2164 2165 /* 2166 * Return the path in hold state. Caller should release the 2167 * lock by calling mdi_rele_path() 2168 */ 2169 MDI_PI_HOLD(pip); 2170 MDI_PI_UNLOCK(pip); 2171 *ret_pip = pip; 2172 MDI_CLIENT_UNLOCK(ct); 2173 return (MDI_SUCCESS); 2174 } 2175 2176 /* 2177 * for non default behavior, bypass current 2178 * load balancing policy and always use LOAD_BALANCE_RR 2179 * except that the start point will be adjusted based 2180 * on the provided start_pip 2181 */ 2182 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2183 2184 switch (lbp) { 2185 case LOAD_BALANCE_NONE: 2186 /* 2187 * Load balancing is None or Alternate path mode 2188 * Start looking for a online mdi_pathinfo node starting from 2189 * last known selected path 2190 */ 2191 preferred = 1; 2192 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2193 if (pip == NULL) { 2194 pip = head; 2195 } 2196 start = pip; 2197 do { 2198 MDI_PI_LOCK(pip); 2199 /* 2200 * No need to explicitly check if the path is disabled. 2201 * Since we are checking for state == ONLINE and the 2202 * same variable is used for DISABLE/ENABLE information. 2203 */ 2204 if ((MDI_PI(pip)->pi_state == 2205 MDI_PATHINFO_STATE_ONLINE) && 2206 preferred == MDI_PI(pip)->pi_preferred) { 2207 /* 2208 * Return the path in hold state. 
Caller should 2209 * release the lock by calling mdi_rele_path() 2210 */ 2211 MDI_PI_HOLD(pip); 2212 MDI_PI_UNLOCK(pip); 2213 ct->ct_path_last = pip; 2214 *ret_pip = pip; 2215 MDI_CLIENT_UNLOCK(ct); 2216 return (MDI_SUCCESS); 2217 } 2218 2219 /* 2220 * Path is busy. 2221 */ 2222 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2223 MDI_PI_IS_TRANSIENT(pip)) 2224 retry = 1; 2225 /* 2226 * Keep looking for a next available online path 2227 */ 2228 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2229 if (next == NULL) { 2230 next = head; 2231 } 2232 MDI_PI_UNLOCK(pip); 2233 pip = next; 2234 if (start == pip && preferred) { 2235 preferred = 0; 2236 } else if (start == pip && !preferred) { 2237 cont = 0; 2238 } 2239 } while (cont); 2240 break; 2241 2242 case LOAD_BALANCE_LBA: 2243 /* 2244 * Make sure we are looking 2245 * for an online path. Otherwise, if it is for a STANDBY 2246 * path request, it will go through and fetch an ONLINE 2247 * path which is not desirable. 2248 */ 2249 if ((ct->ct_lb_args != NULL) && 2250 (ct->ct_lb_args->region_size) && bp && 2251 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2252 if (i_mdi_lba_lb(ct, ret_pip, bp) 2253 == MDI_SUCCESS) { 2254 MDI_CLIENT_UNLOCK(ct); 2255 return (MDI_SUCCESS); 2256 } 2257 } 2258 /* FALLTHROUGH */ 2259 case LOAD_BALANCE_RR: 2260 /* 2261 * Load balancing is Round Robin. Start looking for a online 2262 * mdi_pathinfo node starting from last known selected path 2263 * as the start point. If override flags are specified, 2264 * process accordingly. 2265 * If the search is already in effect(start_pip not null), 2266 * then lets just use the same path preference to continue the 2267 * traversal. 2268 */ 2269 2270 if (start_pip != NULL) { 2271 preferred = MDI_PI(start_pip)->pi_preferred; 2272 } else { 2273 preferred = 1; 2274 } 2275 2276 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2277 if (start == NULL) { 2278 pip = head; 2279 } else { 2280 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2281 if (pip == NULL) { 2282 if ( flags & MDI_SELECT_NO_PREFERRED) { 2283 /* 2284 * Return since we hit the end of list 2285 */ 2286 MDI_CLIENT_UNLOCK(ct); 2287 return (MDI_NOPATH); 2288 } 2289 2290 if (!sb) { 2291 if (preferred == 0) { 2292 /* 2293 * Looks like we have completed 2294 * the traversal as preferred 2295 * value is 0. Time to bail out. 2296 */ 2297 *ret_pip = NULL; 2298 MDI_CLIENT_UNLOCK(ct); 2299 return (MDI_NOPATH); 2300 } else { 2301 /* 2302 * Looks like we reached the 2303 * end of the list. Lets enable 2304 * traversal of non preferred 2305 * paths. 2306 */ 2307 preferred = 0; 2308 } 2309 } 2310 pip = head; 2311 } 2312 } 2313 start = pip; 2314 do { 2315 MDI_PI_LOCK(pip); 2316 if (sb) { 2317 cond = ((MDI_PI(pip)->pi_state == 2318 MDI_PATHINFO_STATE_ONLINE && 2319 MDI_PI(pip)->pi_preferred == 2320 preferred) ? 1 : 0); 2321 } else { 2322 if (flags == MDI_SELECT_ONLINE_PATH) { 2323 cond = ((MDI_PI(pip)->pi_state == 2324 MDI_PATHINFO_STATE_ONLINE && 2325 MDI_PI(pip)->pi_preferred == 2326 preferred) ? 1 : 0); 2327 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2328 cond = ((MDI_PI(pip)->pi_state == 2329 MDI_PATHINFO_STATE_STANDBY && 2330 MDI_PI(pip)->pi_preferred == 2331 preferred) ? 1 : 0); 2332 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2333 MDI_SELECT_STANDBY_PATH)) { 2334 cond = (((MDI_PI(pip)->pi_state == 2335 MDI_PATHINFO_STATE_ONLINE || 2336 (MDI_PI(pip)->pi_state == 2337 MDI_PATHINFO_STATE_STANDBY)) && 2338 MDI_PI(pip)->pi_preferred == 2339 preferred) ? 
1 : 0); 2340 } else if (flags == 2341 (MDI_SELECT_STANDBY_PATH | 2342 MDI_SELECT_ONLINE_PATH | 2343 MDI_SELECT_USER_DISABLE_PATH)) { 2344 cond = (((MDI_PI(pip)->pi_state == 2345 MDI_PATHINFO_STATE_ONLINE || 2346 (MDI_PI(pip)->pi_state == 2347 MDI_PATHINFO_STATE_STANDBY) || 2348 (MDI_PI(pip)->pi_state == 2349 (MDI_PATHINFO_STATE_ONLINE| 2350 MDI_PATHINFO_STATE_USER_DISABLE)) || 2351 (MDI_PI(pip)->pi_state == 2352 (MDI_PATHINFO_STATE_STANDBY | 2353 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2354 MDI_PI(pip)->pi_preferred == 2355 preferred) ? 1 : 0); 2356 } else if (flags == 2357 (MDI_SELECT_STANDBY_PATH | 2358 MDI_SELECT_ONLINE_PATH | 2359 MDI_SELECT_NO_PREFERRED)) { 2360 cond = (((MDI_PI(pip)->pi_state == 2361 MDI_PATHINFO_STATE_ONLINE) || 2362 (MDI_PI(pip)->pi_state == 2363 MDI_PATHINFO_STATE_STANDBY)) 2364 ? 1 : 0); 2365 } else { 2366 cond = 0; 2367 } 2368 } 2369 /* 2370 * No need to explicitly check if the path is disabled. 2371 * Since we are checking for state == ONLINE and the 2372 * same variable is used for DISABLE/ENABLE information. 2373 */ 2374 if (cond) { 2375 /* 2376 * Return the path in hold state. Caller should 2377 * release the lock by calling mdi_rele_path() 2378 */ 2379 MDI_PI_HOLD(pip); 2380 MDI_PI_UNLOCK(pip); 2381 if (sb) 2382 ct->ct_path_last = pip; 2383 *ret_pip = pip; 2384 MDI_CLIENT_UNLOCK(ct); 2385 return (MDI_SUCCESS); 2386 } 2387 /* 2388 * Path is busy. 2389 */ 2390 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2391 MDI_PI_IS_TRANSIENT(pip)) 2392 retry = 1; 2393 2394 /* 2395 * Keep looking for a next available online path 2396 */ 2397 do_again: 2398 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2399 if (next == NULL) { 2400 if ( flags & MDI_SELECT_NO_PREFERRED) { 2401 /* 2402 * Bail out since we hit the end of list 2403 */ 2404 MDI_PI_UNLOCK(pip); 2405 break; 2406 } 2407 2408 if (!sb) { 2409 if (preferred == 1) { 2410 /* 2411 * Looks like we reached the 2412 * end of the list. Lets enable 2413 * traversal of non preferred 2414 * paths. 
2415 */ 2416 preferred = 0; 2417 next = head; 2418 } else { 2419 /* 2420 * We have done both the passes 2421 * Preferred as well as for 2422 * Non-preferred. Bail out now. 2423 */ 2424 cont = 0; 2425 } 2426 } else { 2427 /* 2428 * Standard behavior case. 2429 */ 2430 next = head; 2431 } 2432 } 2433 MDI_PI_UNLOCK(pip); 2434 if (cont == 0) { 2435 break; 2436 } 2437 pip = next; 2438 2439 if (!sb) { 2440 /* 2441 * We need to handle the selection of 2442 * non-preferred path in the following 2443 * case: 2444 * 2445 * +------+ +------+ +------+ +-----+ 2446 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2447 * +------+ +------+ +------+ +-----+ 2448 * 2449 * If we start the search with B, we need to 2450 * skip beyond B to pick C which is non - 2451 * preferred in the second pass. The following 2452 * test, if true, will allow us to skip over 2453 * the 'start'(B in the example) to select 2454 * other non preferred elements. 2455 */ 2456 if ((start_pip != NULL) && (start_pip == pip) && 2457 (MDI_PI(start_pip)->pi_preferred 2458 != preferred)) { 2459 /* 2460 * try again after going past the start 2461 * pip 2462 */ 2463 MDI_PI_LOCK(pip); 2464 goto do_again; 2465 } 2466 } else { 2467 /* 2468 * Standard behavior case 2469 */ 2470 if (start == pip && preferred) { 2471 /* look for nonpreferred paths */ 2472 preferred = 0; 2473 } else if (start == pip && !preferred) { 2474 /* 2475 * Exit condition 2476 */ 2477 cont = 0; 2478 } 2479 } 2480 } while (cont); 2481 break; 2482 } 2483 2484 MDI_CLIENT_UNLOCK(ct); 2485 if (retry == 1) { 2486 return (MDI_BUSY); 2487 } else { 2488 return (MDI_NOPATH); 2489 } 2490 } 2491 2492 /* 2493 * For a client, return the next available path to any phci 2494 * 2495 * Note: 2496 * Caller should hold the branch's devinfo node to get a consistent 2497 * snap shot of the mdi_pathinfo nodes. 2498 * 2499 * Please note that even the list is stable the mdi_pathinfo 2500 * node state and properties are volatile. 
The caller should lock
 * and unlock the nodes by calling mdi_pi_lock() and
 * mdi_pi_unlock() functions to get a stable properties.
 *
 * If there is a need to use the nodes beyond the hold of the
 * devinfo node period (For ex. I/O), then mdi_pathinfo node
 * need to be held against unexpected removal by calling
 * mdi_hold_path() and should be released by calling
 * mdi_rele_path() on completion.
 */
mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
{
	mdi_client_t *ct;

	/* Only client devinfo nodes carry a pathinfo list to walk. */
	if (!MDI_CLIENT(ct_dip))
		return (NULL);

	/*
	 * Walk through client link: pip == NULL starts at the head,
	 * otherwise advance to the node after 'pip'.
	 */
	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
	ASSERT(ct != NULL);

	if (pip == NULL)
		return ((mdi_pathinfo_t *)ct->ct_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
}

/*
 * For a phci, return the next available path to any client
 * Note: ditto mdi_get_next_phci_path()
 */
mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
{
	mdi_phci_t *ph;

	if (!MDI_PHCI(ph_dip))
		return (NULL);

	/*
	 * Walk through pHCI link: pip == NULL starts at the head,
	 * otherwise advance to the node after 'pip'.
	 */
	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
	ASSERT(ph != NULL);

	if (pip == NULL)
		return ((mdi_pathinfo_t *)ph->ph_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
}

/*
 * mdi_hold_path():
 *		Hold the mdi_pathinfo node against unwanted unexpected free.
 *		NULL is tolerated and is a no-op.
 * Return Values:
 *		None
 */
void
mdi_hold_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
	}
}


/*
 * mdi_rele_path():
 *		Release the mdi_pathinfo node which was selected
 *		through mdi_select_path() mechanism or manually held by
 *		calling mdi_hold_path().
 * Return Values:
 *		None
 */
void
mdi_rele_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_RELE(pip);
		/* Wake waiters (e.g. mdi_pi_free) blocked on the last hold. */
		if (MDI_PI(pip)->pi_ref_cnt == 0) {
			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
		}
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_lock():
 *		Lock the mdi_pathinfo node.
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	/* Defensive NULL check for non-DEBUG builds where ASSERT is a no-op. */
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.
 * Note:
 *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	/* Defensive NULL check for non-DEBUG builds where ASSERT is a no-op. */
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *		Returns a pointer to the mdi_pathinfo node if a matching node is
 *		found.  If "caddr" is NULL the search is done on the pHCI path
 *		list; otherwise the matching client's path list is searched.
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL
 * Notes:
 *		Caller need not hold any locks to call this function.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t *ph;
	mdi_vhci_t *vh;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (MDI_WARN, pdip,
			    "offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "client not found for caddr @%s", caddr ? caddr : ""));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc():
 *		Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path, is capable of having properties attached
 *		and passed to mdi_pi_online() to fully attach and online the
 *		path and client device node.
 *		The mdi_pathinfo node returned by this function must be
 *		destroyed using mdi_pi_free() if the path is no longer
 *		operational or if the caller fails to attach a client device
 *		node when calling mdi_pi_online(). The framework will not free
 *		the resources allocated.
 *		This function can be called from both interrupt and kernel
 *		contexts.  DDI_NOSLEEP flag should be used while calling
 *		from interrupt contexts.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;	/* default if devinfo create fails */
	int		path_allocated = 0;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "cname %s: caddr@%s paddr@%s",
	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/*
	 * Mark the pHCI unstable for the duration of the allocation;
	 * re-marked stable on every exit path below.
	 */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* rv is still MDI_NOMEM here */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/*
	 * Check whether a pathinfo for this pHCI/paddr pair already exists
	 * on the client list; if so it is returned rather than duplicated.
	 */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *		Convenience wrapper around mdi_pi_alloc_compatible() with no
 *		"compatible" property list.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *		mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
	char		*path_persistent;
	int		path_instance;
	mod_hash_val_t	hv;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit the pHCI's disable settings into the new pathinfo. */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);

	/*
	 * We form the "path" to the pathinfo node, and see if we have
	 * already allocated a 'path_instance' for that "path". If so,
	 * we use the already allocated 'path_instance'. If not, we
	 * allocate a new 'path_instance' and associate it with a copy of
	 * the "path" string (which is never freed). The association
	 * between a 'path_instance' this "path" string persists until
	 * reboot.
	 *
	 * NOTE(review): the sprintf below appends into the static
	 * MAXPATHLEN buffer unbounded — presumably the ddi_pathname
	 * prefix plus node@addr always fits; confirm before changing.
	 */
	mutex_enter(&mdi_pathmap_mutex);
	(void) ddi_pathname(ph->ph_dip, path);
	(void) sprintf(path + strlen(path), "/%s@%s",
	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
	if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
		path_instance = (uint_t)(intptr_t)hv;
	} else {
		/* allocate a new 'path_instance' and persistent "path" */
		path_instance = mdi_pathmap_instance++;
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_bypath,
		    (mod_hash_key_t)path_persistent,
		    (mod_hash_val_t)(intptr_t)path_instance);
		(void) mod_hash_insert(mdi_pathmap_byinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);

		/* create shortpath name */
		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);
	}
	mutex_exit(&mdi_pathmap_mutex);
	MDI_PI(pip)->pi_path_instance = path_instance;

	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip);
	ndi_devi_enter(ph->ph_dip);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip);
	ndi_devi_exit(ct->ct_dip);

	return (pip);
}

/*
 * mdi_pi_pathname_by_instance():
 *	Lookup of "path" by 'path_instance'. Return "path".
 *	NOTE: returned "path" remains valid forever (until reboot).
 */
char *
mdi_pi_pathname_by_instance(int path_instance)
{
	char		*path;
	mod_hash_val_t	hv;

	/* mdi_pathmap lookup of "path" by 'path_instance' */
	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_byinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
		path = (char *)hv;
	else
		path = NULL;
	mutex_exit(&mdi_pathmap_mutex);
	return (path);
}

/*
 * mdi_pi_spathname_by_instance():
 *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
 *	NOTE: returned "shortpath" remains valid forever (until reboot).
 */
char *
mdi_pi_spathname_by_instance(int path_instance)
{
	char		*path;
	mod_hash_val_t	hv;

	/* mdi_pathmap lookup of "shortpath" by 'path_instance' */
	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_sbyinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
		path = (char *)hv;
	else
		path = NULL;
	mutex_exit(&mdi_pathmap_mutex);
	return (path);
}


/*
 * i_mdi_phci_add_path():
 *		Add a mdi_pathinfo node to the tail of the pHCI path list.
 * Notes:
 *		Caller must hold the pHCI dev_info node busy
 *		(DEVI_BUSY_OWNED asserted); the per-pHCI mutex is
 *		acquired here.
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	MDI_PHCI_LOCK(ph);
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to the tail of the client path list.
 *		Caller must hold the client dev_info node busy; the client
 *		mutex is acquired here.
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	MDI_CLIENT_LOCK(ct);
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid pHCI: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vHCI: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 * Waits up to 60 seconds per iteration for the reference
		 * count to drain before giving up with MDI_BUSY.
		 */
		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
		    "!%d cmds still pending on path: %s %p",
		    MDI_PI(pip)->pi_ref_cnt,
		    mdi_pi_spathname(pip), (void *)pip));
		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
		    TR_CLOCK_TICK) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!Timeout reached on path %s %p without the cond",
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!%d cmds still pending on path %s %p",
			    MDI_PI(pip)->pi_ref_cnt,
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock and reacquire it after taking the vHCI
	 * client lock, preserving the documented lock order
	 * (vh_client_mutex before ct_mutex).
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	rv = MDI_SUCCESS;
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}

	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip is freed here; it must not be dereferenced below. */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/* Undo the earlier vhcache removal if uninit failed. */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node: unlink it from both the client
 *		and pHCI lists, destroy its synchronization objects, and
 *		release its memory.  Caller must hold the client mutex.
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip);
	ndi_devi_enter(ph->ph_dip);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip);
	ndi_devi_exit(ct->ct_dip);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *		Caller must hold the pHCI dev_info node busy
 *		(DEVI_BUSY_OWNED asserted); the per-pHCI mutex is
 *		acquired here.
 */
static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	MDI_PHCI_LOCK(ph);
	/* Singly-linked list walk: find 'pip' and its predecessor. */
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			/* 'pip' was the list head. */
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_remove_path():
 *		Remove a mdi_pathinfo node from client path list.
 *		Caller must hold the client dev_info node busy and the
 *		client mutex (both asserted below).
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Singly-linked list walk: find 'pip' and its predecessor. */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			/* 'pip' was the list head. */
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* Reset the last-selected hint if it pointed at 'pip'. */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		online a mdi_pathinfo node
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct =
MDI_PI(pip)->pi_client; 3452 ASSERT(ct != NULL); 3453 if (ct == NULL) { 3454 /* 3455 * Invalid client device, fail the request 3456 */ 3457 MDI_PI_UNLOCK(pip); 3458 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3459 "!invalid client: pip %s %p", 3460 mdi_pi_spathname(pip), (void *)pip)); 3461 return (MDI_FAILURE); 3462 } 3463 3464 /* 3465 * If this path has not been initialized yet, Callback vHCI driver's 3466 * pathinfo node initialize entry point 3467 */ 3468 3469 if (MDI_PI_IS_INITING(pip)) { 3470 MDI_PI_UNLOCK(pip); 3471 f = vh->vh_ops->vo_pi_init; 3472 if (f != NULL) { 3473 rv = (*f)(vh->vh_dip, pip, 0); 3474 if (rv != MDI_SUCCESS) { 3475 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3476 "!vo_pi_init failed: vHCI %p, pip %s %p", 3477 (void *)vh, mdi_pi_spathname(pip), 3478 (void *)pip)); 3479 return (MDI_FAILURE); 3480 } 3481 } 3482 MDI_PI_LOCK(pip); 3483 MDI_PI_CLEAR_TRANSIENT(pip); 3484 } 3485 3486 /* 3487 * Do not allow state transition when pHCI is in offline/suspended 3488 * states 3489 */ 3490 i_mdi_phci_lock(ph, pip); 3491 if (MDI_PHCI_IS_READY(ph) == 0) { 3492 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3493 "!pHCI not ready, pHCI=%p", (void *)ph)); 3494 MDI_PI_UNLOCK(pip); 3495 i_mdi_phci_unlock(ph); 3496 return (MDI_BUSY); 3497 } 3498 MDI_PHCI_UNSTABLE(ph); 3499 i_mdi_phci_unlock(ph); 3500 3501 /* 3502 * Check if mdi_pathinfo state is in transient state. 3503 * If yes, offlining is in progress and wait till transient state is 3504 * cleared. 3505 */ 3506 if (MDI_PI_IS_TRANSIENT(pip)) { 3507 while (MDI_PI_IS_TRANSIENT(pip)) { 3508 cv_wait(&MDI_PI(pip)->pi_state_cv, 3509 &MDI_PI(pip)->pi_mutex); 3510 } 3511 } 3512 3513 /* 3514 * Grab the client lock in reverse order sequence and release the 3515 * mdi_pathinfo mutex. 
3516 */ 3517 i_mdi_client_lock(ct, pip); 3518 MDI_PI_UNLOCK(pip); 3519 3520 /* 3521 * Wait till failover state is cleared 3522 */ 3523 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3524 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3525 3526 /* 3527 * Mark the mdi_pathinfo node state as transient 3528 */ 3529 MDI_PI_LOCK(pip); 3530 switch (state) { 3531 case MDI_PATHINFO_STATE_ONLINE: 3532 MDI_PI_SET_ONLINING(pip); 3533 break; 3534 3535 case MDI_PATHINFO_STATE_STANDBY: 3536 MDI_PI_SET_STANDBYING(pip); 3537 break; 3538 3539 case MDI_PATHINFO_STATE_FAULT: 3540 /* 3541 * Mark the pathinfo state as FAULTED 3542 */ 3543 MDI_PI_SET_FAULTING(pip); 3544 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3545 break; 3546 3547 case MDI_PATHINFO_STATE_OFFLINE: 3548 /* 3549 * ndi_devi_offline() cannot hold pip or ct locks. 3550 */ 3551 MDI_PI_UNLOCK(pip); 3552 3553 /* 3554 * If this is a user initiated path online->offline operation 3555 * who's success would transition a client from DEGRADED to 3556 * FAILED then only proceed if we can offline the client first. 3557 */ 3558 cdip = ct->ct_dip; 3559 if ((flag & NDI_USER_REQ) && 3560 MDI_PI_IS_ONLINE(pip) && 3561 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3562 i_mdi_client_unlock(ct); 3563 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN); 3564 if (rv != NDI_SUCCESS) { 3565 /* 3566 * Convert to MDI error code 3567 */ 3568 switch (rv) { 3569 case NDI_BUSY: 3570 rv = MDI_BUSY; 3571 break; 3572 default: 3573 rv = MDI_FAILURE; 3574 break; 3575 } 3576 goto state_change_exit; 3577 } else { 3578 i_mdi_client_lock(ct, NULL); 3579 } 3580 } 3581 /* 3582 * Mark the mdi_pathinfo node state as transient 3583 */ 3584 MDI_PI_LOCK(pip); 3585 MDI_PI_SET_OFFLINING(pip); 3586 break; 3587 3588 case MDI_PATHINFO_STATE_INIT: 3589 /* 3590 * Callers are not allowed to ask us to change the state to the 3591 * initial state. 
3592 */ 3593 rv = MDI_FAILURE; 3594 MDI_PI_UNLOCK(pip); 3595 goto state_change_exit; 3596 3597 } 3598 MDI_PI_UNLOCK(pip); 3599 MDI_CLIENT_UNSTABLE(ct); 3600 i_mdi_client_unlock(ct); 3601 3602 f = vh->vh_ops->vo_pi_state_change; 3603 if (f != NULL) 3604 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3605 3606 MDI_CLIENT_LOCK(ct); 3607 MDI_PI_LOCK(pip); 3608 if (rv == MDI_NOT_SUPPORTED) { 3609 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3610 } 3611 if (rv != MDI_SUCCESS) { 3612 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip, 3613 "vo_pi_state_change failed: rv %x", rv)); 3614 } 3615 if (MDI_PI_IS_TRANSIENT(pip)) { 3616 if (rv == MDI_SUCCESS) { 3617 MDI_PI_CLEAR_TRANSIENT(pip); 3618 } else { 3619 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3620 } 3621 } 3622 3623 /* 3624 * Wake anyone waiting for this mdi_pathinfo node 3625 */ 3626 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3627 MDI_PI_UNLOCK(pip); 3628 3629 /* 3630 * Mark the client device as stable 3631 */ 3632 MDI_CLIENT_STABLE(ct); 3633 if (rv == MDI_SUCCESS) { 3634 if (ct->ct_unstable == 0) { 3635 cdip = ct->ct_dip; 3636 3637 /* 3638 * Onlining the mdi_pathinfo node will impact the 3639 * client state Update the client and dev_info node 3640 * state accordingly 3641 */ 3642 rv = NDI_SUCCESS; 3643 i_mdi_client_update_state(ct); 3644 switch (MDI_CLIENT_STATE(ct)) { 3645 case MDI_CLIENT_STATE_OPTIMAL: 3646 case MDI_CLIENT_STATE_DEGRADED: 3647 if (cdip && !i_ddi_devi_attached(cdip) && 3648 ((state == MDI_PATHINFO_STATE_ONLINE) || 3649 (state == MDI_PATHINFO_STATE_STANDBY))) { 3650 3651 /* 3652 * Must do ndi_devi_online() through 3653 * hotplug thread for deferred 3654 * attach mechanism to work 3655 */ 3656 MDI_CLIENT_UNLOCK(ct); 3657 rv = ndi_devi_online(cdip, 0); 3658 MDI_CLIENT_LOCK(ct); 3659 if ((rv != NDI_SUCCESS) && 3660 (MDI_CLIENT_STATE(ct) == 3661 MDI_CLIENT_STATE_DEGRADED)) { 3662 MDI_DEBUG(1, (MDI_WARN, cdip, 3663 "!ndi_devi_online failed " 3664 "error %x", rv)); 3665 } 3666 rv = NDI_SUCCESS; 3667 } 3668 break; 3669 3670 case 
MDI_CLIENT_STATE_FAILED: 3671 /* 3672 * This is the last path case for 3673 * non-user initiated events. 3674 */ 3675 if (((flag & NDI_USER_REQ) == 0) && 3676 cdip && (i_ddi_node_state(cdip) >= 3677 DS_INITIALIZED)) { 3678 MDI_CLIENT_UNLOCK(ct); 3679 rv = ndi_devi_offline(cdip, 3680 NDI_DEVFS_CLEAN); 3681 MDI_CLIENT_LOCK(ct); 3682 3683 if (rv != NDI_SUCCESS) { 3684 /* 3685 * ndi_devi_offline failed. 3686 * Reset client flags to 3687 * online as the path could not 3688 * be offlined. 3689 */ 3690 MDI_DEBUG(1, (MDI_WARN, cdip, 3691 "!ndi_devi_offline failed: " 3692 "error %x", rv)); 3693 MDI_CLIENT_SET_ONLINE(ct); 3694 } 3695 } 3696 break; 3697 } 3698 /* 3699 * Convert to MDI error code 3700 */ 3701 switch (rv) { 3702 case NDI_SUCCESS: 3703 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3704 i_mdi_report_path_state(ct, pip); 3705 rv = MDI_SUCCESS; 3706 break; 3707 case NDI_BUSY: 3708 rv = MDI_BUSY; 3709 break; 3710 default: 3711 rv = MDI_FAILURE; 3712 break; 3713 } 3714 } 3715 } 3716 MDI_CLIENT_UNLOCK(ct); 3717 3718 state_change_exit: 3719 /* 3720 * Mark the pHCI as stable again. 3721 */ 3722 MDI_PHCI_LOCK(ph); 3723 MDI_PHCI_STABLE(ph); 3724 MDI_PHCI_UNLOCK(ph); 3725 return (rv); 3726 } 3727 3728 /* 3729 * mdi_pi_online(): 3730 * Place the path_info node in the online state. The path is 3731 * now available to be selected by mdi_select_path() for 3732 * transporting I/O requests to client devices. 
3733 * Return Values: 3734 * MDI_SUCCESS 3735 * MDI_FAILURE 3736 */ 3737 int 3738 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3739 { 3740 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3741 int client_held = 0; 3742 int rv; 3743 3744 ASSERT(ct != NULL); 3745 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3746 if (rv != MDI_SUCCESS) 3747 return (rv); 3748 3749 MDI_PI_LOCK(pip); 3750 if (MDI_PI(pip)->pi_pm_held == 0) { 3751 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3752 "i_mdi_pm_hold_pip %p", (void *)pip)); 3753 i_mdi_pm_hold_pip(pip); 3754 client_held = 1; 3755 } 3756 MDI_PI_UNLOCK(pip); 3757 3758 if (client_held) { 3759 MDI_CLIENT_LOCK(ct); 3760 if (ct->ct_power_cnt == 0) { 3761 rv = i_mdi_power_all_phci(ct); 3762 } 3763 3764 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3765 "i_mdi_pm_hold_client %p", (void *)ct)); 3766 i_mdi_pm_hold_client(ct, 1); 3767 MDI_CLIENT_UNLOCK(ct); 3768 } 3769 3770 return (rv); 3771 } 3772 3773 /* 3774 * mdi_pi_standby(): 3775 * Place the mdi_pathinfo node in standby state 3776 * 3777 * Return Values: 3778 * MDI_SUCCESS 3779 * MDI_FAILURE 3780 */ 3781 int 3782 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3783 { 3784 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3785 } 3786 3787 /* 3788 * mdi_pi_fault(): 3789 * Place the mdi_pathinfo node in fault'ed state 3790 * Return Values: 3791 * MDI_SUCCESS 3792 * MDI_FAILURE 3793 */ 3794 int 3795 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3796 { 3797 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3798 } 3799 3800 /* 3801 * mdi_pi_offline(): 3802 * Offline a mdi_pathinfo node. 3803 * Return Values: 3804 * MDI_SUCCESS 3805 * MDI_FAILURE 3806 */ 3807 int 3808 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3809 { 3810 int ret, client_held = 0; 3811 mdi_client_t *ct; 3812 3813 /* 3814 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3815 * used it to mean "user initiated operation" (i.e. devctl). 
Callers 3816 * should now just use NDI_USER_REQ. 3817 */ 3818 if (flags & NDI_DEVI_REMOVE) { 3819 flags &= ~NDI_DEVI_REMOVE; 3820 flags |= NDI_USER_REQ; 3821 } 3822 3823 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3824 3825 if (ret == MDI_SUCCESS) { 3826 MDI_PI_LOCK(pip); 3827 if (MDI_PI(pip)->pi_pm_held) { 3828 client_held = 1; 3829 } 3830 MDI_PI_UNLOCK(pip); 3831 3832 if (client_held) { 3833 ct = MDI_PI(pip)->pi_client; 3834 MDI_CLIENT_LOCK(ct); 3835 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3836 "i_mdi_pm_rele_client\n")); 3837 i_mdi_pm_rele_client(ct, 1); 3838 MDI_CLIENT_UNLOCK(ct); 3839 } 3840 } 3841 3842 return (ret); 3843 } 3844 3845 /* 3846 * i_mdi_pi_offline(): 3847 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3848 */ 3849 static int 3850 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3851 { 3852 dev_info_t *vdip = NULL; 3853 mdi_vhci_t *vh = NULL; 3854 mdi_client_t *ct = NULL; 3855 int (*f)(); 3856 int rv; 3857 3858 MDI_PI_LOCK(pip); 3859 ct = MDI_PI(pip)->pi_client; 3860 ASSERT(ct != NULL); 3861 3862 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3863 /* 3864 * Give a chance for pending I/Os to complete. 3865 */ 3866 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3867 "!%d cmds still pending on path %s %p", 3868 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3869 (void *)pip)); 3870 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3871 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3872 TR_CLOCK_TICK) == -1) { 3873 /* 3874 * The timeout time reached without ref_cnt being zero 3875 * being signaled. 
3876 */ 3877 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3878 "!Timeout reached on path %s %p without the cond", 3879 mdi_pi_spathname(pip), (void *)pip)); 3880 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3881 "!%d cmds still pending on path %s %p", 3882 MDI_PI(pip)->pi_ref_cnt, 3883 mdi_pi_spathname(pip), (void *)pip)); 3884 } 3885 } 3886 vh = ct->ct_vhci; 3887 vdip = vh->vh_dip; 3888 3889 /* 3890 * Notify vHCI that has registered this event 3891 */ 3892 ASSERT(vh->vh_ops); 3893 f = vh->vh_ops->vo_pi_state_change; 3894 3895 rv = MDI_SUCCESS; 3896 if (f != NULL) { 3897 MDI_PI_UNLOCK(pip); 3898 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3899 flags)) != MDI_SUCCESS) { 3900 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3901 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3902 ddi_driver_name(vdip), ddi_get_instance(vdip), 3903 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3904 } 3905 MDI_PI_LOCK(pip); 3906 } 3907 3908 /* 3909 * Set the mdi_pathinfo node state and clear the transient condition 3910 */ 3911 MDI_PI_SET_OFFLINE(pip); 3912 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3913 MDI_PI_UNLOCK(pip); 3914 3915 MDI_CLIENT_LOCK(ct); 3916 if (rv == MDI_SUCCESS) { 3917 if (ct->ct_unstable == 0) { 3918 dev_info_t *cdip = ct->ct_dip; 3919 3920 /* 3921 * Onlining the mdi_pathinfo node will impact the 3922 * client state Update the client and dev_info node 3923 * state accordingly 3924 */ 3925 i_mdi_client_update_state(ct); 3926 rv = NDI_SUCCESS; 3927 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3928 if (cdip && 3929 (i_ddi_node_state(cdip) >= 3930 DS_INITIALIZED)) { 3931 MDI_CLIENT_UNLOCK(ct); 3932 rv = ndi_devi_offline(cdip, 3933 NDI_DEVFS_CLEAN); 3934 MDI_CLIENT_LOCK(ct); 3935 if (rv != NDI_SUCCESS) { 3936 /* 3937 * ndi_devi_offline failed. 3938 * Reset client flags to 3939 * online. 
3940 */ 3941 MDI_DEBUG(4, (MDI_WARN, cdip, 3942 "ndi_devi_offline failed: " 3943 "error %x", rv)); 3944 MDI_CLIENT_SET_ONLINE(ct); 3945 } 3946 } 3947 } 3948 /* 3949 * Convert to MDI error code 3950 */ 3951 switch (rv) { 3952 case NDI_SUCCESS: 3953 rv = MDI_SUCCESS; 3954 break; 3955 case NDI_BUSY: 3956 rv = MDI_BUSY; 3957 break; 3958 default: 3959 rv = MDI_FAILURE; 3960 break; 3961 } 3962 } 3963 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3964 i_mdi_report_path_state(ct, pip); 3965 } 3966 3967 MDI_CLIENT_UNLOCK(ct); 3968 3969 /* 3970 * Change in the mdi_pathinfo node state will impact the client state 3971 */ 3972 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3973 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3974 return (rv); 3975 } 3976 3977 /* 3978 * i_mdi_pi_online(): 3979 * Online a mdi_pathinfo node and call the vHCI driver's callback 3980 */ 3981 static int 3982 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3983 { 3984 mdi_vhci_t *vh = NULL; 3985 mdi_client_t *ct = NULL; 3986 mdi_phci_t *ph; 3987 int (*f)(); 3988 int rv; 3989 3990 MDI_PI_LOCK(pip); 3991 ph = MDI_PI(pip)->pi_phci; 3992 vh = ph->ph_vhci; 3993 ct = MDI_PI(pip)->pi_client; 3994 MDI_PI_SET_ONLINING(pip) 3995 MDI_PI_UNLOCK(pip); 3996 f = vh->vh_ops->vo_pi_state_change; 3997 rv = MDI_SUCCESS; 3998 if (f != NULL) 3999 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0, flags); 4000 MDI_CLIENT_LOCK(ct); 4001 MDI_PI_LOCK(pip); 4002 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 4003 MDI_PI_UNLOCK(pip); 4004 if (rv == MDI_SUCCESS) { 4005 dev_info_t *cdip = ct->ct_dip; 4006 4007 i_mdi_client_update_state(ct); 4008 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL || 4009 MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4010 if (cdip && !i_ddi_devi_attached(cdip)) { 4011 MDI_CLIENT_UNLOCK(ct); 4012 rv = ndi_devi_online(cdip, 0); 4013 MDI_CLIENT_LOCK(ct); 4014 if ((rv != NDI_SUCCESS) && 4015 (MDI_CLIENT_STATE(ct) == 4016 MDI_CLIENT_STATE_DEGRADED)) { 4017 MDI_CLIENT_SET_OFFLINE(ct); 4018 } 4019 if (rv != 
NDI_SUCCESS) { 4020 /* Reset the path state */ 4021 MDI_PI_LOCK(pip); 4022 MDI_PI(pip)->pi_state = 4023 MDI_PI_OLD_STATE(pip); 4024 MDI_PI_UNLOCK(pip); 4025 } 4026 } 4027 } 4028 switch (rv) { 4029 case NDI_SUCCESS: 4030 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 4031 i_mdi_report_path_state(ct, pip); 4032 rv = MDI_SUCCESS; 4033 break; 4034 case NDI_BUSY: 4035 rv = MDI_BUSY; 4036 break; 4037 default: 4038 rv = MDI_FAILURE; 4039 break; 4040 } 4041 } else { 4042 /* Reset the path state */ 4043 MDI_PI_LOCK(pip); 4044 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 4045 MDI_PI_UNLOCK(pip); 4046 } 4047 MDI_CLIENT_UNLOCK(ct); 4048 return (rv); 4049 } 4050 4051 /* 4052 * mdi_pi_get_node_name(): 4053 * Get the name associated with a mdi_pathinfo node. 4054 * Since pathinfo nodes are not directly named, we 4055 * return the node_name of the client. 4056 * 4057 * Return Values: 4058 * char * 4059 */ 4060 char * 4061 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 4062 { 4063 mdi_client_t *ct; 4064 4065 if (pip == NULL) 4066 return (NULL); 4067 ct = MDI_PI(pip)->pi_client; 4068 if ((ct == NULL) || (ct->ct_dip == NULL)) 4069 return (NULL); 4070 return (ddi_node_name(ct->ct_dip)); 4071 } 4072 4073 /* 4074 * mdi_pi_get_addr(): 4075 * Get the unit address associated with a mdi_pathinfo node 4076 * 4077 * Return Values: 4078 * char * 4079 */ 4080 char * 4081 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4082 { 4083 if (pip == NULL) 4084 return (NULL); 4085 4086 return (MDI_PI(pip)->pi_addr); 4087 } 4088 4089 /* 4090 * mdi_pi_get_path_instance(): 4091 * Get the 'path_instance' of a mdi_pathinfo node 4092 * 4093 * Return Values: 4094 * path_instance 4095 */ 4096 int 4097 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4098 { 4099 if (pip == NULL) 4100 return (0); 4101 4102 return (MDI_PI(pip)->pi_path_instance); 4103 } 4104 4105 /* 4106 * mdi_pi_pathname(): 4107 * Return pointer to path to pathinfo node. 
4108 */ 4109 char * 4110 mdi_pi_pathname(mdi_pathinfo_t *pip) 4111 { 4112 if (pip == NULL) 4113 return (NULL); 4114 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4115 } 4116 4117 /* 4118 * mdi_pi_spathname(): 4119 * Return pointer to shortpath to pathinfo node. Used for debug 4120 * messages, so return "" instead of NULL when unknown. 4121 */ 4122 char * 4123 mdi_pi_spathname(mdi_pathinfo_t *pip) 4124 { 4125 char *spath = ""; 4126 4127 if (pip) { 4128 spath = mdi_pi_spathname_by_instance( 4129 mdi_pi_get_path_instance(pip)); 4130 if (spath == NULL) 4131 spath = ""; 4132 } 4133 return (spath); 4134 } 4135 4136 char * 4137 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4138 { 4139 char *obp_path = NULL; 4140 if ((pip == NULL) || (path == NULL)) 4141 return (NULL); 4142 4143 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4144 (void) strcpy(path, obp_path); 4145 (void) mdi_prop_free(obp_path); 4146 } else { 4147 path = NULL; 4148 } 4149 return (path); 4150 } 4151 4152 int 4153 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4154 { 4155 dev_info_t *pdip; 4156 char *obp_path = NULL; 4157 int rc = MDI_FAILURE; 4158 4159 if (pip == NULL) 4160 return (MDI_FAILURE); 4161 4162 pdip = mdi_pi_get_phci(pip); 4163 if (pdip == NULL) 4164 return (MDI_FAILURE); 4165 4166 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4167 4168 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4169 (void) ddi_pathname(pdip, obp_path); 4170 } 4171 4172 if (component) { 4173 (void) strncat(obp_path, "/", MAXPATHLEN); 4174 (void) strncat(obp_path, component, MAXPATHLEN); 4175 } 4176 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4177 4178 if (obp_path) 4179 kmem_free(obp_path, MAXPATHLEN); 4180 return (rc); 4181 } 4182 4183 /* 4184 * mdi_pi_get_client(): 4185 * Get the client devinfo associated with a mdi_pathinfo node 4186 * 4187 * Return Values: 4188 * Handle to client device dev_info node 4189 */ 4190 dev_info_t * 4191 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4192 { 4193 dev_info_t *dip = NULL; 4194 if (pip) { 4195 dip = MDI_PI(pip)->pi_client->ct_dip; 4196 } 4197 return (dip); 4198 } 4199 4200 /* 4201 * mdi_pi_get_phci(): 4202 * Get the pHCI devinfo associated with the mdi_pathinfo node 4203 * Return Values: 4204 * Handle to dev_info node 4205 */ 4206 dev_info_t * 4207 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4208 { 4209 dev_info_t *dip = NULL; 4210 mdi_phci_t *ph; 4211 4212 if (pip) { 4213 ph = MDI_PI(pip)->pi_phci; 4214 if (ph) 4215 dip = ph->ph_dip; 4216 } 4217 return (dip); 4218 } 4219 4220 /* 4221 * mdi_pi_get_client_private(): 4222 * Get the client private information associated with the 4223 * mdi_pathinfo node 4224 */ 4225 void * 4226 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4227 { 4228 void *cprivate = NULL; 4229 if (pip) { 4230 cprivate = MDI_PI(pip)->pi_cprivate; 4231 } 4232 return (cprivate); 4233 } 4234 4235 /* 4236 * mdi_pi_set_client_private(): 4237 * Set the client private information in the mdi_pathinfo node 4238 */ 4239 void 4240 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4241 { 4242 if (pip) { 4243 MDI_PI(pip)->pi_cprivate = priv; 4244 } 4245 } 4246 4247 /* 4248 * mdi_pi_get_phci_private(): 4249 * Get the pHCI private information associated with the 4250 * mdi_pathinfo node 4251 */ 4252 caddr_t 4253 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4254 { 4255 caddr_t pprivate = NULL; 4256 4257 if (pip) { 4258 pprivate = MDI_PI(pip)->pi_pprivate; 4259 } 4260 return (pprivate); 4261 } 4262 4263 /* 4264 * mdi_pi_set_phci_private(): 4265 * Set the pHCI private information in the mdi_pathinfo node 4266 */ 4267 void 4268 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4269 { 4270 if (pip) { 4271 MDI_PI(pip)->pi_pprivate = priv; 4272 } 4273 } 4274 4275 /* 4276 * mdi_pi_get_state(): 4277 * Get the mdi_pathinfo node state. 
Transient states are internal 4278 * and not provided to the users 4279 */ 4280 mdi_pathinfo_state_t 4281 mdi_pi_get_state(mdi_pathinfo_t *pip) 4282 { 4283 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4284 4285 if (pip) { 4286 if (MDI_PI_IS_TRANSIENT(pip)) { 4287 /* 4288 * mdi_pathinfo is in state transition. Return the 4289 * last good state. 4290 */ 4291 state = MDI_PI_OLD_STATE(pip); 4292 } else { 4293 state = MDI_PI_STATE(pip); 4294 } 4295 } 4296 return (state); 4297 } 4298 4299 /* 4300 * mdi_pi_get_flags(): 4301 * Get the mdi_pathinfo node flags. 4302 */ 4303 uint_t 4304 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4305 { 4306 return (pip ? MDI_PI(pip)->pi_flags : 0); 4307 } 4308 4309 /* 4310 * Note that the following function needs to be the new interface for 4311 * mdi_pi_get_state when mpxio gets integrated to ON. 4312 */ 4313 int 4314 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4315 uint32_t *ext_state) 4316 { 4317 *state = MDI_PATHINFO_STATE_INIT; 4318 4319 if (pip) { 4320 if (MDI_PI_IS_TRANSIENT(pip)) { 4321 /* 4322 * mdi_pathinfo is in state transition. Return the 4323 * last good state. 
4324 */ 4325 *state = MDI_PI_OLD_STATE(pip); 4326 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4327 } else { 4328 *state = MDI_PI_STATE(pip); 4329 *ext_state = MDI_PI_EXT_STATE(pip); 4330 } 4331 } 4332 return (MDI_SUCCESS); 4333 } 4334 4335 /* 4336 * mdi_pi_get_preferred: 4337 * Get the preferred path flag 4338 */ 4339 int 4340 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4341 { 4342 if (pip) { 4343 return (MDI_PI(pip)->pi_preferred); 4344 } 4345 return (0); 4346 } 4347 4348 /* 4349 * mdi_pi_set_preferred: 4350 * Set the preferred path flag 4351 */ 4352 void 4353 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4354 { 4355 if (pip) { 4356 MDI_PI(pip)->pi_preferred = preferred; 4357 } 4358 } 4359 4360 /* 4361 * mdi_pi_set_state(): 4362 * Set the mdi_pathinfo node state 4363 */ 4364 void 4365 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4366 { 4367 uint32_t ext_state; 4368 4369 if (pip) { 4370 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4371 MDI_PI(pip)->pi_state = state; 4372 MDI_PI(pip)->pi_state |= ext_state; 4373 4374 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4375 i_ddi_di_cache_invalidate(); 4376 } 4377 } 4378 4379 /* 4380 * Property functions: 4381 */ 4382 int 4383 i_map_nvlist_error_to_mdi(int val) 4384 { 4385 int rv; 4386 4387 switch (val) { 4388 case 0: 4389 rv = DDI_PROP_SUCCESS; 4390 break; 4391 case EINVAL: 4392 case ENOTSUP: 4393 rv = DDI_PROP_INVAL_ARG; 4394 break; 4395 case ENOMEM: 4396 rv = DDI_PROP_NO_MEMORY; 4397 break; 4398 default: 4399 rv = DDI_PROP_NOT_FOUND; 4400 break; 4401 } 4402 return (rv); 4403 } 4404 4405 /* 4406 * mdi_pi_get_next_prop(): 4407 * Property walk function. The caller should hold mdi_pi_lock() 4408 * and release by calling mdi_pi_unlock() at the end of walk to 4409 * get a consistent value. 
4410 */ 4411 nvpair_t * 4412 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4413 { 4414 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4415 return (NULL); 4416 } 4417 ASSERT(MDI_PI_LOCKED(pip)); 4418 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4419 } 4420 4421 /* 4422 * mdi_prop_remove(): 4423 * Remove the named property from the named list. 4424 */ 4425 int 4426 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4427 { 4428 if (pip == NULL) { 4429 return (DDI_PROP_NOT_FOUND); 4430 } 4431 ASSERT(!MDI_PI_LOCKED(pip)); 4432 MDI_PI_LOCK(pip); 4433 if (MDI_PI(pip)->pi_prop == NULL) { 4434 MDI_PI_UNLOCK(pip); 4435 return (DDI_PROP_NOT_FOUND); 4436 } 4437 if (name) { 4438 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4439 } else { 4440 char nvp_name[MAXNAMELEN]; 4441 nvpair_t *nvp; 4442 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4443 while (nvp) { 4444 nvpair_t *next; 4445 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4446 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4447 nvpair_name(nvp)); 4448 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4449 nvp_name); 4450 nvp = next; 4451 } 4452 } 4453 MDI_PI_UNLOCK(pip); 4454 return (DDI_PROP_SUCCESS); 4455 } 4456 4457 /* 4458 * mdi_prop_size(): 4459 * Get buffer size needed to pack the property data. 4460 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4461 * buffer size. 4462 */ 4463 int 4464 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4465 { 4466 int rv; 4467 size_t bufsize; 4468 4469 *buflenp = 0; 4470 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4471 return (DDI_PROP_NOT_FOUND); 4472 } 4473 ASSERT(MDI_PI_LOCKED(pip)); 4474 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4475 &bufsize, NV_ENCODE_NATIVE); 4476 *buflenp = bufsize; 4477 return (i_map_nvlist_error_to_mdi(rv)); 4478 } 4479 4480 /* 4481 * mdi_prop_pack(): 4482 * pack the property list. 
The caller should hold the 4483 * mdi_pathinfo_t node to get a consistent data 4484 */ 4485 int 4486 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4487 { 4488 int rv; 4489 size_t bufsize; 4490 4491 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4492 return (DDI_PROP_NOT_FOUND); 4493 } 4494 4495 ASSERT(MDI_PI_LOCKED(pip)); 4496 4497 bufsize = buflen; 4498 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4499 NV_ENCODE_NATIVE, KM_SLEEP); 4500 4501 return (i_map_nvlist_error_to_mdi(rv)); 4502 } 4503 4504 /* 4505 * mdi_prop_update_byte(): 4506 * Create/Update a byte property 4507 */ 4508 int 4509 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4510 { 4511 int rv; 4512 4513 if (pip == NULL) { 4514 return (DDI_PROP_INVAL_ARG); 4515 } 4516 ASSERT(!MDI_PI_LOCKED(pip)); 4517 MDI_PI_LOCK(pip); 4518 if (MDI_PI(pip)->pi_prop == NULL) { 4519 MDI_PI_UNLOCK(pip); 4520 return (DDI_PROP_NOT_FOUND); 4521 } 4522 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4523 MDI_PI_UNLOCK(pip); 4524 return (i_map_nvlist_error_to_mdi(rv)); 4525 } 4526 4527 /* 4528 * mdi_prop_update_byte_array(): 4529 * Create/Update a byte array property 4530 */ 4531 int 4532 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4533 uint_t nelements) 4534 { 4535 int rv; 4536 4537 if (pip == NULL) { 4538 return (DDI_PROP_INVAL_ARG); 4539 } 4540 ASSERT(!MDI_PI_LOCKED(pip)); 4541 MDI_PI_LOCK(pip); 4542 if (MDI_PI(pip)->pi_prop == NULL) { 4543 MDI_PI_UNLOCK(pip); 4544 return (DDI_PROP_NOT_FOUND); 4545 } 4546 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4547 MDI_PI_UNLOCK(pip); 4548 return (i_map_nvlist_error_to_mdi(rv)); 4549 } 4550 4551 /* 4552 * mdi_prop_update_int(): 4553 * Create/Update a 32 bit integer property 4554 */ 4555 int 4556 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4557 { 4558 int rv; 4559 4560 if (pip == NULL) { 4561 return (DDI_PROP_INVAL_ARG); 4562 } 4563 
ASSERT(!MDI_PI_LOCKED(pip)); 4564 MDI_PI_LOCK(pip); 4565 if (MDI_PI(pip)->pi_prop == NULL) { 4566 MDI_PI_UNLOCK(pip); 4567 return (DDI_PROP_NOT_FOUND); 4568 } 4569 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4570 MDI_PI_UNLOCK(pip); 4571 return (i_map_nvlist_error_to_mdi(rv)); 4572 } 4573 4574 /* 4575 * mdi_prop_update_int64(): 4576 * Create/Update a 64 bit integer property 4577 */ 4578 int 4579 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4580 { 4581 int rv; 4582 4583 if (pip == NULL) { 4584 return (DDI_PROP_INVAL_ARG); 4585 } 4586 ASSERT(!MDI_PI_LOCKED(pip)); 4587 MDI_PI_LOCK(pip); 4588 if (MDI_PI(pip)->pi_prop == NULL) { 4589 MDI_PI_UNLOCK(pip); 4590 return (DDI_PROP_NOT_FOUND); 4591 } 4592 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4593 MDI_PI_UNLOCK(pip); 4594 return (i_map_nvlist_error_to_mdi(rv)); 4595 } 4596 4597 /* 4598 * mdi_prop_update_int_array(): 4599 * Create/Update a int array property 4600 */ 4601 int 4602 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4603 uint_t nelements) 4604 { 4605 int rv; 4606 4607 if (pip == NULL) { 4608 return (DDI_PROP_INVAL_ARG); 4609 } 4610 ASSERT(!MDI_PI_LOCKED(pip)); 4611 MDI_PI_LOCK(pip); 4612 if (MDI_PI(pip)->pi_prop == NULL) { 4613 MDI_PI_UNLOCK(pip); 4614 return (DDI_PROP_NOT_FOUND); 4615 } 4616 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4617 nelements); 4618 MDI_PI_UNLOCK(pip); 4619 return (i_map_nvlist_error_to_mdi(rv)); 4620 } 4621 4622 /* 4623 * mdi_prop_update_string(): 4624 * Create/Update a string property 4625 */ 4626 int 4627 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4628 { 4629 int rv; 4630 4631 if (pip == NULL) { 4632 return (DDI_PROP_INVAL_ARG); 4633 } 4634 ASSERT(!MDI_PI_LOCKED(pip)); 4635 MDI_PI_LOCK(pip); 4636 if (MDI_PI(pip)->pi_prop == NULL) { 4637 MDI_PI_UNLOCK(pip); 4638 return (DDI_PROP_NOT_FOUND); 4639 } 4640 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4641 MDI_PI_UNLOCK(pip); 4642 return (i_map_nvlist_error_to_mdi(rv)); 4643 } 4644 4645 /* 4646 * mdi_prop_update_string_array(): 4647 * Create/Update a string array property 4648 */ 4649 int 4650 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4651 uint_t nelements) 4652 { 4653 int rv; 4654 4655 if (pip == NULL) { 4656 return (DDI_PROP_INVAL_ARG); 4657 } 4658 ASSERT(!MDI_PI_LOCKED(pip)); 4659 MDI_PI_LOCK(pip); 4660 if (MDI_PI(pip)->pi_prop == NULL) { 4661 MDI_PI_UNLOCK(pip); 4662 return (DDI_PROP_NOT_FOUND); 4663 } 4664 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4665 nelements); 4666 MDI_PI_UNLOCK(pip); 4667 return (i_map_nvlist_error_to_mdi(rv)); 4668 } 4669 4670 /* 4671 * mdi_prop_lookup_byte(): 4672 * Look for byte property identified by name. The data returned 4673 * is the actual property and valid as long as mdi_pathinfo_t node 4674 * is alive. 4675 */ 4676 int 4677 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4678 { 4679 int rv; 4680 4681 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4682 return (DDI_PROP_NOT_FOUND); 4683 } 4684 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4685 return (i_map_nvlist_error_to_mdi(rv)); 4686 } 4687 4688 4689 /* 4690 * mdi_prop_lookup_byte_array(): 4691 * Look for byte array property identified by name. The data 4692 * returned is the actual property and valid as long as 4693 * mdi_pathinfo_t node is alive. 4694 */ 4695 int 4696 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4697 uint_t *nelements) 4698 { 4699 int rv; 4700 4701 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4702 return (DDI_PROP_NOT_FOUND); 4703 } 4704 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4705 nelements); 4706 return (i_map_nvlist_error_to_mdi(rv)); 4707 } 4708 4709 /* 4710 * mdi_prop_lookup_int(): 4711 * Look for int property identified by name. 
The data returned 4712 * is the actual property and valid as long as mdi_pathinfo_t 4713 * node is alive. 4714 */ 4715 int 4716 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4717 { 4718 int rv; 4719 4720 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4721 return (DDI_PROP_NOT_FOUND); 4722 } 4723 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4724 return (i_map_nvlist_error_to_mdi(rv)); 4725 } 4726 4727 /* 4728 * mdi_prop_lookup_int64(): 4729 * Look for int64 property identified by name. The data returned 4730 * is the actual property and valid as long as mdi_pathinfo_t node 4731 * is alive. 4732 */ 4733 int 4734 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4735 { 4736 int rv; 4737 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4738 return (DDI_PROP_NOT_FOUND); 4739 } 4740 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4741 return (i_map_nvlist_error_to_mdi(rv)); 4742 } 4743 4744 /* 4745 * mdi_prop_lookup_int_array(): 4746 * Look for int array property identified by name. The data 4747 * returned is the actual property and valid as long as 4748 * mdi_pathinfo_t node is alive. 4749 */ 4750 int 4751 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4752 uint_t *nelements) 4753 { 4754 int rv; 4755 4756 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4757 return (DDI_PROP_NOT_FOUND); 4758 } 4759 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4760 (int32_t **)data, nelements); 4761 return (i_map_nvlist_error_to_mdi(rv)); 4762 } 4763 4764 /* 4765 * mdi_prop_lookup_string(): 4766 * Look for string property identified by name. The data 4767 * returned is the actual property and valid as long as 4768 * mdi_pathinfo_t node is alive. 
4769 */ 4770 int 4771 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4772 { 4773 int rv; 4774 4775 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4776 return (DDI_PROP_NOT_FOUND); 4777 } 4778 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4779 return (i_map_nvlist_error_to_mdi(rv)); 4780 } 4781 4782 /* 4783 * mdi_prop_lookup_string_array(): 4784 * Look for string array property identified by name. The data 4785 * returned is the actual property and valid as long as 4786 * mdi_pathinfo_t node is alive. 4787 */ 4788 int 4789 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4790 uint_t *nelements) 4791 { 4792 int rv; 4793 4794 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4795 return (DDI_PROP_NOT_FOUND); 4796 } 4797 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4798 nelements); 4799 return (i_map_nvlist_error_to_mdi(rv)); 4800 } 4801 4802 /* 4803 * mdi_prop_free(): 4804 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4805 * functions return the pointer to actual property data and not a 4806 * copy of it. So the data returned is valid as long as 4807 * mdi_pathinfo_t node is valid. 
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	/* Nothing to free: lookups return pointers into the live nvlist */
	return (DDI_PROP_SUCCESS);
}

/*
 * i_mdi_report_path_state():
 *	Emit a console/syslog message describing the client's overall
 *	multipath state and the given path's state.  Called with the
 *	client locked; clears the client's REPORT_DEV_NEEDED flag after
 *	the message has been issued.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char		*ct_path;
	char		*ct_status;
	char		*status;
	dev_info_t	*cdip = ct->ct_dip;
	char		lb_buf[64];
	int		report_lb_c = 0, report_lb_p = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Nothing to report for unbound clients or when no report is due */
	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
		report_lb_c = 1;
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	lb_buf[0] = 0;		/* not interested in load balancing config */

	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
		status = "removed";
	} else if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
		report_lb_p = 1;
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	if (cdip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		/*
		 * NOTE: Keeping "multipath status: %s" and
		 * "Load balancing: %s" format unchanged in case someone
		 * scrubs /var/adm/messages looking for these messages.
		 */
		if (report_lb_c && report_lb_p) {
			/* load-balance info only for optimal + online */
			if (ct->ct_lb == LOAD_BALANCE_LBA) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s, region-size: %d", mdi_load_balance_lba,
				    ct->ct_lb_args->region_size);
			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s", mdi_load_balance_none);
			} else {
				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
				    mdi_load_balance_rr);
			}

			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s: Load balancing: %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status, lb_buf);
		} else {
			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status);
		}

		kmem_free(ct_path, MAXPATHLEN);
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef DEBUG
/*
 * i_mdi_log():
 *	Utility function for error message management
 *
 *	NOTE: Implementation takes care of trailing \n for cmn_err,
 *	MDI_DEBUG should not terminate fmt strings with \n.
 *
 *	NOTE: If the level is >= 2, and there is no leading !?^
 *	then a leading ! is implied (but can be overriden via
 *	mdi_debug_consoleonly). If you are using kmdb on the console,
 *	consider setting mdi_debug_consoleonly to 1 as an aid.
 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[512];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	if (dip) {
		(void) snprintf(name, sizeof(name), "%s%d: ",
		    ddi_driver_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/* A leading !, ? or ^ routes the message; see cmn_err(9F) */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		if (level >= 2)
			log_only = 1;		/* ! implied */
		bp = buf;
		break;
	}
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}
	if (mdi_debug_consoleonly) {
		log_only = 0;
		boot_only = 0;
		console_only = 1;
		level = CE_NOTE;
		goto console;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
		}
		break;

	case CE_WARN:
	case CE_PANIC:
	console:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
		}
		break;
	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */

/*
 * i_mdi_client_online():
 *	Client online notification handler: marks the client online and
 *	(re)binds the client structure to its dev_info node.
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification. Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
	    "i_mdi_pm_hold_client %p", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_phci_online():
 *	pHCI online notification handler: marks the pHCI online.
 */
void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *	Online notification from NDI framework on pHCI/client
 *	device online.
 * Return Values:
 *	NDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	/* A dip may be both a pHCI and a client; handle each role */
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *	Offline notification from NDI framework on pHCI/Client device
 *	offline.
 *
 * Return Values:
 *	NDI_SUCCESS
 *	NDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	/* Take the client role offline first; bail out if that fails */
	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*
 * i_mdi_phci_offline():
 *	Offline a pHCI: verify no client depends critically on it, offline
 *	clients for which this pHCI carries the last path, then mark the
 *	pHCI and all of its mdi_pathinfo children offline.  Returns
 *	NDI_BUSY if anything is in a transient state or cannot be offlined.
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!pHCI already offlined: %p", (void *)dip));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more target devices are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Last path: take the client offline.  Locks are
			 * dropped around ndi_devi_offline() and the pHCI
			 * lock reacquired afterwards.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip,
			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (MDI_WARN, dip,
				    "!pHCI device is busy. "
				    "This device can not be removed at this "
				    "moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * A client could not be offlined.  Walk the paths that were
		 * processed before failed_pip and restore their clients'
		 * online/offline state, then fail the DR with NDI_BUSY.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			/* a path refused to offline; restore and fail */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*
 * mdi_phci_mark_retiring():
 *	Walk this pHCI's paths; for each client for which this pHCI holds
 *	the last usable path, mark the client dip as retiring via
 *	e_ddi_mark_retiring().
 */
void
mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* has no last path */
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		MDI_PI_UNLOCK(pip);

		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Last path.
Mark client dip as retiring */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_mark_retiring(cdip, cons_array);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_retire_notify():
 *	Retire-notify pass over this pHCI's paths.  Clears *constraint
 *	(retire not currently possible) when the pHCI or a client is
 *	unstable or a failover is in progress; otherwise forwards the
 *	notification to each client for which this is the last usable path.
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are retiring the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Last path: let the client veto/approve the retire */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_retire_notify(cdip, constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * offline the path(s) hanging off the pHCI. If the
 * last path to any client, check that constraints
 * have been applied.
 *
 * If constraint is 0, we aren't going to retire the
 * pHCI. However we still need to go through the paths
 * calling e_ddi_retire_finalize() to clear their
 * contract barriers.
 */
void
mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		unstable = 0;
	int		tmp_constraint;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* no last path and no pips */
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* no last path and no pips */
		return;
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		unstable = 1;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * if failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			unstable = 1;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of
this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (!phci_only && cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			/*
			 * This is the last path to this client.
			 *
			 * Constraint will only be set to 1 if this client can
			 * be retired (as already determined by
			 * mdi_phci_retire_notify). However we don't actually
			 * need to retire the client (we just retire the last
			 * path - MPXIO will then fail all I/Os to the client).
			 * But we still need to call e_ddi_retire_finalize so
			 * the contract barriers can be cleared. Therefore we
			 * temporarily set constraint = 0 so that the client
			 * dip is not retired.
			 */
			tmp_constraint = 0;
			(void) e_ddi_retire_finalize(cdip, &tmp_constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/* constraint cleared: the pHCI itself will not be retired */
	if (!phci_only && *((int *)constraint) == 0) {
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Cannot offline pip(s)
	 */
	if (unstable) {
		cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
		    "pHCI in transient state, cannot retire",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, 0);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			/* a path refused to offline; restore and abandon */
			cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
			    "path %d %s busy, cannot offline",
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return;
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_unretire():
 *	Undo a pHCI retire: online the pHCI and attempt to online each of
 *	its paths.
 *	NOTE(review): unlike the other entry points here, the result of
 *	i_devi_get_phci() is not checked for NULL; relies on the
 *	ASSERT(MDI_PHCI(dip)) holding — confirm callers guarantee this.
 */
void
mdi_phci_unretire(dev_info_t *dip)
{
	mdi_phci_t	*ph;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;

	ASSERT(MDI_PHCI(dip));

	/*
	 * Online the phci
	 */
	i_mdi_phci_online(dip);

	ph = i_devi_get_phci(dip);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_UNLOCK(pip);
		(void) i_mdi_pi_online(pip, 0);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_offline():
 *	Client component offline handler.  Refuses (NDI_BUSY) while paths
 *	are transient or a failover is in progress; otherwise marks the
 *	client offline and, on NDI_DEVI_REMOVE, drops the dev_info binding.
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ct));
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!One or more paths to "
			    "this device are in transient state. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!Client device is Busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}

/*
 * mdi_pre_attach():
 *	Pre attach() notification handler
 */
/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* don't support old DDI_PM_RESUME */
	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
	    (cmd == DDI_PM_RESUME))
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * mdi_post_attach():
 *	Post attach() notification handler
 */
/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_vhci_t	*vh;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "phci post_attach called %p", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				/* attach failed: record the converse state */
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!pHCI post_attach failed: error %d",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
		case DDI_PM_RESUME:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "pHCI post_resume: called %p", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!pHCI post_resume failed: error %d",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "client post_attach called %p", (void *)ct));
			if (error != DDI_SUCCESS) {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!client post_attach failed: error %d",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (MDI_WARN, dip,
				    "i_mdi_pm_reset_client"));
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached, inform
			 * the vhci.
			 */
			vh = ct->ct_vhci;
			if (vh->vh_ops->vo_client_attached)
				(*vh->vh_ops->vo_client_attached)(dip);

			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
		case DDI_PM_RESUME:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "client post_attach: called %p", (void *)ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!client post_resume failed: error %d",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}

/*
 * mdi_pre_detach():
 *	Pre detach notification handler
 */
/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*
 * i_mdi_phci_pre_detach():
 *	pHCI pre-detach handler.  DDI_DETACH is refused while paths are
 *	still attached; DDI_SUSPEND first suspends every client reachable
 *	through this pHCI, rolling the suspends back on failure.
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_detach: called %p", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (MDI_WARN, dip,
			    "pHCI pre_detach: paths are still attached %p",
			    (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_suspend: called %p", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (MDI_WARN, dip,
					    "!suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete.
Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					/* resume the client we suspended */
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * i_mdi_client_pre_detach():
 *	Client pre-detach handler; records the pending DETACH/SUSPEND in
 *	the client state.
 */
/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client pre_detach: called %p",
		    (void *)ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client pre_suspend: called %p",
		    (void *)ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *	Post detach notification handler
 */
/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed.
Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*
 * i_mdi_phci_post_detach():
 *	Restore pHCI state when a detach/suspend did not succeed.
 */
/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed. Update our state
	 * too
	 */
	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);
	/*
	 * Detach of pHCI failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI post_detach: called %p",
		    (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_ATTACH(ph);
		break;

	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI post_suspend: called %p",
		    (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_RESUME(ph);
		break;
	case DDI_HOTPLUG_DETACH:
		/* nothing to restore */
		break;
	}
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_post_detach():
 *	Restore client state when a detach/suspend did not succeed and
 *	rebalance the client's power-management holds.
 */
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed.
Restore back converse 6039 * state 6040 */ 6041 switch (cmd) { 6042 case DDI_DETACH: 6043 MDI_DEBUG(2, (MDI_NOTE, dip, 6044 "client post_detach: called %p", (void *)ct)); 6045 if (DEVI_IS_ATTACHING(dip)) { 6046 MDI_DEBUG(4, (MDI_NOTE, dip, 6047 "i_mdi_pm_rele_client\n")); 6048 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6049 } else { 6050 MDI_DEBUG(4, (MDI_NOTE, dip, 6051 "i_mdi_pm_reset_client\n")); 6052 i_mdi_pm_reset_client(ct); 6053 } 6054 if (error != DDI_SUCCESS) 6055 MDI_CLIENT_SET_ATTACH(ct); 6056 break; 6057 6058 case DDI_SUSPEND: 6059 case DDI_PM_SUSPEND: 6060 MDI_DEBUG(2, (MDI_NOTE, dip, 6061 "called %p", (void *)ct)); 6062 if (error != DDI_SUCCESS) 6063 MDI_CLIENT_SET_RESUME(ct); 6064 break; 6065 case DDI_HOTPLUG_DETACH: 6066 break; 6067 } 6068 MDI_CLIENT_UNLOCK(ct); 6069 } 6070 6071 int 6072 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 6073 { 6074 return (MDI_PI(pip)->pi_kstats ? 1 : 0); 6075 } 6076 6077 /* 6078 * create and install per-path (client - pHCI) statistics 6079 * I/O stats supported: nread, nwritten, reads, and writes 6080 * Error stats - hard errors, soft errors, & transport errors 6081 */ 6082 int 6083 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 6084 { 6085 kstat_t *kiosp, *kerrsp; 6086 struct pi_errs *nsp; 6087 struct mdi_pi_kstats *mdi_statp; 6088 6089 if (MDI_PI(pip)->pi_kstats != NULL) 6090 return (MDI_SUCCESS); 6091 6092 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 6093 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 6094 return (MDI_FAILURE); 6095 } 6096 6097 (void) strcat(ksname, ",err"); 6098 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 6099 KSTAT_TYPE_NAMED, 6100 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 6101 if (kerrsp == NULL) { 6102 kstat_delete(kiosp); 6103 return (MDI_FAILURE); 6104 } 6105 6106 nsp = (struct pi_errs *)kerrsp->ks_data; 6107 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 6108 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", 
/*
 * destroy per-path properties
 */
static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
{

	struct mdi_pi_kstats *mdi_statp;

	/* NOTE(review): this NULL check is redundant with the one below. */
	if (MDI_PI(pip)->pi_kstats == NULL)
		return;
	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
		return;

	MDI_PI(pip)->pi_kstats = NULL;

	/*
	 * the kstat may be shared between multiple pathinfo nodes
	 * decrement this pathinfo's usage, removing the kstats
	 * themselves when the last pathinfo reference is removed.
	 */
	ASSERT(mdi_statp->pi_kstat_ref > 0);
	if (--mdi_statp->pi_kstat_ref != 0)
		return;

	kstat_delete(mdi_statp->pi_kstat_iostats);
	kstat_delete(mdi_statp->pi_kstat_errstats);
	kmem_free(mdi_statp, sizeof (*mdi_statp));
}

/*
 * update I/O paths KSTATS
 */
void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
{
	kstat_t *iostatp;
	size_t xfer_cnt;

	ASSERT(pip != NULL);

	/*
	 * I/O can be driven across a path prior to having path
	 * statistics available, i.e. probe(9e).
	 */
	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
		xfer_cnt = bp->b_bcount - bp->b_resid;
		if (bp->b_flags & B_READ) {
			KSTAT_IO_PTR(iostatp)->reads++;
			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
		} else {
			KSTAT_IO_PTR(iostatp)->writes++;
			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
		}
	}
}

/*
 * Enable the path(specific client/target/initiator)
 * Enabling a path means that MPxIO may select the enabled path for routing
 * future I/O requests, subject to other path state constraints.
 */
int
mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*ph;

	ph = MDI_PI(pip)->pi_phci;
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
		    "!failed: path %s %p: NULL ph",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
	    MDI_ENABLE_OP);
	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
	    "!returning success pip = %p. ph = %p",
	    (void *)pip, (void *)ph));
	return (MDI_SUCCESS);

}

/*
 * Disable the path (specific client/target/initiator)
 * Disabling a path means that MPxIO will not select the disabled path for
 * routing any new I/O requests.
 */
int
mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*ph;

	ph = MDI_PI(pip)->pi_phci;
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
		    "!failed: path %s %p: NULL ph",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	(void) i_mdi_enable_disable_path(pip,
	    ph->ph_vhci, flags, MDI_DISABLE_OP);
	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
	    "!returning success pip = %p. ph = %p",
	    (void *)pip, (void *)ph));
	return (MDI_SUCCESS);
}

/*
 * disable the path to a particular pHCI (pHCI specified in the phci_path
 * argument) for a particular client (specified in the client_path argument).
 * Disabling a path means that MPxIO will not select the disabled path for
 * routing any new I/O requests.
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
int
mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
{
	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
}

/*
 * Enable the path to a particular pHCI (pHCI specified in the phci_path
 * argument) for a particular client (specified in the client_path argument).
 * Enabling a path means that MPxIO may select the enabled path for routing
 * future I/O requests, subject to other path state constraints.
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */

int
mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
{
	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
}

/*
 * Common routine for doing enable/disable.
6277 */ 6278 static mdi_pathinfo_t * 6279 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6280 int op) 6281 { 6282 int sync_flag = 0; 6283 int rv; 6284 mdi_pathinfo_t *next; 6285 int (*f)() = NULL; 6286 6287 /* 6288 * Check to make sure the path is not already in the 6289 * requested state. If it is just return the next path 6290 * as we have nothing to do here. 6291 */ 6292 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6293 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6294 MDI_PI_LOCK(pip); 6295 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6296 MDI_PI_UNLOCK(pip); 6297 return (next); 6298 } 6299 6300 f = vh->vh_ops->vo_pi_state_change; 6301 6302 sync_flag = (flags << 8) & 0xf00; 6303 6304 /* 6305 * Do a callback into the mdi consumer to let it 6306 * know that path is about to get enabled/disabled. 6307 */ 6308 rv = MDI_SUCCESS; 6309 if (f != NULL) { 6310 rv = (*f)(vh->vh_dip, pip, 0, 6311 MDI_PI_EXT_STATE(pip), 6312 MDI_EXT_STATE_CHANGE | sync_flag | 6313 op | MDI_BEFORE_STATE_CHANGE); 6314 if (rv != MDI_SUCCESS) { 6315 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6316 "vo_pi_state_change: failed rv = %x", rv)); 6317 } 6318 } 6319 MDI_PI_LOCK(pip); 6320 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6321 6322 switch (flags) { 6323 case USER_DISABLE: 6324 if (op == MDI_DISABLE_OP) { 6325 MDI_PI_SET_USER_DISABLE(pip); 6326 } else { 6327 MDI_PI_SET_USER_ENABLE(pip); 6328 } 6329 break; 6330 case DRIVER_DISABLE: 6331 if (op == MDI_DISABLE_OP) { 6332 MDI_PI_SET_DRV_DISABLE(pip); 6333 } else { 6334 MDI_PI_SET_DRV_ENABLE(pip); 6335 } 6336 break; 6337 case DRIVER_DISABLE_TRANSIENT: 6338 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6339 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6340 } else { 6341 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6342 } 6343 break; 6344 } 6345 MDI_PI_UNLOCK(pip); 6346 /* 6347 * Do a callback into the mdi consumer to let it 6348 * know that path is now enabled/disabled. 
6349 */ 6350 if (f != NULL) { 6351 rv = (*f)(vh->vh_dip, pip, 0, 6352 MDI_PI_EXT_STATE(pip), 6353 MDI_EXT_STATE_CHANGE | sync_flag | 6354 op | MDI_AFTER_STATE_CHANGE); 6355 if (rv != MDI_SUCCESS) { 6356 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6357 "vo_pi_state_change failed: rv = %x", rv)); 6358 } 6359 } 6360 return (next); 6361 } 6362 6363 /* 6364 * Common routine for doing enable/disable. 6365 * NOTE: this will be removed once the NWS files are changed to use the new 6366 * mdi_{enable,disable}_path has been putback 6367 */ 6368 int 6369 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6370 { 6371 6372 mdi_phci_t *ph; 6373 mdi_vhci_t *vh = NULL; 6374 mdi_client_t *ct; 6375 mdi_pathinfo_t *next, *pip; 6376 int found_it; 6377 6378 ph = i_devi_get_phci(pdip); 6379 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6380 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6381 (void *)cdip)); 6382 if (ph == NULL) { 6383 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6384 "!failed: operation %d: NULL ph", op)); 6385 return (MDI_FAILURE); 6386 } 6387 6388 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6389 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6390 "!failed: invalid operation %d", op)); 6391 return (MDI_FAILURE); 6392 } 6393 6394 vh = ph->ph_vhci; 6395 6396 if (cdip == NULL) { 6397 /* 6398 * Need to mark the Phci as enabled/disabled. 6399 */ 6400 MDI_DEBUG(4, (MDI_NOTE, cdip ? 
/*
 * Ensure phci powered up
 */
static void
i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* already held - nothing to do */
	if (MDI_PI(pip)->pi_pm_held) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
	if (ph_dip == NULL) {
		return;
	}

	/* NOTE(review): pi_mutex is dropped across pm_hold_power(). */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_hold_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
	if (DEVI(ph_dip)->devi_pm_info)
		MDI_PI(pip)->pi_pm_held = 1;
}

/*
 * Allow phci powered down
 */
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip = NULL;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* no hold to release */
	if (MDI_PI(pip)->pi_pm_held == 0) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	ASSERT(ph_dip != NULL);

	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));

	/* NOTE(review): pi_mutex is dropped across pm_rele_power(). */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_rele_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	MDI_PI(pip)->pi_pm_held = 0;
}

/* Bump the client's power reference count by incr. */
static void
i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	ct->ct_power_cnt += incr;
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d incr = %d",
	    (void *)ct, ct->ct_power_cnt, incr));
	ASSERT(ct->ct_power_cnt >= 0);
}
/* Release the pm hold on every path of this client. */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		/*
		 * NOTE(review): pip is dereferenced after mdi_rele_path();
		 * presumably the client lock keeps the path list stable
		 * here - confirm.
		 */
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * Drop the client's power reference count by decr; when it reaches
 * zero, release the pm hold on all of its pHCI paths.
 */
static void
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	if (i_ddi_devi_attached(ct->ct_dip)) {
		ct->ct_power_cnt -= decr;
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "%p ct_power_cnt = %d decr = %d",
		    (void *)ct, ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/* Force the client's power accounting back to its initial state. */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
	ASSERT(MDI_CLIENT_LOCKED(ct));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	ct->ct_powercnt_reset = 1;
}

/* Hold and power up a single pHCI; the hold is dropped on failure. */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
	    ddi_get_instance(ph_dip), (void *)pip));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
		    "pm_powerup FAILED for %s%d %p",
		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
		    (void *)pip));

		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * Power up every usable pHCI of the client; succeeds if at least one
 * pHCI powered up.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			/* ct_mutex is dropped across the powerup */
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	default:
		dev_err(parent, CE_WARN, "!unhandled bus power operation: 0x%x",
		    op);
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
/* Hold and power up the pHCIs of one client prior to configuration. */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* serialize against any in-flight power level change */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/* Pre-config hook: one named child, or every child of the vHCI. */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	int ret = MDI_SUCCESS;
	dev_info_t *cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(vdip);
	return (ret);
}

/*
 * Hold and power up the pHCIs of one client prior to unconfiguration;
 * *held is set when a hold was taken (or already existed).
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* serialize against any in-flight power level change */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(child)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/* don't power up a powered-down client just for auto-modunload */
	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}
/* Pre-unconfig hook: one named child, or every child of the vHCI. */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(vdip);

	/* report success as long as at least one child is held */
	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/* Undo the pre-config hold for one client after configuration. */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* serialize against any in-flight power level change */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* release only for the still-usable paths */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/* Post-config hook: one named child, or every child of the vHCI. */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip);
}

/* Undo the pre-unconfig hold for one client after unconfiguration. */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* serialize against any in-flight power level change */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* release only for the still-usable paths */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}
/* Post-unconfig hook: only runs when pre-unconfig actually took a hold. */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
		return;
	}

	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(vdip);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip);
}

/*
 * MDI power management entry point: dispatch the pm operation to the
 * appropriate pre/post config/unconfig or hold/release handler.
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*client_dip = NULL;
	mdi_client_t		*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (MDI_NOTE, vdip,
	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		ASSERT(args);

		/* for these two ops, args is the client dip itself */
		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(client_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip);

	return (ret);
}

/*
 * Return MDI_SUCCESS if dip is a vHCI node; optionally report its
 * mdi class name through mdi_class.
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}
*vhci; 7225 7226 if (!MDI_VHCI(dip)) 7227 return (MDI_FAILURE); 7228 7229 if (mdi_class) { 7230 vhci = DEVI(dip)->devi_mdi_xhci; 7231 ASSERT(vhci); 7232 *mdi_class = vhci->vh_class; 7233 } 7234 7235 return (MDI_SUCCESS); 7236 } 7237 7238 int 7239 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 7240 { 7241 mdi_phci_t *phci; 7242 7243 if (!MDI_PHCI(dip)) 7244 return (MDI_FAILURE); 7245 7246 if (mdi_class) { 7247 phci = DEVI(dip)->devi_mdi_xhci; 7248 ASSERT(phci); 7249 *mdi_class = phci->ph_vhci->vh_class; 7250 } 7251 7252 return (MDI_SUCCESS); 7253 } 7254 7255 int 7256 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 7257 { 7258 mdi_client_t *client; 7259 7260 if (!MDI_CLIENT(dip)) 7261 return (MDI_FAILURE); 7262 7263 if (mdi_class) { 7264 client = DEVI(dip)->devi_mdi_client; 7265 ASSERT(client); 7266 *mdi_class = client->ct_vhci->vh_class; 7267 } 7268 7269 return (MDI_SUCCESS); 7270 } 7271 7272 void * 7273 mdi_client_get_vhci_private(dev_info_t *dip) 7274 { 7275 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7276 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7277 mdi_client_t *ct; 7278 ct = i_devi_get_client(dip); 7279 return (ct->ct_vprivate); 7280 } 7281 return (NULL); 7282 } 7283 7284 void 7285 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7286 { 7287 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7288 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7289 mdi_client_t *ct; 7290 ct = i_devi_get_client(dip); 7291 ct->ct_vprivate = data; 7292 } 7293 } 7294 /* 7295 * mdi_pi_get_vhci_private(): 7296 * Get the vhci private information associated with the 7297 * mdi_pathinfo node 7298 */ 7299 void * 7300 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7301 { 7302 caddr_t vprivate = NULL; 7303 if (pip) { 7304 vprivate = MDI_PI(pip)->pi_vprivate; 7305 } 7306 return (vprivate); 7307 } 7308 7309 /* 7310 * mdi_pi_set_vhci_private(): 7311 * Set the vhci private information in the mdi_pathinfo 
 *		node
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	/* NULL pip is tolerated as a no-op */
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node
 *		Returns NULL if dip is not an mdi pHCI node.
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node
 *		Silently does nothing if dip is not an mdi pHCI node.
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/* Return non-zero if the path is marked hidden */
int
mdi_pi_ishidden(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
}

/* Return non-zero if the path is marked device-removed */
int
mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
}

/* Return 1 if all client paths are device_removed */
static int
i_mdi_client_all_devices_removed(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		all_devices_removed = 1;

	/* walk the client's path list under the client lock */
	MDI_CLIENT_LOCK(ct);
	for (pip = ct->ct_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
		if (!mdi_pi_device_isremoved(pip)) {
			all_devices_removed = 0;
			break;
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	return (all_devices_removed);
}

/*
 * When processing path hotunplug, represent device removal.
 * Returns 1 if the path transitioned to DEVICE_REMOVED, 0 if it was
 * already marked removed.
 */
int
mdi_pi_device_remove(mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	MDI_PI_LOCK(pip);
	if (mdi_pi_device_isremoved(pip)) {
		MDI_PI_UNLOCK(pip);
		return (0);
	}
	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
	MDI_PI_FLAGS_SET_HIDDEN(pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * If all paths associated with the client are now DEVICE_REMOVED,
	 * reflect DEVICE_REMOVED in the client.
	 */
	ct = MDI_PI(pip)->pi_client;
	if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
		(void) ndi_devi_device_remove(ct->ct_dip);
	else
		i_ddi_di_cache_invalidate();

	return (1);
}

/*
 * When processing hotplug, if a path marked mdi_pi_device_isremoved()
 * is now accessible then this interface is used to represent device insertion.
 * Returns 1 if the path transitioned back, 0 if it was not marked removed.
 */
int
mdi_pi_device_insert(mdi_pathinfo_t *pip)
{
	MDI_PI_LOCK(pip);
	if (!mdi_pi_device_isremoved(pip)) {
		MDI_PI_UNLOCK(pip);
		return (0);
	}
	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
	MDI_PI_FLAGS_CLR_HIDDEN(pip);
	MDI_PI_UNLOCK(pip);

	i_ddi_di_cache_invalidate();

	return (1);
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function.  The caller frees strlen(filename) + 1 bytes.
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/* the -1 below is to account for "%s" in the format string */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	/* len counts the NUL: strlen(fmt) - 2 ("%s") + strlen(vhclass) + 1 */
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 * Consume (and clear) the preloaded nvlist for our class, if any.
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* flush the cache to disk before the system goes down */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_next;

	/* async threads must be quiesced before tearing down their state */
	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/*
	 * Poll until both the flush thread and all async client config
	 * threads have observed MDI_VHC_EXIT and gone away.
	 */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	/* perform a final synchronous flush if the cache is still dirty */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}
/*
 * Stop vhci cache flush thread
 * Registered via callb_add(); invoked before the VFS layer shuts down so
 * a final forced flush can reach the on-disk cache file.
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	/* wait for the flush thread to notice MDI_VHC_EXIT and exit */
	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		/* force flag set: flush even before i_ddi_io_initialized */
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}

/*
 * Enqueue the vhcache phci (cphci) at the tail of the list
 */
static void
enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
{
	cphci->cphci_next = NULL;
	if (vhcache->vhcache_phci_head == NULL)
		vhcache->vhcache_phci_head = cphci;
	else
		vhcache->vhcache_phci_tail->cphci_next = cphci;
	vhcache->vhcache_phci_tail = cphci;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
 */
static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *cpi)
{
	cpi->cpi_next = NULL;
	if (cct->cct_cpi_head == NULL)
		cct->cct_cpi_head = cpi;
	else
		cct->cct_cpi_tail->cpi_next = cpi;
	cct->cct_cpi_tail = cpi;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
7704 */ 7705 static void 7706 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7707 mdi_vhcache_pathinfo_t *newcpi) 7708 { 7709 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7710 7711 if (cct->cct_cpi_head == NULL || 7712 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7713 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7714 else { 7715 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7716 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7717 prev_cpi = cpi, cpi = cpi->cpi_next) 7718 ; 7719 7720 if (prev_cpi == NULL) 7721 cct->cct_cpi_head = newcpi; 7722 else 7723 prev_cpi->cpi_next = newcpi; 7724 7725 newcpi->cpi_next = cpi; 7726 7727 if (cpi == NULL) 7728 cct->cct_cpi_tail = newcpi; 7729 } 7730 } 7731 7732 /* 7733 * Enqueue the vhcache client (cct) at the tail of the list 7734 */ 7735 static void 7736 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7737 mdi_vhcache_client_t *cct) 7738 { 7739 cct->cct_next = NULL; 7740 if (vhcache->vhcache_client_head == NULL) 7741 vhcache->vhcache_client_head = cct; 7742 else 7743 vhcache->vhcache_client_tail->cct_next = cct; 7744 vhcache->vhcache_client_tail = cct; 7745 } 7746 7747 static void 7748 free_string_array(char **str, int nelem) 7749 { 7750 int i; 7751 7752 if (str) { 7753 for (i = 0; i < nelem; i++) { 7754 if (str[i]) 7755 kmem_free(str[i], strlen(str[i]) + 1); 7756 } 7757 kmem_free(str, sizeof (char *) * nelem); 7758 } 7759 } 7760 7761 static void 7762 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7763 { 7764 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7765 kmem_free(cphci, sizeof (*cphci)); 7766 } 7767 7768 static void 7769 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7770 { 7771 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7772 kmem_free(cpi, sizeof (*cpi)); 7773 } 7774 7775 static void 7776 free_vhcache_client(mdi_vhcache_client_t *cct) 7777 { 7778 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7779 kmem_free(cct, sizeof (*cct)); 7780 } 7781 7782 
static char * 7783 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7784 { 7785 char *name_addr; 7786 int len; 7787 7788 len = strlen(ct_name) + strlen(ct_addr) + 2; 7789 name_addr = kmem_alloc(len, KM_SLEEP); 7790 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7791 7792 if (ret_len) 7793 *ret_len = len; 7794 return (name_addr); 7795 } 7796 7797 /* 7798 * Copy the contents of paddrnvl to vhci cache. 7799 * paddrnvl nvlist contains path information for a vhci client. 7800 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7801 */ 7802 static void 7803 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7804 mdi_vhcache_client_t *cct) 7805 { 7806 nvpair_t *nvp = NULL; 7807 mdi_vhcache_pathinfo_t *cpi; 7808 uint_t nelem; 7809 uint32_t *val; 7810 7811 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7812 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7813 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7814 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7815 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7816 ASSERT(nelem == 2); 7817 cpi->cpi_cphci = cphci_list[val[0]]; 7818 cpi->cpi_flags = val[1]; 7819 enqueue_tail_vhcache_pathinfo(cct, cpi); 7820 } 7821 } 7822 7823 /* 7824 * Copy the contents of caddrmapnvl to vhci cache. 7825 * caddrmapnvl nvlist contains vhci client address to phci client address 7826 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7827 * this nvlist. 
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
	nvpair_t *nvp = NULL;
	nvlist_t *paddrnvl;
	mdi_vhcache_client_t *cct;

	/* each nvpair: name = "<clientname>@<addr>", value = paddrs nvlist */
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_nvlist(nvp, &paddrnvl);
		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
		/* the client must contain at least one path */
		ASSERT(cct->cct_cpi_head != NULL);

		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
	}
}

/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 * NAME		TYPE		DATA
 * version	int32		version number
 * phcis	string array	array of phci paths
 * clientaddrmap nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 * NAME		TYPE		DATA
 * caddr1	nvlist_t	paddrs_nvl1
 * caddr2	nvlist_t	paddrs_nvl2
 * ...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 * NAME		TYPE		DATA
 * pi_addr1	uint32_array	(phci-id, cpi_flags)
 * pi_addr2	uint32_array	(phci-id, cpi_flags)
 * ...
 * where pi_addr1, pi_addr2, ...
 * are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify pHCIs to which the
 * bus specific address belongs to. These integers are used as an index
 * into the phcis string array in the main nvlist to get the pHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char **phcis, **phci_namep;
	uint_t nphcis;
	mdi_vhcache_phci_t *cphci, **cphci_list;
	nvlist_t *caddrmapnvl;
	int32_t ver;
	int i;
	size_t cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* reject missing or mismatched cache versions */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* an empty cache (no phcis) is a valid cache */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/* temporary index: phci-id -> cphci, used while decoding paths */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnvl.
 * Returns 0 on success, errno on failure.
7933 */ 7934 static int 7935 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7936 nvlist_t *caddrmapnvl) 7937 { 7938 mdi_vhcache_pathinfo_t *cpi; 7939 nvlist_t *nvl; 7940 int err; 7941 uint32_t val[2]; 7942 7943 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7944 7945 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7946 return (err); 7947 7948 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7949 val[0] = cpi->cpi_cphci->cphci_id; 7950 val[1] = cpi->cpi_flags; 7951 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7952 != 0) 7953 goto out; 7954 } 7955 7956 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7957 out: 7958 nvlist_free(nvl); 7959 return (err); 7960 } 7961 7962 /* 7963 * Build caddrmapnvl using the information in the vhci cache 7964 * and add it to the mainnvl. 7965 * Returns 0 on success, errno on failure. 7966 */ 7967 static int 7968 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7969 { 7970 mdi_vhcache_client_t *cct; 7971 nvlist_t *nvl; 7972 int err; 7973 7974 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7975 7976 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7977 return (err); 7978 7979 for (cct = vhcache->vhcache_client_head; cct != NULL; 7980 cct = cct->cct_next) { 7981 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7982 goto out; 7983 } 7984 7985 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7986 out: 7987 nvlist_free(nvl); 7988 return (err); 7989 } 7990 7991 /* 7992 * Build nvlist using the information in the vhci cache. 7993 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7994 * Returns nvl on success, NULL on failure. 
 */
static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
{
	mdi_vhcache_phci_t *cphci;
	uint_t phci_count;
	char **phcis;
	nvlist_t *nvl;
	int err, i;

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
		nvl = NULL;
		goto out;
	}

	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
	    MDI_VHCI_CACHE_VERSION)) != 0)
		goto out;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* empty cache: just version, no phcis or client map */
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	/* number the phcis; cphci_id is the index used by paddr entries */
	phci_count = 0;
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next)
		cphci->cphci_id = phci_count++;

	/* build phci pathname list */
	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
	    cphci = cphci->cphci_next, i++)
		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);

	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
	    phci_count);
	free_string_array(phcis, phci_count);

	if (err == 0 &&
	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	rw_exit(&vhcache->vhcache_lock);
out:
	/* any failure: free the partially built nvlist */
	nvlist_free(nvl);
	return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci path.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
{
	mdi_vhcache_phci_t *cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (strcmp(cphci->cphci_path, phci_path) == 0)
			return (cphci);
	}

	return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
{
	mdi_vhcache_phci_t *cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (cphci->cphci_phci == ph)
			return (cphci);
	}

	return (NULL);
}

/*
 * Add the specified phci to the vhci cache if not already present.
 */
static void
vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;
	char *pathname;
	int cache_updated;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(ph->ph_dip, pathname);
	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
	    != NULL) {
		/* already cached by path; just (re)bind the live phci */
		cphci->cphci_phci = ph;
		cache_updated = 0;
	} else {
		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
		cphci->cphci_phci = ph;
		enqueue_vhcache_phci(vhcache, cphci);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * Since a new phci has been added, reset
	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
	 * during next vhcache_discover_paths().
	 */
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_path_discovery_cutoff_time = 0;
	mutex_exit(&vhc->vhc_lock);

	kmem_free(pathname, MAXPATHLEN);
	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified phci from the vhci cache.
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
		/* do not remove the actual mdi_vhcache_phci structure */
		cphci->cphci_phci = NULL;
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Copy a lookup token; a NULL src yields an empty (never-hit) token.
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
	if (src == NULL) {
		dst->lt_cct = NULL;
		dst->lt_cct_lookup_time = 0;
	} else {
		dst->lt_cct = src->lt_cct;
		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
	}
}

/*
 * Look up vhcache client for the specified client.
 * An optional token caches the previous lookup result to skip the hash
 * lookup; returns NULL if no matching client is cached.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mod_hash_val_t hv;
	char *name_addr;
	int len;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)name_addr, &hv) == 0) {
		if (token) {
			token->lt_cct = (mdi_vhcache_client_t *)hv;
			token->lt_cct_lookup_time = ddi_get_lbolt64();
		}
	} else {
		if (token) {
			token->lt_cct = NULL;
			token->lt_cct_lookup_time = 0;
		}
		hv = NULL;
	}
	kmem_free(name_addr, len);
	return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phci_t *ph = pip->pi_phci;
	mdi_client_t *ct = pip->pi_client;
	int cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/* look for an existing cached entry for this (phci, addr) path */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				/* path exists again; clear hint and re-sort */
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	if (cpi == NULL) {
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 * The cached entry itself is retained (only its pip pointer is cleared).
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_client_t *ct = pip->pi_client;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) != NULL) {
		for (cpi = cct->cct_cpi_head; cpi != NULL;
		    cpi = cpi->cpi_next) {
			if (cpi->cpi_pip == pip) {
				cpi->cpi_pip = NULL;
				break;
			}
		}
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			/* read-only root: stop trying; not a failure */
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* warn only on the first consecutive failure */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* number of seconds to sleep idle before exiting */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/* while dirty and not exiting: wait until the flush time */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				/* cv_timedwait takes an absolute tick time */
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				/* re-mark dirty if the flush failed */
				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* idle-wait for more work; give up after idle_time */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
 */
/*
 * Mark the vhci cache dirty and arrange for it to be flushed to disk:
 * either wake the existing flush thread or create one.  No-op if the
 * cache is not fully built yet or the backing filesystem is read-only.
 */
static void
vhcache_dirty(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int create_thread;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* do not flush cache until the cache is fully built */
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}
	rw_exit(&vhcache->vhcache_lock);

	mutex_enter(&vhc->vhc_lock);
	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
		mutex_exit(&vhc->vhc_lock);
		return;
	}

	/* schedule the flush mdi_vhcache_flush_delay seconds from now */
	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		/* flush thread already running; just wake it */
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		/* claim the flag under the lock; create outside the lock */
		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;				/* phci devinfo path */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;

/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;		/* protects vhbc_thr_count */
	kcondvar_t vhbc_cv;		/* signalled when thr_count hits 0 */
	int vhbc_thr_count;		/* # of outstanding worker threads */
} mdi_vhci_bus_config_t;

/*
 * bus config the specified phci
 *
 * Worker (possibly run as its own thread by bus_config_all_phcis()).
 * Consumes and frees its mdi_phci_bus_config_t argument, and decrements
 * the shared thread count, waking the waiter when the count reaches zero.
 */
static void
bus_config_phci(void *arg)
{
	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
	dev_info_t *ph_dip;

	/*
	 * first configure all path components upto phci and then configure
	 * the phci children.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
	    != NULL) {
		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
			(void) ndi_devi_config_driver(ph_dip,
			    vhbc->vhbc_op_flags,
			    vhbc->vhbc_op_major);
		} else
			(void) ndi_devi_config(ph_dip,
			    vhbc->vhbc_op_flags);

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* this worker owns phbc and its path string; free them here */
	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
	kmem_free(phbc, sizeof (*phbc));

	mutex_enter(&vhbc->vhbc_lock);
	vhbc->vhbc_thr_count--;
	if (vhbc->vhbc_thr_count == 0)
		cv_broadcast(&vhbc->vhbc_cv);
	mutex_exit(&vhbc->vhbc_lock);
}

/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	/*
	 * Snapshot per-phci work items while holding the cache lock; the
	 * path strings are duplicated so the workers don't reference cache
	 * entries after the lock is dropped.
	 */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		/* worker frees phbc, so grab the next pointer first */
		phbc_next = phbc->phbc_next;
		if (mdi_mtc_off)
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}

/*
 * Single
threaded version of bus_config_all_phcis() 8556 */ 8557 static void 8558 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8559 ddi_bus_config_op_t op, major_t maj) 8560 { 8561 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8562 8563 single_threaded_vhconfig_enter(vhc); 8564 bus_config_all_phcis(vhcache, flags, op, maj); 8565 single_threaded_vhconfig_exit(vhc); 8566 } 8567 8568 /* 8569 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8570 * The path includes the child component in addition to the phci path. 8571 */ 8572 static int 8573 bus_config_one_phci_child(char *path) 8574 { 8575 dev_info_t *ph_dip, *child; 8576 char *devnm; 8577 int rv = MDI_FAILURE; 8578 8579 /* extract the child component of the phci */ 8580 devnm = strrchr(path, '/'); 8581 *devnm++ = '\0'; 8582 8583 /* 8584 * first configure all path components upto phci and then 8585 * configure the phci child. 8586 */ 8587 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8588 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8589 NDI_SUCCESS) { 8590 /* 8591 * release the hold that ndi_devi_config_one() placed 8592 */ 8593 ndi_rele_devi(child); 8594 rv = MDI_SUCCESS; 8595 } 8596 8597 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8598 ndi_rele_devi(ph_dip); 8599 } 8600 8601 devnm--; 8602 *devnm = '/'; 8603 return (rv); 8604 } 8605 8606 /* 8607 * Build a list of phci client paths for the specified vhci client. 8608 * The list includes only those phci client paths which aren't configured yet. 8609 */ 8610 static mdi_phys_path_t * 8611 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8612 { 8613 mdi_vhcache_pathinfo_t *cpi; 8614 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8615 int config_path, len; 8616 8617 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8618 /* 8619 * include only those paths that aren't configured. 
8620 */ 8621 config_path = 0; 8622 if (cpi->cpi_pip == NULL) 8623 config_path = 1; 8624 else { 8625 MDI_PI_LOCK(cpi->cpi_pip); 8626 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8627 config_path = 1; 8628 MDI_PI_UNLOCK(cpi->cpi_pip); 8629 } 8630 8631 if (config_path) { 8632 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8633 len = strlen(cpi->cpi_cphci->cphci_path) + 8634 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8635 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8636 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8637 cpi->cpi_cphci->cphci_path, ct_name, 8638 cpi->cpi_addr); 8639 pp->phys_path_next = NULL; 8640 8641 if (pp_head == NULL) 8642 pp_head = pp; 8643 else 8644 pp_tail->phys_path_next = pp; 8645 pp_tail = pp; 8646 } 8647 } 8648 8649 return (pp_head); 8650 } 8651 8652 /* 8653 * Free the memory allocated for phci client path list. 8654 */ 8655 static void 8656 free_phclient_path_list(mdi_phys_path_t *pp_head) 8657 { 8658 mdi_phys_path_t *pp, *pp_next; 8659 8660 for (pp = pp_head; pp != NULL; pp = pp_next) { 8661 pp_next = pp->phys_path_next; 8662 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8663 kmem_free(pp, sizeof (*pp)); 8664 } 8665 } 8666 8667 /* 8668 * Allocated async client structure and initialize with the specified values. 8669 */ 8670 static mdi_async_client_config_t * 8671 alloc_async_client_config(char *ct_name, char *ct_addr, 8672 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8673 { 8674 mdi_async_client_config_t *acc; 8675 8676 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8677 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8678 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8679 acc->acc_phclient_path_list_head = pp_head; 8680 init_vhcache_lookup_token(&acc->acc_token, tok); 8681 acc->acc_next = NULL; 8682 return (acc); 8683 } 8684 8685 /* 8686 * Free the memory allocated for the async client structure and their members. 
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	if (acc->acc_phclient_path_list_head)
		free_phclient_path_list(acc->acc_phclient_path_list_head);
	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;

	/*
	 * Detach the list and re-enqueue each entry;
	 * enqueue_vhcache_pathinfo() places entries according to the
	 * DOES_NOT_EXIST hint, giving the desired partitioned order.
	 */
	cpi_head = cct->cct_cpi_head;
	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
		cpi_next = cpi->cpi_next;
		enqueue_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/* flag disagrees with whether the path exists (cpi_pip) */
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * Need the write lock.  If the upgrade fails, the lock was dropped
	 * momentarily, so the client must be looked up again - it may have
	 * been removed in the window.
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	/* the persisted hint flags changed; schedule an on-disk update */
	vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t *pp;

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
		(void) bus_config_one_phci_child(pp->phys_path);
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 */
/*
 * Worker thread for asynchronous client path configuration.  Drains
 * vhc_acc_list, configuring each queued client's paths synchronously.
 * CPR-aware; exits after sitting idle for mdi_async_config_idle_time
 * seconds or when MDI_VHC_EXIT is set.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work, exit request, or the idle timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the head entry; drop the lock to do the config */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	/* still holding vhc_lock here (loop exits with it held) */
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc, *newacc;
	int create_thread;

	if (pp_head == NULL)
		return;

	if (mdi_mtc_off) {
		/* multithreaded config disabled; do it inline */
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(newacc);

	mutex_enter(&vhc->vhc_lock);
	/* if this client is already queued, drop the duplicate request */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
			free_async_client_config(newacc);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
	}

	if (vhc->vhc_acc_list_head == NULL)
		vhc->vhc_acc_list_head = newacc;
	else
		vhc->vhc_acc_list_tail->acc_next = newacc;
	vhc->vhc_acc_list_tail = newacc;
	vhc->vhc_acc_count++;
	/*
	 * If there are at least as many worker threads as queued entries,
	 * just wake them; otherwise add a worker for the new entry.
	 */
	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_acc_thrcount++;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * Return number of online paths for the specified client.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Entered with vhcache_lock held (asserted below); the lock is released
 * on all return paths.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * No path is online yet: configure synchronously until one comes
	 * online, then hand the remainder to the async machinery.
	 */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* re-lookup: the lock was dropped during config */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				/* detach handed-off tail before freeing head */
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Serialize vhci configuration operations: block until no other thread
 * holds the MDI_VHC_SINGLE_THREADED flag, then claim it.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Release the single-threaded configuration flag and wake any waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
9018 */ 9019 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 9020 { "fp", 1 }, 9021 { "iscsi", 0 }, 9022 { "ibsrp", 1 } 9023 }; 9024 9025 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 9026 9027 static void * 9028 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 9029 { 9030 void *new_ptr; 9031 9032 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 9033 if (old_ptr) { 9034 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 9035 kmem_free(old_ptr, old_size); 9036 } 9037 return (new_ptr); 9038 } 9039 9040 static void 9041 add_to_phci_list(char ***driver_list, int **root_support_list, 9042 int *cur_elements, int *max_elements, char *driver_name, int root_support) 9043 { 9044 ASSERT(*cur_elements <= *max_elements); 9045 if (*cur_elements == *max_elements) { 9046 *max_elements += 10; 9047 *driver_list = mdi_realloc(*driver_list, 9048 sizeof (char *) * (*cur_elements), 9049 sizeof (char *) * (*max_elements)); 9050 *root_support_list = mdi_realloc(*root_support_list, 9051 sizeof (int) * (*cur_elements), 9052 sizeof (int) * (*max_elements)); 9053 } 9054 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 9055 (*root_support_list)[*cur_elements] = root_support; 9056 (*cur_elements)++; 9057 } 9058 9059 static void 9060 get_phci_driver_list(char *vhci_class, char ***driver_list, 9061 int **root_support_list, int *cur_elements, int *max_elements) 9062 { 9063 mdi_phci_driver_info_t *st_driver_list, *p; 9064 int st_ndrivers, root_support, i, j, driver_conf_count; 9065 major_t m; 9066 struct devnames *dnp; 9067 ddi_prop_t *propp; 9068 9069 *driver_list = NULL; 9070 *root_support_list = NULL; 9071 *cur_elements = 0; 9072 *max_elements = 0; 9073 9074 /* add the phci drivers derived from the phci driver.conf files */ 9075 for (m = 0; m < devcnt; m++) { 9076 dnp = &devnamesp[m]; 9077 9078 if (dnp->dn_flags & DN_PHCI_DRIVER) { 9079 LOCK_DEV_OPS(&dnp->dn_lock); 9080 if (dnp->dn_global_prop_ptr != NULL && 9081 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 9082 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 9083 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 9084 strcmp(propp->prop_val, vhci_class) == 0) { 9085 9086 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 9087 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 9088 &dnp->dn_global_prop_ptr->prop_list) 9089 == NULL) ? 1 : 0; 9090 9091 add_to_phci_list(driver_list, root_support_list, 9092 cur_elements, max_elements, dnp->dn_name, 9093 root_support); 9094 9095 UNLOCK_DEV_OPS(&dnp->dn_lock); 9096 } else 9097 UNLOCK_DEV_OPS(&dnp->dn_lock); 9098 } 9099 } 9100 9101 driver_conf_count = *cur_elements; 9102 9103 /* add the phci drivers specified in the built-in tables */ 9104 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 9105 st_driver_list = scsi_phci_driver_list; 9106 st_ndrivers = sizeof (scsi_phci_driver_list) / 9107 sizeof (mdi_phci_driver_info_t); 9108 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 9109 st_driver_list = ib_phci_driver_list; 9110 st_ndrivers = sizeof (ib_phci_driver_list) / 9111 sizeof (mdi_phci_driver_info_t); 9112 } else { 9113 st_driver_list = NULL; 9114 st_ndrivers = 0; 9115 } 9116 9117 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 9118 /* add this phci driver if not already added before */ 9119 for (j = 0; j < driver_conf_count; j++) { 9120 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 9121 break; 9122 } 9123 if (j == driver_conf_count) { 9124 add_to_phci_list(driver_list, root_support_list, 9125 cur_elements, max_elements, p->phdriver_name, 9126 p->phdriver_root_support); 9127 } 9128 } 9129 } 9130 9131 /* 9132 * Attach the phci driver instances associated with the specified vhci class. 9133 * If root is mounted attach all phci driver instances. 9134 * If root is not mounted, attach the instances of only those phci 9135 * drivers that have the root support. 
 */
static void
attach_phci_drivers(char *vhci_class)
{
	char	**driver_list, **p;
	int	*root_support_list;
	int	cur_elements, max_elements, i;
	major_t	m;

	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
	    &cur_elements, &max_elements);

	for (i = 0; i < cur_elements; i++) {
		/* before root is mounted, skip drivers without root support */
		if (modrootloaded || root_support_list[i]) {
			m = ddi_name_to_major(driver_list[i]);
			/* attach-then-release: we only need the side effect */
			if (m != DDI_MAJOR_T_NONE &&
			    ddi_hold_installed_driver(m))
				ddi_rele_driver(m);
		}
	}

	/* free the arrays allocated by get_phci_driver_list() */
	if (driver_list) {
		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
			kmem_free(*p, strlen(*p) + 1);
		kmem_free(driver_list, sizeof (char *) * max_elements);
		kmem_free(root_support_list, sizeof (int) * max_elements);
	}
}

/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
 *
 * Cache is built fully if the root is mounted.
 * If the root is not mounted, phci drivers that do not have root support
 * are not attached. As a result the cache is built partially. The entries
 * in the cache reflect only those phci drivers that have root support.
 */
/*
 * Returns 1 if the cache was (re)built here, 0 if it was already set up.
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
		rw_exit(&vhcache->vhcache_lock);
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}
	rw_exit(&vhcache->vhcache_lock);

	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	/* newly built cache must be persisted */
	vhcache_dirty(vhc);
	return (1);
}

/*
 * Determine if discovery of paths is needed.
 */
/*
 * Returns 1 if a full path discovery should be run now, 0 otherwise.
 * Per-boot/post-boot discovery budgets are consumed first; after that,
 * discovery is rate-limited to once per mdi_path_discovery_interval.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int rv = 1;

	mutex_enter(&vhc->vhc_lock);
	if (i_ddi_io_initialized() == 0) {
		if (vhc->vhc_path_discovery_boot > 0) {
			vhc->vhc_path_discovery_boot--;
			goto out;
		}
	} else {
		if (vhc->vhc_path_discovery_postboot > 0) {
			vhc->vhc_path_discovery_postboot--;
			goto out;
		}
	}

	/*
	 * Do full path discovery at most once per mdi_path_discovery_interval.
	 * This is to avoid a series of full path discoveries when opening
	 * stale /dev/[r]dsk links.
	 */
	if (mdi_path_discovery_interval != -1 &&
	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
		goto out;

	rv = 0;
out:
	mutex_exit(&vhc->vhc_lock);
	return (rv);
}

/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
 */
/*
 * Returns 1 if discovery ran, 0 if it was throttled by
 * vhcache_do_discovery().
 */
static int
vhcache_discover_paths(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;

	single_threaded_vhconfig_enter(vhc);

	if (vhcache_do_discovery(vhc)) {
		attach_phci_drivers(vh->vh_class);
		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

		/* start the next rate-limit window */
		mutex_enter(&vhc->vhc_lock);
		vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
		    mdi_path_discovery_interval * TICKS_PER_SECOND;
		mutex_exit(&vhc->vhc_lock);
		rv = 1;
	}

	single_threaded_vhconfig_exit(vhc);
	return (rv);
}

/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *	vdip	vhci dip
 *	flags	bus config flags
 *	op	bus config operation
 *	The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *	arg	pointer to name@addr
 *	child	upon successful return from this function, *child will be
 *		set to the configured and held devinfo child node of vdip.
 *	ct_addr	pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *	arg	major number of the driver
 *	child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *	arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node. Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (MDI_NOTE, vdip,
		    "vhci dip is busy owned %p", (void *)vdip));
		goto default_bus_config;
	}

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		/* rv == 1 means the cache was just built by this call */
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/* temporarily NUL-split "name@addr" */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		/* skip if build_vhci_cache() just did a BUS_CONFIG_ALL */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
 */
/*
 * Returns the nvlist on success, NULL on failure (logging a warning for
 * I/O errors or corrupt data; the cache will be recreated).
 */
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
	nvlist_t *nvl;
	int err;
	char *filename;

	filename = vhclass2vhcache_filename(vhci_class);

	if ((err = fread_nvlist(filename, &nvl)) == 0) {
		kmem_free(filename, strlen(filename) + 1);
		return (nvl);
	} else if (err == EIO)
		cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
	else if (err == EINVAL)
		cmn_err(CE_WARN,
		    "%s: data file corrupted, will recreate", filename);

	kmem_free(filename, strlen(filename) + 1);
	return (NULL);
}

/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 */
void
mdi_read_devices_files(void)
{
	int i;

	for (i = 0; i < N_VHCI_CLASSES; i++)
		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
}

/*
 * Remove all stale entries from vhci cache.
 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t	*phci, *nxt_phci;
	mdi_vhcache_client_t	*client, *nxt_client;
	mdi_vhcache_pathinfo_t	*path, *nxt_path;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/*
	 * Detach the client list, then re-enqueue only clients that retain
	 * at least one valid path.
	 */
	client = vhcache->vhcache_client_head;
	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
	for ( ; client != NULL; client = nxt_client) {
		nxt_client = client->cct_next;

		path = client->cct_cpi_head;
		client->cct_cpi_head = client->cct_cpi_tail = NULL;
		for ( ; path != NULL; path = nxt_path) {
			nxt_path = path->cpi_next;
			if ((path->cpi_cphci->cphci_phci != NULL) &&
			    (path->cpi_pip != NULL)) {
				enqueue_tail_vhcache_pathinfo(client, path);
			} else if (path->cpi_pip != NULL) {
				/* Not valid to have a path without a phci. */
				free_vhcache_pathinfo(path);
			}
			/*
			 * NOTE(review): entries with cpi_pip == NULL fall
			 * through with neither requeue nor free - looks like
			 * a leak of the stale pathinfo; confirm intent.
			 */
		}

		if (client->cct_cpi_head != NULL)
			enqueue_vhcache_client(vhcache, client);
		else {
			/* no paths left; drop client from hash and free it */
			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
			    (mod_hash_key_t)client->cct_name_addr);
			free_vhcache_client(client);
		}
	}

	/* keep only phcis whose devinfo node still exists */
	phci = vhcache->vhcache_phci_head;
	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
	for ( ; phci != NULL; phci = nxt_phci) {

		nxt_phci = phci->cphci_next;
		if (phci->cphci_phci != NULL)
			enqueue_vhcache_phci(vhcache, phci);
		else
			free_vhcache_phci(phci);
	}

	vhcache->vhcache_clean_time = ddi_get_lbolt64();
	rw_exit(&vhcache->vhcache_lock);
	/* persist the cleaned cache */
	vhcache_dirty(vhc);
}

/*
 * Remove all stale entries from vhci cache.
9484 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9485 */ 9486 void 9487 mdi_clean_vhcache(void) 9488 { 9489 mdi_vhci_t *vh; 9490 9491 mutex_enter(&mdi_mutex); 9492 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9493 vh->vh_refcnt++; 9494 mutex_exit(&mdi_mutex); 9495 clean_vhcache(vh->vh_config); 9496 mutex_enter(&mdi_mutex); 9497 vh->vh_refcnt--; 9498 } 9499 mutex_exit(&mdi_mutex); 9500 } 9501 9502 /* 9503 * mdi_vhci_walk_clients(): 9504 * Walker routine to traverse client dev_info nodes 9505 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9506 * below the client, including nexus devices, which we dont want. 9507 * So we just traverse the immediate siblings, starting from 1st client. 9508 */ 9509 void 9510 mdi_vhci_walk_clients(dev_info_t *vdip, 9511 int (*f)(dev_info_t *, void *), void *arg) 9512 { 9513 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9514 dev_info_t *cdip; 9515 mdi_client_t *ct; 9516 9517 MDI_VHCI_CLIENT_LOCK(vh); 9518 cdip = ddi_get_child(vdip); 9519 while (cdip) { 9520 ct = i_devi_get_client(cdip); 9521 MDI_CLIENT_LOCK(ct); 9522 9523 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9524 cdip = ddi_get_next_sibling(cdip); 9525 else 9526 cdip = NULL; 9527 9528 MDI_CLIENT_UNLOCK(ct); 9529 } 9530 MDI_VHCI_CLIENT_UNLOCK(vh); 9531 } 9532 9533 /* 9534 * mdi_vhci_walk_phcis(): 9535 * Walker routine to traverse phci dev_info nodes 9536 */ 9537 void 9538 mdi_vhci_walk_phcis(dev_info_t *vdip, 9539 int (*f)(dev_info_t *, void *), void *arg) 9540 { 9541 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9542 mdi_phci_t *ph, *next; 9543 9544 MDI_VHCI_PHCI_LOCK(vh); 9545 ph = vh->vh_phci_head; 9546 while (ph) { 9547 MDI_PHCI_LOCK(ph); 9548 9549 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9550 next = ph->ph_next; 9551 else 9552 next = NULL; 9553 9554 MDI_PHCI_UNLOCK(ph); 9555 ph = next; 9556 } 9557 MDI_VHCI_PHCI_UNLOCK(vh); 9558 } 9559 9560 9561 /* 9562 * mdi_walk_vhcis(): 9563 * Walker routine to traverse vhci 
dev_info nodes 9564 */ 9565 void 9566 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9567 { 9568 mdi_vhci_t *vh = NULL; 9569 9570 mutex_enter(&mdi_mutex); 9571 /* 9572 * Scan for already registered vhci 9573 */ 9574 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9575 vh->vh_refcnt++; 9576 mutex_exit(&mdi_mutex); 9577 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9578 mutex_enter(&mdi_mutex); 9579 vh->vh_refcnt--; 9580 break; 9581 } else { 9582 mutex_enter(&mdi_mutex); 9583 vh->vh_refcnt--; 9584 } 9585 } 9586 9587 mutex_exit(&mdi_mutex); 9588 } 9589 9590 /* 9591 * i_mdi_log_sysevent(): 9592 * Logs events for pickup by syseventd 9593 */ 9594 static void 9595 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9596 { 9597 char *path_name; 9598 nvlist_t *attr_list; 9599 9600 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9601 KM_SLEEP) != DDI_SUCCESS) { 9602 goto alloc_failed; 9603 } 9604 9605 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9606 (void) ddi_pathname(dip, path_name); 9607 9608 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9609 ddi_driver_name(dip)) != DDI_SUCCESS) { 9610 goto error; 9611 } 9612 9613 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9614 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9615 goto error; 9616 } 9617 9618 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9619 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9620 goto error; 9621 } 9622 9623 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9624 path_name) != DDI_SUCCESS) { 9625 goto error; 9626 } 9627 9628 if (nvlist_add_string(attr_list, DDI_CLASS, 9629 ph_vh_class) != DDI_SUCCESS) { 9630 goto error; 9631 } 9632 9633 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9634 attr_list, NULL, DDI_SLEEP); 9635 9636 error: 9637 kmem_free(path_name, MAXPATHLEN); 9638 nvlist_free(attr_list); 9639 return; 9640 9641 alloc_failed: 9642 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9643 } 9644 9645 char ** 9646 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9647 { 9648 char **driver_list, **ret_driver_list = NULL; 9649 int *root_support_list; 9650 int cur_elements, max_elements; 9651 9652 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9653 &cur_elements, &max_elements); 9654 9655 9656 if (driver_list) { 9657 kmem_free(root_support_list, sizeof (int) * max_elements); 9658 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9659 * max_elements, sizeof (char *) * cur_elements); 9660 } 9661 *ndrivers = cur_elements; 9662 9663 return (ret_driver_list); 9664 9665 } 9666 9667 void 9668 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9669 { 9670 char **p; 9671 int i; 9672 9673 if (driver_list) { 9674 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9675 kmem_free(*p, strlen(*p) + 1); 9676 kmem_free(driver_list, sizeof (char *) * ndrivers); 9677 } 9678 } 9679 9680 /* 9681 * mdi_is_dev_supported(): 9682 * function called by pHCI bus config operation to determine if a 9683 * device should be represented as a child of the vHCI or the 9684 * pHCI. This decision is made by the vHCI, using cinfo idenity 9685 * information passed by the pHCI - specifics of the cinfo 9686 * representation are by agreement between the pHCI and vHCI. 9687 * Return Values: 9688 * MDI_SUCCESS 9689 * MDI_FAILURE 9690 */ 9691 int 9692 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9693 { 9694 mdi_vhci_t *vh; 9695 9696 ASSERT(class && pdip); 9697 9698 /* 9699 * For dev_supported, mdi_phci_register() must have established pdip as 9700 * a pHCI. 9701 * 9702 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9703 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9704 */ 9705 if (!MDI_PHCI(pdip)) 9706 return (MDI_FAILURE); 9707 9708 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9709 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9710 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9711 return (MDI_FAILURE); 9712 } 9713 9714 /* Return vHCI answer */ 9715 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9716 } 9717 9718 int 9719 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9720 { 9721 uint_t devstate = 0; 9722 dev_info_t *cdip; 9723 9724 if ((pip == NULL) || (dcp == NULL)) 9725 return (MDI_FAILURE); 9726 9727 cdip = mdi_pi_get_client(pip); 9728 9729 switch (mdi_pi_get_state(pip)) { 9730 case MDI_PATHINFO_STATE_INIT: 9731 devstate = DEVICE_DOWN; 9732 break; 9733 case MDI_PATHINFO_STATE_ONLINE: 9734 devstate = DEVICE_ONLINE; 9735 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9736 devstate |= DEVICE_BUSY; 9737 break; 9738 case MDI_PATHINFO_STATE_STANDBY: 9739 devstate = DEVICE_ONLINE; 9740 break; 9741 case MDI_PATHINFO_STATE_FAULT: 9742 devstate = DEVICE_DOWN; 9743 break; 9744 case MDI_PATHINFO_STATE_OFFLINE: 9745 devstate = DEVICE_OFFLINE; 9746 break; 9747 default: 9748 ASSERT(MDI_PI(pip)->pi_state); 9749 } 9750 9751 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9752 return (MDI_FAILURE); 9753 9754 return (MDI_SUCCESS); 9755 } 9756