1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(level, stmnt) \ 77 if (mdi_debug >= (level)) i_mdi_log stmnt 78 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 79 #else /* !DEBUG */ 80 #define MDI_DEBUG(level, stmnt) 81 #endif /* DEBUG */ 82 83 extern pri_t minclsyspri; 84 extern int modrootloaded; 85 86 /* 87 * Global mutex: 88 * Protects vHCI list and structure members. 
89 */ 90 kmutex_t mdi_mutex; 91 92 /* 93 * Registered vHCI class driver lists 94 */ 95 int mdi_vhci_count; 96 mdi_vhci_t *mdi_vhci_head; 97 mdi_vhci_t *mdi_vhci_tail; 98 99 /* 100 * Client Hash Table size 101 */ 102 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 103 104 /* 105 * taskq interface definitions 106 */ 107 #define MDI_TASKQ_N_THREADS 8 108 #define MDI_TASKQ_PRI minclsyspri 109 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 110 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 111 112 taskq_t *mdi_taskq; 113 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 114 115 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 116 117 /* 118 * The data should be "quiet" for this interval (in seconds) before the 119 * vhci cached data is flushed to the disk. 120 */ 121 static int mdi_vhcache_flush_delay = 10; 122 123 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 124 static int mdi_vhcache_flush_daemon_idle_time = 60; 125 126 /* 127 * MDI falls back to discovery of all paths when a bus_config_one fails. 128 * The following parameters can be used to tune this operation. 129 * 130 * mdi_path_discovery_boot 131 * Number of times path discovery will be attempted during early boot. 132 * Probably there is no reason to ever set this value to greater than one. 133 * 134 * mdi_path_discovery_postboot 135 * Number of times path discovery will be attempted after early boot. 136 * Set it to a minimum of two to allow for discovery of iscsi paths which 137 * may happen very late during booting. 138 * 139 * mdi_path_discovery_interval 140 * Minimum number of seconds MDI will wait between successive discovery 141 * of all paths. Set it to -1 to disable discovery of all paths. 
142 */ 143 static int mdi_path_discovery_boot = 1; 144 static int mdi_path_discovery_postboot = 2; 145 static int mdi_path_discovery_interval = 10; 146 147 /* 148 * number of seconds the asynchronous configuration thread will sleep idle 149 * before exiting. 150 */ 151 static int mdi_async_config_idle_time = 600; 152 153 static int mdi_bus_config_cache_hash_size = 256; 154 155 /* turns off multithreaded configuration for certain operations */ 156 static int mdi_mtc_off = 0; 157 158 /* 159 * MDI component property name/value string definitions 160 */ 161 const char *mdi_component_prop = "mpxio-component"; 162 const char *mdi_component_prop_vhci = "vhci"; 163 const char *mdi_component_prop_phci = "phci"; 164 const char *mdi_component_prop_client = "client"; 165 166 /* 167 * MDI client global unique identifier property name 168 */ 169 const char *mdi_client_guid_prop = "client-guid"; 170 171 /* 172 * MDI client load balancing property name/value string definitions 173 */ 174 const char *mdi_load_balance = "load-balance"; 175 const char *mdi_load_balance_none = "none"; 176 const char *mdi_load_balance_rr = "round-robin"; 177 const char *mdi_load_balance_lba = "logical-block"; 178 179 /* 180 * Obsolete vHCI class definition; to be removed after Leadville update 181 */ 182 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 183 184 static char vhci_greeting[] = 185 "\tThere already exists one vHCI driver for class %s\n" 186 "\tOnly one vHCI driver for each class is allowed\n"; 187 188 /* 189 * Static function prototypes 190 */ 191 static int i_mdi_phci_offline(dev_info_t *, uint_t); 192 static int i_mdi_client_offline(dev_info_t *, uint_t); 193 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 194 static void i_mdi_phci_post_detach(dev_info_t *, 195 ddi_detach_cmd_t, int); 196 static int i_mdi_client_pre_detach(dev_info_t *, 197 ddi_detach_cmd_t); 198 static void i_mdi_client_post_detach(dev_info_t *, 199 ddi_detach_cmd_t, int); 200 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 201 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 202 static int i_mdi_lba_lb(mdi_client_t *ct, 203 mdi_pathinfo_t **ret_pip, struct buf *buf); 204 static void i_mdi_pm_hold_client(mdi_client_t *, int); 205 static void i_mdi_pm_rele_client(mdi_client_t *, int); 206 static void i_mdi_pm_reset_client(mdi_client_t *); 207 static int i_mdi_power_all_phci(mdi_client_t *); 208 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 209 210 211 /* 212 * Internal mdi_pathinfo node functions 213 */ 214 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 215 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 216 217 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 218 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 219 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 220 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 221 static void i_mdi_phci_unlock(mdi_phci_t *); 222 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 223 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 224 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 225 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 226 mdi_client_t *); 227 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 228 static void i_mdi_client_remove_path(mdi_client_t *, 229 mdi_pathinfo_t *); 230 231 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 232 mdi_pathinfo_state_t, int); 233 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 234 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 235 char **, int); 236 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 237 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 238 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 239 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 240 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 241 
static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 242 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 243 static void i_mdi_client_update_state(mdi_client_t *); 244 static int i_mdi_client_compute_state(mdi_client_t *, 245 mdi_phci_t *); 246 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 247 static void i_mdi_client_unlock(mdi_client_t *); 248 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 249 static mdi_client_t *i_devi_get_client(dev_info_t *); 250 /* 251 * NOTE: this will be removed once the NWS files are changed to use the new 252 * mdi_{enable,disable}_path interfaces 253 */ 254 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 255 int, int); 256 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 257 mdi_vhci_t *vh, int flags, int op); 258 /* 259 * Failover related function prototypes 260 */ 261 static int i_mdi_failover(void *); 262 263 /* 264 * misc internal functions 265 */ 266 static int i_mdi_get_hash_key(char *); 267 static int i_map_nvlist_error_to_mdi(int); 268 static void i_mdi_report_path_state(mdi_client_t *, 269 mdi_pathinfo_t *); 270 271 static void setup_vhci_cache(mdi_vhci_t *); 272 static int destroy_vhci_cache(mdi_vhci_t *); 273 static void setup_phci_driver_list(mdi_vhci_t *); 274 static void free_phci_driver_list(mdi_vhci_config_t *); 275 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 276 static boolean_t stop_vhcache_flush_thread(void *, int); 277 static void free_string_array(char **, int); 278 static void free_vhcache_phci(mdi_vhcache_phci_t *); 279 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 280 static void free_vhcache_client(mdi_vhcache_client_t *); 281 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 282 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 283 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 284 static void vhcache_phci_remove(mdi_vhci_config_t *, 
mdi_phci_t *); 285 static void vhcache_pi_add(mdi_vhci_config_t *, 286 struct mdi_pathinfo *); 287 static void vhcache_pi_remove(mdi_vhci_config_t *, 288 struct mdi_pathinfo *); 289 static void free_phclient_path_list(mdi_phys_path_t *); 290 static void sort_vhcache_paths(mdi_vhcache_client_t *); 291 static int flush_vhcache(mdi_vhci_config_t *, int); 292 static void vhcache_dirty(mdi_vhci_config_t *); 293 static void free_async_client_config(mdi_async_client_config_t *); 294 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 295 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 296 static nvlist_t *read_on_disk_vhci_cache(char *); 297 extern int fread_nvlist(char *, nvlist_t **); 298 extern int fwrite_nvlist(char *, nvlist_t *); 299 300 /* called once when first vhci registers with mdi */ 301 static void 302 i_mdi_init() 303 { 304 static int initialized = 0; 305 306 if (initialized) 307 return; 308 initialized = 1; 309 310 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 311 /* 312 * Create our taskq resources 313 */ 314 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 315 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 316 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 317 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 318 } 319 320 /* 321 * mdi_get_component_type(): 322 * Return mpxio component type 323 * Return Values: 324 * MDI_COMPONENT_NONE 325 * MDI_COMPONENT_VHCI 326 * MDI_COMPONENT_PHCI 327 * MDI_COMPONENT_CLIENT 328 * XXX This doesn't work under multi-level MPxIO and should be 329 * removed when clients migrate mdi_component_is_*() interfaces. 330 */ 331 int 332 mdi_get_component_type(dev_info_t *dip) 333 { 334 return (DEVI(dip)->devi_mdi_component); 335 } 336 337 /* 338 * mdi_vhci_register(): 339 * Register a vHCI module with the mpxio framework 340 * mdi_vhci_register() is called by vHCI drivers to register the 341 * 'class_driver' vHCI driver and its MDI entrypoints with the 342 * mpxio framework. 
The vHCI driver must call this interface as 343 * part of its attach(9e) handler. 344 * Competing threads may try to attach mdi_vhci_register() as 345 * the vHCI drivers are loaded and attached as a result of pHCI 346 * driver instance registration (mdi_phci_register()) with the 347 * framework. 348 * Return Values: 349 * MDI_SUCCESS 350 * MDI_FAILURE 351 */ 352 /*ARGSUSED*/ 353 int 354 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 355 int flags) 356 { 357 mdi_vhci_t *vh = NULL; 358 359 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 360 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 361 362 i_mdi_init(); 363 364 mutex_enter(&mdi_mutex); 365 /* 366 * Scan for already registered vhci 367 */ 368 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 369 if (strcmp(vh->vh_class, class) == 0) { 370 /* 371 * vHCI has already been created. Check for valid 372 * vHCI ops registration. We only support one vHCI 373 * module per class 374 */ 375 if (vh->vh_ops != NULL) { 376 mutex_exit(&mdi_mutex); 377 cmn_err(CE_NOTE, vhci_greeting, class); 378 return (MDI_FAILURE); 379 } 380 break; 381 } 382 } 383 384 /* 385 * if not yet created, create the vHCI component 386 */ 387 if (vh == NULL) { 388 struct client_hash *hash = NULL; 389 char *load_balance; 390 391 /* 392 * Allocate and initialize the mdi extensions 393 */ 394 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 395 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 396 KM_SLEEP); 397 vh->vh_client_table = hash; 398 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 399 (void) strcpy(vh->vh_class, class); 400 vh->vh_lb = LOAD_BALANCE_RR; 401 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 402 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 403 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 404 vh->vh_lb = LOAD_BALANCE_NONE; 405 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 406 == 0) { 407 vh->vh_lb = LOAD_BALANCE_LBA; 408 } 409 ddi_prop_free(load_balance); 
410 } 411 412 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 413 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 414 415 /* 416 * Store the vHCI ops vectors 417 */ 418 vh->vh_dip = vdip; 419 vh->vh_ops = vops; 420 421 setup_vhci_cache(vh); 422 423 if (mdi_vhci_head == NULL) { 424 mdi_vhci_head = vh; 425 } 426 if (mdi_vhci_tail) { 427 mdi_vhci_tail->vh_next = vh; 428 } 429 mdi_vhci_tail = vh; 430 mdi_vhci_count++; 431 } 432 433 /* 434 * Claim the devfs node as a vhci component 435 */ 436 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 437 438 /* 439 * Initialize our back reference from dev_info node 440 */ 441 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 442 mutex_exit(&mdi_mutex); 443 return (MDI_SUCCESS); 444 } 445 446 /* 447 * mdi_vhci_unregister(): 448 * Unregister a vHCI module from mpxio framework 449 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 450 * of a vhci to unregister it from the framework. 451 * Return Values: 452 * MDI_SUCCESS 453 * MDI_FAILURE 454 */ 455 /*ARGSUSED*/ 456 int 457 mdi_vhci_unregister(dev_info_t *vdip, int flags) 458 { 459 mdi_vhci_t *found, *vh, *prev = NULL; 460 461 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 462 463 /* 464 * Check for invalid VHCI 465 */ 466 if ((vh = i_devi_get_vhci(vdip)) == NULL) 467 return (MDI_FAILURE); 468 469 /* 470 * Scan the list of registered vHCIs for a match 471 */ 472 mutex_enter(&mdi_mutex); 473 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 474 if (found == vh) 475 break; 476 prev = found; 477 } 478 479 if (found == NULL) { 480 mutex_exit(&mdi_mutex); 481 return (MDI_FAILURE); 482 } 483 484 /* 485 * Check the vHCI, pHCI and client count. All the pHCIs and clients 486 * should have been unregistered, before a vHCI can be 487 * unregistered. 
488 */ 489 MDI_VHCI_PHCI_LOCK(vh); 490 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 491 MDI_VHCI_PHCI_UNLOCK(vh); 492 mutex_exit(&mdi_mutex); 493 return (MDI_FAILURE); 494 } 495 MDI_VHCI_PHCI_UNLOCK(vh); 496 497 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 498 mutex_exit(&mdi_mutex); 499 return (MDI_FAILURE); 500 } 501 502 /* 503 * Remove the vHCI from the global list 504 */ 505 if (vh == mdi_vhci_head) { 506 mdi_vhci_head = vh->vh_next; 507 } else { 508 prev->vh_next = vh->vh_next; 509 } 510 if (vh == mdi_vhci_tail) { 511 mdi_vhci_tail = prev; 512 } 513 mdi_vhci_count--; 514 mutex_exit(&mdi_mutex); 515 516 vh->vh_ops = NULL; 517 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 518 DEVI(vdip)->devi_mdi_xhci = NULL; 519 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 520 kmem_free(vh->vh_client_table, 521 mdi_client_table_size * sizeof (struct client_hash)); 522 mutex_destroy(&vh->vh_phci_mutex); 523 mutex_destroy(&vh->vh_client_mutex); 524 525 kmem_free(vh, sizeof (mdi_vhci_t)); 526 return (MDI_SUCCESS); 527 } 528 529 /* 530 * i_mdi_vhci_class2vhci(): 531 * Look for a matching vHCI module given a vHCI class name 532 * Return Values: 533 * Handle to a vHCI component 534 * NULL 535 */ 536 static mdi_vhci_t * 537 i_mdi_vhci_class2vhci(char *class) 538 { 539 mdi_vhci_t *vh = NULL; 540 541 ASSERT(!MUTEX_HELD(&mdi_mutex)); 542 543 mutex_enter(&mdi_mutex); 544 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 545 if (strcmp(vh->vh_class, class) == 0) { 546 break; 547 } 548 } 549 mutex_exit(&mdi_mutex); 550 return (vh); 551 } 552 553 /* 554 * i_devi_get_vhci(): 555 * Utility function to get the handle to a vHCI component 556 * Return Values: 557 * Handle to a vHCI component 558 * NULL 559 */ 560 mdi_vhci_t * 561 i_devi_get_vhci(dev_info_t *vdip) 562 { 563 mdi_vhci_t *vh = NULL; 564 if (MDI_VHCI(vdip)) { 565 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 566 } 567 return (vh); 568 } 569 570 /* 571 * mdi_phci_register(): 572 * Register a pHCI 
module with mpxio framework 573 * mdi_phci_register() is called by pHCI drivers to register with 574 * the mpxio framework and a specific 'class_driver' vHCI. The 575 * pHCI driver must call this interface as part of its attach(9e) 576 * handler. 577 * Return Values: 578 * MDI_SUCCESS 579 * MDI_FAILURE 580 */ 581 /*ARGSUSED*/ 582 int 583 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 584 { 585 mdi_phci_t *ph; 586 mdi_vhci_t *vh; 587 char *data; 588 char *pathname; 589 590 /* 591 * Some subsystems, like fcp, perform pHCI registration from a 592 * different thread than the one doing the pHCI attach(9E) - the 593 * driver attach code is waiting for this other thread to complete. 594 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 595 * (indicating that some thread has done an ndi_devi_enter of parent) 596 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 597 */ 598 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 599 600 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 601 (void) ddi_pathname(pdip, pathname); 602 603 /* 604 * Check for mpxio-disable property. Enable mpxio if the property is 605 * missing or not set to "yes". 606 * If the property is set to "yes" then emit a brief message. 
607 */ 608 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 609 &data) == DDI_SUCCESS)) { 610 if (strcmp(data, "yes") == 0) { 611 MDI_DEBUG(1, (CE_CONT, pdip, 612 "?%s (%s%d) multipath capabilities " 613 "disabled via %s.conf.\n", pathname, 614 ddi_driver_name(pdip), ddi_get_instance(pdip), 615 ddi_driver_name(pdip))); 616 ddi_prop_free(data); 617 kmem_free(pathname, MAXPATHLEN); 618 return (MDI_FAILURE); 619 } 620 ddi_prop_free(data); 621 } 622 623 kmem_free(pathname, MAXPATHLEN); 624 625 /* 626 * Search for a matching vHCI 627 */ 628 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 629 if (vh == NULL) { 630 return (MDI_FAILURE); 631 } 632 633 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 634 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 635 ph->ph_dip = pdip; 636 ph->ph_vhci = vh; 637 ph->ph_next = NULL; 638 ph->ph_unstable = 0; 639 ph->ph_vprivate = 0; 640 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 641 642 MDI_PHCI_LOCK(ph); 643 MDI_PHCI_SET_POWER_UP(ph); 644 MDI_PHCI_UNLOCK(ph); 645 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 646 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 647 648 vhcache_phci_add(vh->vh_config, ph); 649 650 MDI_VHCI_PHCI_LOCK(vh); 651 if (vh->vh_phci_head == NULL) { 652 vh->vh_phci_head = ph; 653 } 654 if (vh->vh_phci_tail) { 655 vh->vh_phci_tail->ph_next = ph; 656 } 657 vh->vh_phci_tail = ph; 658 vh->vh_phci_count++; 659 MDI_VHCI_PHCI_UNLOCK(vh); 660 661 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 662 return (MDI_SUCCESS); 663 } 664 665 /* 666 * mdi_phci_unregister(): 667 * Unregister a pHCI module from mpxio framework 668 * mdi_phci_unregister() is called by the pHCI drivers from their 669 * detach(9E) handler to unregister their instances from the 670 * framework. 
671 * Return Values: 672 * MDI_SUCCESS 673 * MDI_FAILURE 674 */ 675 /*ARGSUSED*/ 676 int 677 mdi_phci_unregister(dev_info_t *pdip, int flags) 678 { 679 mdi_vhci_t *vh; 680 mdi_phci_t *ph; 681 mdi_phci_t *tmp; 682 mdi_phci_t *prev = NULL; 683 684 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 685 686 ph = i_devi_get_phci(pdip); 687 if (ph == NULL) { 688 MDI_DEBUG(1, (CE_WARN, pdip, 689 "!pHCI unregister: Not a valid pHCI")); 690 return (MDI_FAILURE); 691 } 692 693 vh = ph->ph_vhci; 694 ASSERT(vh != NULL); 695 if (vh == NULL) { 696 MDI_DEBUG(1, (CE_WARN, pdip, 697 "!pHCI unregister: Not a valid vHCI")); 698 return (MDI_FAILURE); 699 } 700 701 MDI_VHCI_PHCI_LOCK(vh); 702 tmp = vh->vh_phci_head; 703 while (tmp) { 704 if (tmp == ph) { 705 break; 706 } 707 prev = tmp; 708 tmp = tmp->ph_next; 709 } 710 711 if (ph == vh->vh_phci_head) { 712 vh->vh_phci_head = ph->ph_next; 713 } else { 714 prev->ph_next = ph->ph_next; 715 } 716 717 if (ph == vh->vh_phci_tail) { 718 vh->vh_phci_tail = prev; 719 } 720 721 vh->vh_phci_count--; 722 MDI_VHCI_PHCI_UNLOCK(vh); 723 724 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 725 ESC_DDI_INITIATOR_UNREGISTER); 726 vhcache_phci_remove(vh->vh_config, ph); 727 cv_destroy(&ph->ph_unstable_cv); 728 mutex_destroy(&ph->ph_mutex); 729 kmem_free(ph, sizeof (mdi_phci_t)); 730 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 731 DEVI(pdip)->devi_mdi_xhci = NULL; 732 return (MDI_SUCCESS); 733 } 734 735 /* 736 * i_devi_get_phci(): 737 * Utility function to return the phci extensions. 738 */ 739 static mdi_phci_t * 740 i_devi_get_phci(dev_info_t *pdip) 741 { 742 mdi_phci_t *ph = NULL; 743 if (MDI_PHCI(pdip)) { 744 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 745 } 746 return (ph); 747 } 748 749 /* 750 * Single thread mdi entry into devinfo node for modifying its children. 751 * If necessary we perform an ndi_devi_enter of the vHCI before doing 752 * an ndi_devi_enter of 'dip'. 
We maintain circular in two parts: one 753 * for the vHCI and one for the pHCI. 754 */ 755 void 756 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 757 { 758 dev_info_t *vdip; 759 int vcircular, pcircular; 760 761 /* Verify calling context */ 762 ASSERT(MDI_PHCI(phci_dip)); 763 vdip = mdi_devi_get_vdip(phci_dip); 764 ASSERT(vdip); /* A pHCI always has a vHCI */ 765 766 /* 767 * If pHCI is detaching then the framework has already entered the 768 * vHCI on a threads that went down the code path leading to 769 * detach_node(). This framework enter of the vHCI during pHCI 770 * detach is done to avoid deadlock with vHCI power management 771 * operations which enter the vHCI and the enter down the path 772 * to the pHCI. If pHCI is detaching then we piggyback this calls 773 * enter of the vHCI on frameworks vHCI enter that has already 774 * occurred - this is OK because we know that the framework thread 775 * doing detach is waiting for our completion. 776 * 777 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 778 * race with detach - but we can't do that because the framework has 779 * already entered the parent, so we have some complexity instead. 780 */ 781 for (;;) { 782 if (ndi_devi_tryenter(vdip, &vcircular)) { 783 ASSERT(vcircular != -1); 784 if (DEVI_IS_DETACHING(phci_dip)) { 785 ndi_devi_exit(vdip, vcircular); 786 vcircular = -1; 787 } 788 break; 789 } else if (DEVI_IS_DETACHING(phci_dip)) { 790 vcircular = -1; 791 break; 792 } else { 793 delay(1); 794 } 795 } 796 797 ndi_devi_enter(phci_dip, &pcircular); 798 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 799 } 800 801 /* 802 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
803 */ 804 void 805 mdi_devi_exit(dev_info_t *phci_dip, int circular) 806 { 807 dev_info_t *vdip; 808 int vcircular, pcircular; 809 810 /* Verify calling context */ 811 ASSERT(MDI_PHCI(phci_dip)); 812 vdip = mdi_devi_get_vdip(phci_dip); 813 ASSERT(vdip); /* A pHCI always has a vHCI */ 814 815 /* extract two circular recursion values from single int */ 816 pcircular = (short)(circular & 0xFFFF); 817 vcircular = (short)((circular >> 16) & 0xFFFF); 818 819 ndi_devi_exit(phci_dip, pcircular); 820 if (vcircular != -1) 821 ndi_devi_exit(vdip, vcircular); 822 } 823 824 /* 825 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 826 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 827 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 828 * with vHCI power management code during path online/offline. Each 829 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 830 * occur within the scope of an active mdi_devi_enter that establishes the 831 * circular value. 
832 */ 833 void 834 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 835 { 836 int pcircular; 837 838 /* Verify calling context */ 839 ASSERT(MDI_PHCI(phci_dip)); 840 841 pcircular = (short)(circular & 0xFFFF); 842 ndi_devi_exit(phci_dip, pcircular); 843 } 844 845 void 846 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 847 { 848 int pcircular; 849 850 /* Verify calling context */ 851 ASSERT(MDI_PHCI(phci_dip)); 852 853 ndi_devi_enter(phci_dip, &pcircular); 854 855 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 856 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 857 } 858 859 /* 860 * mdi_devi_get_vdip(): 861 * given a pHCI dip return vHCI dip 862 */ 863 dev_info_t * 864 mdi_devi_get_vdip(dev_info_t *pdip) 865 { 866 mdi_phci_t *ph; 867 868 ph = i_devi_get_phci(pdip); 869 if (ph && ph->ph_vhci) 870 return (ph->ph_vhci->vh_dip); 871 return (NULL); 872 } 873 874 /* 875 * mdi_devi_pdip_entered(): 876 * Return 1 if we are vHCI and have done an ndi_devi_enter 877 * of a pHCI 878 */ 879 int 880 mdi_devi_pdip_entered(dev_info_t *vdip) 881 { 882 mdi_vhci_t *vh; 883 mdi_phci_t *ph; 884 885 vh = i_devi_get_vhci(vdip); 886 if (vh == NULL) 887 return (0); 888 889 MDI_VHCI_PHCI_LOCK(vh); 890 ph = vh->vh_phci_head; 891 while (ph) { 892 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 893 MDI_VHCI_PHCI_UNLOCK(vh); 894 return (1); 895 } 896 ph = ph->ph_next; 897 } 898 MDI_VHCI_PHCI_UNLOCK(vh); 899 return (0); 900 } 901 902 /* 903 * mdi_phci_path2devinfo(): 904 * Utility function to search for a valid phci device given 905 * the devfs pathname. 
906 */ 907 dev_info_t * 908 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 909 { 910 char *temp_pathname; 911 mdi_vhci_t *vh; 912 mdi_phci_t *ph; 913 dev_info_t *pdip = NULL; 914 915 vh = i_devi_get_vhci(vdip); 916 ASSERT(vh != NULL); 917 918 if (vh == NULL) { 919 /* 920 * Invalid vHCI component, return failure 921 */ 922 return (NULL); 923 } 924 925 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 926 MDI_VHCI_PHCI_LOCK(vh); 927 ph = vh->vh_phci_head; 928 while (ph != NULL) { 929 pdip = ph->ph_dip; 930 ASSERT(pdip != NULL); 931 *temp_pathname = '\0'; 932 (void) ddi_pathname(pdip, temp_pathname); 933 if (strcmp(temp_pathname, pathname) == 0) { 934 break; 935 } 936 ph = ph->ph_next; 937 } 938 if (ph == NULL) { 939 pdip = NULL; 940 } 941 MDI_VHCI_PHCI_UNLOCK(vh); 942 kmem_free(temp_pathname, MAXPATHLEN); 943 return (pdip); 944 } 945 946 /* 947 * mdi_phci_get_path_count(): 948 * get number of path information nodes associated with a given 949 * pHCI device. 950 */ 951 int 952 mdi_phci_get_path_count(dev_info_t *pdip) 953 { 954 mdi_phci_t *ph; 955 int count = 0; 956 957 ph = i_devi_get_phci(pdip); 958 if (ph != NULL) { 959 count = ph->ph_path_count; 960 } 961 return (count); 962 } 963 964 /* 965 * i_mdi_phci_lock(): 966 * Lock a pHCI device 967 * Return Values: 968 * None 969 * Note: 970 * The default locking order is: 971 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 972 * But there are number of situations where locks need to be 973 * grabbed in reverse order. This routine implements try and lock 974 * mechanism depending on the requested parameter option. 975 */ 976 static void 977 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 978 { 979 if (pip) { 980 /* Reverse locking is requested. */ 981 while (MDI_PHCI_TRYLOCK(ph) == 0) { 982 /* 983 * tryenter failed. 
Try to grab again 984 * after a small delay 985 */ 986 MDI_PI_HOLD(pip); 987 MDI_PI_UNLOCK(pip); 988 delay(1); 989 MDI_PI_LOCK(pip); 990 MDI_PI_RELE(pip); 991 } 992 } else { 993 MDI_PHCI_LOCK(ph); 994 } 995 } 996 997 /* 998 * i_mdi_phci_unlock(): 999 * Unlock the pHCI component 1000 */ 1001 static void 1002 i_mdi_phci_unlock(mdi_phci_t *ph) 1003 { 1004 MDI_PHCI_UNLOCK(ph); 1005 } 1006 1007 /* 1008 * i_mdi_devinfo_create(): 1009 * create client device's devinfo node 1010 * Return Values: 1011 * dev_info 1012 * NULL 1013 * Notes: 1014 */ 1015 static dev_info_t * 1016 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1017 char **compatible, int ncompatible) 1018 { 1019 dev_info_t *cdip = NULL; 1020 1021 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1022 1023 /* Verify for duplicate entry */ 1024 cdip = i_mdi_devinfo_find(vh, name, guid); 1025 ASSERT(cdip == NULL); 1026 if (cdip) { 1027 cmn_err(CE_WARN, 1028 "i_mdi_devinfo_create: client dip %p already exists", 1029 (void *)cdip); 1030 } 1031 1032 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1033 if (cdip == NULL) 1034 goto fail; 1035 1036 /* 1037 * Create component type and Global unique identifier 1038 * properties 1039 */ 1040 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1041 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1042 goto fail; 1043 } 1044 1045 /* Decorate the node with compatible property */ 1046 if (compatible && 1047 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1048 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1049 goto fail; 1050 } 1051 1052 return (cdip); 1053 1054 fail: 1055 if (cdip) { 1056 (void) ndi_prop_remove_all(cdip); 1057 (void) ndi_devi_free(cdip); 1058 } 1059 return (NULL); 1060 } 1061 1062 /* 1063 * i_mdi_devinfo_find(): 1064 * Find a matching devinfo node for given client node name 1065 * and its guid. 
1066 * Return Values: 1067 * Handle to a dev_info node or NULL 1068 */ 1069 static dev_info_t * 1070 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1071 { 1072 char *data; 1073 dev_info_t *cdip = NULL; 1074 dev_info_t *ndip = NULL; 1075 int circular; 1076 1077 ndi_devi_enter(vh->vh_dip, &circular); 1078 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1079 while ((cdip = ndip) != NULL) { 1080 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1081 1082 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1083 continue; 1084 } 1085 1086 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1087 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1088 &data) != DDI_PROP_SUCCESS) { 1089 continue; 1090 } 1091 1092 if (strcmp(data, guid) != 0) { 1093 ddi_prop_free(data); 1094 continue; 1095 } 1096 ddi_prop_free(data); 1097 break; 1098 } 1099 ndi_devi_exit(vh->vh_dip, circular); 1100 return (cdip); 1101 } 1102 1103 /* 1104 * i_mdi_devinfo_remove(): 1105 * Remove a client device node 1106 */ 1107 static int 1108 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1109 { 1110 int rv = MDI_SUCCESS; 1111 1112 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1113 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1114 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1115 if (rv != NDI_SUCCESS) { 1116 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1117 " failed. 
cdip = %p\n", (void *)cdip)); 1118 } 1119 /* 1120 * Convert to MDI error code 1121 */ 1122 switch (rv) { 1123 case NDI_SUCCESS: 1124 rv = MDI_SUCCESS; 1125 break; 1126 case NDI_BUSY: 1127 rv = MDI_BUSY; 1128 break; 1129 default: 1130 rv = MDI_FAILURE; 1131 break; 1132 } 1133 } 1134 return (rv); 1135 } 1136 1137 /* 1138 * i_devi_get_client() 1139 * Utility function to get mpxio component extensions 1140 */ 1141 static mdi_client_t * 1142 i_devi_get_client(dev_info_t *cdip) 1143 { 1144 mdi_client_t *ct = NULL; 1145 1146 if (MDI_CLIENT(cdip)) { 1147 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1148 } 1149 return (ct); 1150 } 1151 1152 /* 1153 * i_mdi_is_child_present(): 1154 * Search for the presence of client device dev_info node 1155 */ 1156 static int 1157 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1158 { 1159 int rv = MDI_FAILURE; 1160 struct dev_info *dip; 1161 int circular; 1162 1163 ndi_devi_enter(vdip, &circular); 1164 dip = DEVI(vdip)->devi_child; 1165 while (dip) { 1166 if (dip == DEVI(cdip)) { 1167 rv = MDI_SUCCESS; 1168 break; 1169 } 1170 dip = dip->devi_sibling; 1171 } 1172 ndi_devi_exit(vdip, circular); 1173 return (rv); 1174 } 1175 1176 1177 /* 1178 * i_mdi_client_lock(): 1179 * Grab client component lock 1180 * Return Values: 1181 * None 1182 * Note: 1183 * The default locking order is: 1184 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1185 * But there are number of situations where locks need to be 1186 * grabbed in reverse order. This routine implements try and lock 1187 * mechanism depending on the requested parameter option. 1188 */ 1189 static void 1190 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1191 { 1192 if (pip) { 1193 /* 1194 * Reverse locking is requested. 1195 */ 1196 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1197 /* 1198 * tryenter failed. 
Try to grab again 1199 * after a small delay 1200 */ 1201 MDI_PI_HOLD(pip); 1202 MDI_PI_UNLOCK(pip); 1203 delay(1); 1204 MDI_PI_LOCK(pip); 1205 MDI_PI_RELE(pip); 1206 } 1207 } else { 1208 MDI_CLIENT_LOCK(ct); 1209 } 1210 } 1211 1212 /* 1213 * i_mdi_client_unlock(): 1214 * Unlock a client component 1215 */ 1216 static void 1217 i_mdi_client_unlock(mdi_client_t *ct) 1218 { 1219 MDI_CLIENT_UNLOCK(ct); 1220 } 1221 1222 /* 1223 * i_mdi_client_alloc(): 1224 * Allocate and initialize a client structure. Caller should 1225 * hold the vhci client lock. 1226 * Return Values: 1227 * Handle to a client component 1228 */ 1229 /*ARGSUSED*/ 1230 static mdi_client_t * 1231 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1232 { 1233 mdi_client_t *ct; 1234 1235 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1236 1237 /* 1238 * Allocate and initialize a component structure. 1239 */ 1240 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1241 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1242 ct->ct_hnext = NULL; 1243 ct->ct_hprev = NULL; 1244 ct->ct_dip = NULL; 1245 ct->ct_vhci = vh; 1246 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1247 (void) strcpy(ct->ct_drvname, name); 1248 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1249 (void) strcpy(ct->ct_guid, lguid); 1250 ct->ct_cprivate = NULL; 1251 ct->ct_vprivate = NULL; 1252 ct->ct_flags = 0; 1253 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1254 MDI_CLIENT_LOCK(ct); 1255 MDI_CLIENT_SET_OFFLINE(ct); 1256 MDI_CLIENT_SET_DETACH(ct); 1257 MDI_CLIENT_SET_POWER_UP(ct); 1258 MDI_CLIENT_UNLOCK(ct); 1259 ct->ct_failover_flags = 0; 1260 ct->ct_failover_status = 0; 1261 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1262 ct->ct_unstable = 0; 1263 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1264 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1265 ct->ct_lb = vh->vh_lb; 1266 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1267 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1268 ct->ct_path_count = 0; 1269 ct->ct_path_head = NULL; 1270 ct->ct_path_tail = NULL; 1271 ct->ct_path_last = NULL; 1272 1273 /* 1274 * Add this client component to our client hash queue 1275 */ 1276 i_mdi_client_enlist_table(vh, ct); 1277 return (ct); 1278 } 1279 1280 /* 1281 * i_mdi_client_enlist_table(): 1282 * Attach the client device to the client hash table. Caller 1283 * should hold the vhci client lock. 1284 */ 1285 static void 1286 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1287 { 1288 int index; 1289 struct client_hash *head; 1290 1291 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1292 1293 index = i_mdi_get_hash_key(ct->ct_guid); 1294 head = &vh->vh_client_table[index]; 1295 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1296 head->ct_hash_head = ct; 1297 head->ct_hash_count++; 1298 vh->vh_client_count++; 1299 } 1300 1301 /* 1302 * i_mdi_client_delist_table(): 1303 * Attach the client device to the client hash table. 1304 * Caller should hold the vhci client lock. 
1305 */ 1306 static void 1307 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1308 { 1309 int index; 1310 char *guid; 1311 struct client_hash *head; 1312 mdi_client_t *next; 1313 mdi_client_t *last; 1314 1315 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1316 1317 guid = ct->ct_guid; 1318 index = i_mdi_get_hash_key(guid); 1319 head = &vh->vh_client_table[index]; 1320 1321 last = NULL; 1322 next = (mdi_client_t *)head->ct_hash_head; 1323 while (next != NULL) { 1324 if (next == ct) { 1325 break; 1326 } 1327 last = next; 1328 next = next->ct_hnext; 1329 } 1330 1331 if (next) { 1332 head->ct_hash_count--; 1333 if (last == NULL) { 1334 head->ct_hash_head = ct->ct_hnext; 1335 } else { 1336 last->ct_hnext = ct->ct_hnext; 1337 } 1338 ct->ct_hnext = NULL; 1339 vh->vh_client_count--; 1340 } 1341 } 1342 1343 1344 /* 1345 * i_mdi_client_free(): 1346 * Free a client component 1347 */ 1348 static int 1349 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1350 { 1351 int rv = MDI_SUCCESS; 1352 int flags = ct->ct_flags; 1353 dev_info_t *cdip; 1354 dev_info_t *vdip; 1355 1356 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1357 1358 vdip = vh->vh_dip; 1359 cdip = ct->ct_dip; 1360 1361 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1362 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1363 DEVI(cdip)->devi_mdi_client = NULL; 1364 1365 /* 1366 * Clear out back ref. 
to dev_info_t node 1367 */ 1368 ct->ct_dip = NULL; 1369 1370 /* 1371 * Remove this client from our hash queue 1372 */ 1373 i_mdi_client_delist_table(vh, ct); 1374 1375 /* 1376 * Uninitialize and free the component 1377 */ 1378 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1379 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1380 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1381 cv_destroy(&ct->ct_failover_cv); 1382 cv_destroy(&ct->ct_unstable_cv); 1383 cv_destroy(&ct->ct_powerchange_cv); 1384 mutex_destroy(&ct->ct_mutex); 1385 kmem_free(ct, sizeof (*ct)); 1386 1387 if (cdip != NULL) { 1388 MDI_VHCI_CLIENT_UNLOCK(vh); 1389 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1390 MDI_VHCI_CLIENT_LOCK(vh); 1391 } 1392 return (rv); 1393 } 1394 1395 /* 1396 * i_mdi_client_find(): 1397 * Find the client structure corresponding to a given guid 1398 * Caller should hold the vhci client lock. 1399 */ 1400 static mdi_client_t * 1401 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1402 { 1403 int index; 1404 struct client_hash *head; 1405 mdi_client_t *ct; 1406 1407 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1408 1409 index = i_mdi_get_hash_key(guid); 1410 head = &vh->vh_client_table[index]; 1411 1412 ct = head->ct_hash_head; 1413 while (ct != NULL) { 1414 if (strcmp(ct->ct_guid, guid) == 0 && 1415 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1416 break; 1417 } 1418 ct = ct->ct_hnext; 1419 } 1420 return (ct); 1421 } 1422 1423 /* 1424 * i_mdi_client_update_state(): 1425 * Compute and update client device state 1426 * Notes: 1427 * A client device can be in any of three possible states: 1428 * 1429 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1430 * one online/standby paths. Can tolerate failures. 1431 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1432 * no alternate paths available as standby. A failure on the online 1433 * would result in loss of access to device data. 
1434 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1435 * no paths available to access the device. 1436 */ 1437 static void 1438 i_mdi_client_update_state(mdi_client_t *ct) 1439 { 1440 int state; 1441 1442 ASSERT(MDI_CLIENT_LOCKED(ct)); 1443 state = i_mdi_client_compute_state(ct, NULL); 1444 MDI_CLIENT_SET_STATE(ct, state); 1445 } 1446 1447 /* 1448 * i_mdi_client_compute_state(): 1449 * Compute client device state 1450 * 1451 * mdi_phci_t * Pointer to pHCI structure which should 1452 * while computing the new value. Used by 1453 * i_mdi_phci_offline() to find the new 1454 * client state after DR of a pHCI. 1455 */ 1456 static int 1457 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1458 { 1459 int state; 1460 int online_count = 0; 1461 int standby_count = 0; 1462 mdi_pathinfo_t *pip, *next; 1463 1464 ASSERT(MDI_CLIENT_LOCKED(ct)); 1465 pip = ct->ct_path_head; 1466 while (pip != NULL) { 1467 MDI_PI_LOCK(pip); 1468 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1469 if (MDI_PI(pip)->pi_phci == ph) { 1470 MDI_PI_UNLOCK(pip); 1471 pip = next; 1472 continue; 1473 } 1474 1475 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1476 == MDI_PATHINFO_STATE_ONLINE) 1477 online_count++; 1478 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1479 == MDI_PATHINFO_STATE_STANDBY) 1480 standby_count++; 1481 MDI_PI_UNLOCK(pip); 1482 pip = next; 1483 } 1484 1485 if (online_count == 0) { 1486 if (standby_count == 0) { 1487 state = MDI_CLIENT_STATE_FAILED; 1488 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1489 " ct = %p\n", (void *)ct)); 1490 } else if (standby_count == 1) { 1491 state = MDI_CLIENT_STATE_DEGRADED; 1492 } else { 1493 state = MDI_CLIENT_STATE_OPTIMAL; 1494 } 1495 } else if (online_count == 1) { 1496 if (standby_count == 0) { 1497 state = MDI_CLIENT_STATE_DEGRADED; 1498 } else { 1499 state = MDI_CLIENT_STATE_OPTIMAL; 1500 } 1501 } else { 1502 state = MDI_CLIENT_STATE_OPTIMAL; 1503 } 1504 return (state); 1505 } 1506 
1507 /* 1508 * i_mdi_client2devinfo(): 1509 * Utility function 1510 */ 1511 dev_info_t * 1512 i_mdi_client2devinfo(mdi_client_t *ct) 1513 { 1514 return (ct->ct_dip); 1515 } 1516 1517 /* 1518 * mdi_client_path2_devinfo(): 1519 * Given the parent devinfo and child devfs pathname, search for 1520 * a valid devfs node handle. 1521 */ 1522 dev_info_t * 1523 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1524 { 1525 dev_info_t *cdip = NULL; 1526 dev_info_t *ndip = NULL; 1527 char *temp_pathname; 1528 int circular; 1529 1530 /* 1531 * Allocate temp buffer 1532 */ 1533 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1534 1535 /* 1536 * Lock parent against changes 1537 */ 1538 ndi_devi_enter(vdip, &circular); 1539 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1540 while ((cdip = ndip) != NULL) { 1541 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1542 1543 *temp_pathname = '\0'; 1544 (void) ddi_pathname(cdip, temp_pathname); 1545 if (strcmp(temp_pathname, pathname) == 0) { 1546 break; 1547 } 1548 } 1549 /* 1550 * Release devinfo lock 1551 */ 1552 ndi_devi_exit(vdip, circular); 1553 1554 /* 1555 * Free the temp buffer 1556 */ 1557 kmem_free(temp_pathname, MAXPATHLEN); 1558 return (cdip); 1559 } 1560 1561 /* 1562 * mdi_client_get_path_count(): 1563 * Utility function to get number of path information nodes 1564 * associated with a given client device. 
1565 */ 1566 int 1567 mdi_client_get_path_count(dev_info_t *cdip) 1568 { 1569 mdi_client_t *ct; 1570 int count = 0; 1571 1572 ct = i_devi_get_client(cdip); 1573 if (ct != NULL) { 1574 count = ct->ct_path_count; 1575 } 1576 return (count); 1577 } 1578 1579 1580 /* 1581 * i_mdi_get_hash_key(): 1582 * Create a hash using strings as keys 1583 * 1584 */ 1585 static int 1586 i_mdi_get_hash_key(char *str) 1587 { 1588 uint32_t g, hash = 0; 1589 char *p; 1590 1591 for (p = str; *p != '\0'; p++) { 1592 g = *p; 1593 hash += g; 1594 } 1595 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1596 } 1597 1598 /* 1599 * mdi_get_lb_policy(): 1600 * Get current load balancing policy for a given client device 1601 */ 1602 client_lb_t 1603 mdi_get_lb_policy(dev_info_t *cdip) 1604 { 1605 client_lb_t lb = LOAD_BALANCE_NONE; 1606 mdi_client_t *ct; 1607 1608 ct = i_devi_get_client(cdip); 1609 if (ct != NULL) { 1610 lb = ct->ct_lb; 1611 } 1612 return (lb); 1613 } 1614 1615 /* 1616 * mdi_set_lb_region_size(): 1617 * Set current region size for the load-balance 1618 */ 1619 int 1620 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1621 { 1622 mdi_client_t *ct; 1623 int rv = MDI_FAILURE; 1624 1625 ct = i_devi_get_client(cdip); 1626 if (ct != NULL && ct->ct_lb_args != NULL) { 1627 ct->ct_lb_args->region_size = region_size; 1628 rv = MDI_SUCCESS; 1629 } 1630 return (rv); 1631 } 1632 1633 /* 1634 * mdi_Set_lb_policy(): 1635 * Set current load balancing policy for a given client device 1636 */ 1637 int 1638 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1639 { 1640 mdi_client_t *ct; 1641 int rv = MDI_FAILURE; 1642 1643 ct = i_devi_get_client(cdip); 1644 if (ct != NULL) { 1645 ct->ct_lb = lb; 1646 rv = MDI_SUCCESS; 1647 } 1648 return (rv); 1649 } 1650 1651 /* 1652 * mdi_failover(): 1653 * failover function called by the vHCI drivers to initiate 1654 * a failover operation. This is typically due to non-availability 1655 * of online paths to route I/O requests. 
Failover can be 1656 * triggered through user application also. 1657 * 1658 * The vHCI driver calls mdi_failover() to initiate a failover 1659 * operation. mdi_failover() calls back into the vHCI driver's 1660 * vo_failover() entry point to perform the actual failover 1661 * operation. The reason for requiring the vHCI driver to 1662 * initiate failover by calling mdi_failover(), instead of directly 1663 * executing vo_failover() itself, is to ensure that the mdi 1664 * framework can keep track of the client state properly. 1665 * Additionally, mdi_failover() provides as a convenience the 1666 * option of performing the failover operation synchronously or 1667 * asynchronously 1668 * 1669 * Upon successful completion of the failover operation, the 1670 * paths that were previously ONLINE will be in the STANDBY state, 1671 * and the newly activated paths will be in the ONLINE state. 1672 * 1673 * The flags modifier determines whether the activation is done 1674 * synchronously: MDI_FAILOVER_SYNC 1675 * Return Values: 1676 * MDI_SUCCESS 1677 * MDI_FAILURE 1678 * MDI_BUSY 1679 */ 1680 /*ARGSUSED*/ 1681 int 1682 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1683 { 1684 int rv; 1685 mdi_client_t *ct; 1686 1687 ct = i_devi_get_client(cdip); 1688 ASSERT(ct != NULL); 1689 if (ct == NULL) { 1690 /* cdip is not a valid client device. Nothing more to do. */ 1691 return (MDI_FAILURE); 1692 } 1693 1694 MDI_CLIENT_LOCK(ct); 1695 1696 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1697 /* A path to the client is being freed */ 1698 MDI_CLIENT_UNLOCK(ct); 1699 return (MDI_BUSY); 1700 } 1701 1702 1703 if (MDI_CLIENT_IS_FAILED(ct)) { 1704 /* 1705 * Client is in failed state. Nothing more to do. 
1706 */ 1707 MDI_CLIENT_UNLOCK(ct); 1708 return (MDI_FAILURE); 1709 } 1710 1711 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1712 /* 1713 * Failover is already in progress; return BUSY 1714 */ 1715 MDI_CLIENT_UNLOCK(ct); 1716 return (MDI_BUSY); 1717 } 1718 /* 1719 * Make sure that mdi_pathinfo node state changes are processed. 1720 * We do not allow failovers to progress while client path state 1721 * changes are in progress 1722 */ 1723 if (ct->ct_unstable) { 1724 if (flags == MDI_FAILOVER_ASYNC) { 1725 MDI_CLIENT_UNLOCK(ct); 1726 return (MDI_BUSY); 1727 } else { 1728 while (ct->ct_unstable) 1729 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1730 } 1731 } 1732 1733 /* 1734 * Client device is in stable state. Before proceeding, perform sanity 1735 * checks again. 1736 */ 1737 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1738 (!i_ddi_devi_attached(ct->ct_dip))) { 1739 /* 1740 * Client is in failed state. Nothing more to do. 1741 */ 1742 MDI_CLIENT_UNLOCK(ct); 1743 return (MDI_FAILURE); 1744 } 1745 1746 /* 1747 * Set the client state as failover in progress. 1748 */ 1749 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1750 ct->ct_failover_flags = flags; 1751 MDI_CLIENT_UNLOCK(ct); 1752 1753 if (flags == MDI_FAILOVER_ASYNC) { 1754 /* 1755 * Submit the initiate failover request via CPR safe 1756 * taskq threads. 1757 */ 1758 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1759 ct, KM_SLEEP); 1760 return (MDI_ACCEPT); 1761 } else { 1762 /* 1763 * Synchronous failover mode. Typically invoked from the user 1764 * land. 1765 */ 1766 rv = i_mdi_failover(ct); 1767 } 1768 return (rv); 1769 } 1770 1771 /* 1772 * i_mdi_failover(): 1773 * internal failover function. Invokes vHCI drivers failover 1774 * callback function and process the failover status 1775 * Return Values: 1776 * None 1777 * 1778 * Note: A client device in failover state can not be detached or freed. 
1779 */ 1780 static int 1781 i_mdi_failover(void *arg) 1782 { 1783 int rv = MDI_SUCCESS; 1784 mdi_client_t *ct = (mdi_client_t *)arg; 1785 mdi_vhci_t *vh = ct->ct_vhci; 1786 1787 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1788 1789 if (vh->vh_ops->vo_failover != NULL) { 1790 /* 1791 * Call vHCI drivers callback routine 1792 */ 1793 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1794 ct->ct_failover_flags); 1795 } 1796 1797 MDI_CLIENT_LOCK(ct); 1798 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1799 1800 /* 1801 * Save the failover return status 1802 */ 1803 ct->ct_failover_status = rv; 1804 1805 /* 1806 * As a result of failover, client status would have been changed. 1807 * Update the client state and wake up anyone waiting on this client 1808 * device. 1809 */ 1810 i_mdi_client_update_state(ct); 1811 1812 cv_broadcast(&ct->ct_failover_cv); 1813 MDI_CLIENT_UNLOCK(ct); 1814 return (rv); 1815 } 1816 1817 /* 1818 * Load balancing is logical block. 1819 * IOs within the range described by region_size 1820 * would go on the same path. This would improve the 1821 * performance by cache-hit on some of the RAID devices. 1822 * Search only for online paths(At some point we 1823 * may want to balance across target ports). 1824 * If no paths are found then default to round-robin. 
1825 */ 1826 static int 1827 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1828 { 1829 int path_index = -1; 1830 int online_path_count = 0; 1831 int online_nonpref_path_count = 0; 1832 int region_size = ct->ct_lb_args->region_size; 1833 mdi_pathinfo_t *pip; 1834 mdi_pathinfo_t *next; 1835 int preferred, path_cnt; 1836 1837 pip = ct->ct_path_head; 1838 while (pip) { 1839 MDI_PI_LOCK(pip); 1840 if (MDI_PI(pip)->pi_state == 1841 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1842 online_path_count++; 1843 } else if (MDI_PI(pip)->pi_state == 1844 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1845 online_nonpref_path_count++; 1846 } 1847 next = (mdi_pathinfo_t *) 1848 MDI_PI(pip)->pi_client_link; 1849 MDI_PI_UNLOCK(pip); 1850 pip = next; 1851 } 1852 /* if found any online/preferred then use this type */ 1853 if (online_path_count > 0) { 1854 path_cnt = online_path_count; 1855 preferred = 1; 1856 } else if (online_nonpref_path_count > 0) { 1857 path_cnt = online_nonpref_path_count; 1858 preferred = 0; 1859 } else { 1860 path_cnt = 0; 1861 } 1862 if (path_cnt) { 1863 path_index = (bp->b_blkno >> region_size) % path_cnt; 1864 pip = ct->ct_path_head; 1865 while (pip && path_index != -1) { 1866 MDI_PI_LOCK(pip); 1867 if (path_index == 0 && 1868 (MDI_PI(pip)->pi_state == 1869 MDI_PATHINFO_STATE_ONLINE) && 1870 MDI_PI(pip)->pi_preferred == preferred) { 1871 MDI_PI_HOLD(pip); 1872 MDI_PI_UNLOCK(pip); 1873 *ret_pip = pip; 1874 return (MDI_SUCCESS); 1875 } 1876 path_index --; 1877 next = (mdi_pathinfo_t *) 1878 MDI_PI(pip)->pi_client_link; 1879 MDI_PI_UNLOCK(pip); 1880 pip = next; 1881 } 1882 if (pip == NULL) { 1883 MDI_DEBUG(4, (CE_NOTE, NULL, 1884 "!lba %llx, no pip !!\n", 1885 bp->b_lblkno)); 1886 } else { 1887 MDI_DEBUG(4, (CE_NOTE, NULL, 1888 "!lba %llx, no pip for path_index, " 1889 "pip %p\n", bp->b_lblkno, (void *)pip)); 1890 } 1891 } 1892 return (MDI_FAILURE); 1893 } 1894 1895 /* 1896 * mdi_select_path(): 1897 * select a 
path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is
 *		configured through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  Any non-zero flags value disables the
 *		default behavior: the client state checks are skipped and
 *		round-robin traversal is always used.  If start_pip is
 *		specified (non NULL) it is used as start point to walk and find
 *		the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
 *		The code also accepts the combination of both with
 *		MDI_SELECT_USER_DISABLE_PATH; any other flags value matches
 *		no path.
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt). Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path matched but at least one path examined
 *				  was in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	/* Callers always get a defined *ret_pip, even on failure */
	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	/* The client lock is held for the whole selection */
	MDI_CLIENT_LOCK(ct);

	/* Client state checks apply to the standard behavior only */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	/*
	 * There is no default case: a policy value matching none of the
	 * cases below falls straight through to the MDI_BUSY/MDI_NOPATH
	 * return at the end of the function.
	 */
	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * Circular walk from 'start': one full lap looking at
		 * preferred paths, then a second lap for non-preferred ones.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE) &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.  Remember that so the caller is told
			 * MDI_BUSY rather than MDI_NOPATH if nothing matches.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/* Back at the start: switch lap, or stop after lap 2 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Standard behavior resumes after the last selected path;
		 * non-standard behavior resumes after the caller's start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set to 1 when this path satisfies the
			 * request: ONLINE for the standard behavior, otherwise
			 * whatever state(s) the exact flags value asks for; in
			 * all cases the path must be of the preference class
			 * currently being traversed.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					/* Unrecognized flags match no path */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				/*
				 * Only the standard behavior updates the
				 * round-robin cursor.
				 */
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.  Remember that so the caller is told
			 * MDI_BUSY rather than MDI_NOPATH if nothing matches.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * Every entry to do_again (fall-through or goto)
			 * arrives with pip locked.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report BUSY when a transient path was seen
	 * during the walk, NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
2334 */ 2335 mdi_pathinfo_t * 2336 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2337 { 2338 mdi_client_t *ct; 2339 2340 if (!MDI_CLIENT(ct_dip)) 2341 return (NULL); 2342 2343 /* 2344 * Walk through client link 2345 */ 2346 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2347 ASSERT(ct != NULL); 2348 2349 if (pip == NULL) 2350 return ((mdi_pathinfo_t *)ct->ct_path_head); 2351 2352 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2353 } 2354 2355 /* 2356 * For a phci, return the next available path to any client 2357 * Note: ditto mdi_get_next_phci_path() 2358 */ 2359 mdi_pathinfo_t * 2360 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2361 { 2362 mdi_phci_t *ph; 2363 2364 if (!MDI_PHCI(ph_dip)) 2365 return (NULL); 2366 2367 /* 2368 * Walk through pHCI link 2369 */ 2370 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2371 ASSERT(ph != NULL); 2372 2373 if (pip == NULL) 2374 return ((mdi_pathinfo_t *)ph->ph_path_head); 2375 2376 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2377 } 2378 2379 /* 2380 * mdi_hold_path(): 2381 * Hold the mdi_pathinfo node against unwanted unexpected free. 2382 * Return Values: 2383 * None 2384 */ 2385 void 2386 mdi_hold_path(mdi_pathinfo_t *pip) 2387 { 2388 if (pip) { 2389 MDI_PI_LOCK(pip); 2390 MDI_PI_HOLD(pip); 2391 MDI_PI_UNLOCK(pip); 2392 } 2393 } 2394 2395 2396 /* 2397 * mdi_rele_path(): 2398 * Release the mdi_pathinfo node which was selected 2399 * through mdi_select_path() mechanism or manually held by 2400 * calling mdi_hold_path(). 2401 * Return Values: 2402 * None 2403 */ 2404 void 2405 mdi_rele_path(mdi_pathinfo_t *pip) 2406 { 2407 if (pip) { 2408 MDI_PI_LOCK(pip); 2409 MDI_PI_RELE(pip); 2410 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2411 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2412 } 2413 MDI_PI_UNLOCK(pip); 2414 } 2415 } 2416 2417 /* 2418 * mdi_pi_lock(): 2419 * Lock the mdi_pathinfo node. 
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	/*
	 * The NULL test repeats the ASSERT so the lock macro is never
	 * applied to a NULL node even if the assertion is compiled out.
	 */
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.
 * Note:
 *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *		Returns a pointer to the mdi_pathinfo node if a matching node is
 *		found.
 *
 *		pdip	pHCI devinfo node; NULL yields NULL.
 *		caddr	client unit address.  When NULL the pHCI's own path
 *			list is searched; otherwise the client is looked up
 *			and its path list is searched for a node that also
 *			belongs to this pHCI.
 *		paddr	path unit address to match; NULL yields NULL.
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL
 * Notes:
 *		Caller need not hold any locks to call this function.
 *		The node is returned neither locked nor held: both search
 *		paths drop their list lock before returning and no
 *		MDI_PI_HOLD() is taken here.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	mdi_client_t		*ct;
	mdi_pathinfo_t		*pip = NULL;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (CE_WARN, pdip,
			    "!mdi_pi_find: offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* Linear walk of the pHCI list; pip ends NULL if no match. */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
		    (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
		    "found for caddr %s", caddr ? caddr : "NULL"));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the vHCI client lock as it is no longer needed. Note: We
	 * always respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the owning pHCI and the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc_compatible():
 *		Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path, is capable of having properties attached,
 *		and is passed to mdi_pi_online() to fully attach and online the
 *		path and client device node.
 *		The mdi_pathinfo node returned by this function must be
 *		destroyed using mdi_pi_free() if the path is no longer
 *		operational or if the caller fails to attach a client device
 *		node when calling mdi_pi_online(). The framework will not free
 *		the resources allocated.
 *		This function can be called from both interrupt and kernel
 *		contexts.  DDI_NOSLEEP flag should be used while calling
 *		from interrupt contexts.
 *		NOTE(review): "flags" is not consulted anywhere in this
 *		function and i_mdi_pi_alloc() allocates with KM_SLEEP, so
 *		the interrupt-context statement above should be confirmed.
 *
 *		If the client already has a path through this pHCI with the
 *		same paddr, that existing node is returned and MDI_SUCCESS is
 *		reported.  The client and its devinfo node are created on
 *		demand; "compatible"/"ncompatible" are passed on to
 *		i_mdi_devinfo_create().  *ret_pip is NULL on every failure.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	(NULL argument, detaching or invalid pHCI/vHCI)
 *		MDI_BUSY	(pHCI is not in the ready state)
 *		MDI_NOMEM	(client devinfo node could not be created)
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;
	int		path_allocated = 0;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
	    cname ? cname : "NULL", caddr ? caddr : "NULL",
	    paddr ? paddr : "NULL"));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/*
	 * Mark the pHCI unstable for the duration of the allocation; the
	 * matching MDI_PHCI_STABLE() is at the common exit below.
	 */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* rv is still MDI_NOMEM and pip is still NULL here */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	/* Tie the devinfo node back to its MDI client structure. */
	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* Reuse an existing path through this pHCI with the same address. */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the owning pHCI and the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device.  Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the vHCI client lock (reached on both success and failure).
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (CE_NOTE, pdip,
	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));

	/* Only a node created by this call is added to the vhcache. */
	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *		Convenience wrapper around mdi_pi_alloc_compatible() that
 *		passes no compatible-name list (NULL, 0).
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node and add it to both the pHCI and
 *		the client path lists.  The node starts in the
 *		INIT | TRANSIENT state, inherits the pHCI's disable flags,
 *		and is marked preferred.  The caller must hold the vHCI
 *		client lock (asserted).
 * Return Values:
 *		mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	int		se_flag;
	int		kmem_flag;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Propagate any disable state currently set on the pHCI. */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	/* Private copy of the address; freed with the same length later. */
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/*
	 * determine interrupt context; se_flag is only an intermediate
	 * used to choose the kmem flag passed to the cache invalidation
	 */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	return (pip);
}

/*
 * i_mdi_phci_add_path():
 *		Append a mdi_pathinfo node to the tail of the pHCI list.
 * Notes:
 *		Caller must have entered the pHCI devinfo node (asserted via
 *		DEVI_BUSY_OWNED); the per-pHCI mutex is taken and dropped
 *		here.
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	MDI_PHCI_LOCK(ph);
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *		Append a mdi_pathinfo node to the tail of the client list.
 * Notes:
 *		Caller must have entered the client devinfo node (asserted
 *		via DEVI_BUSY_OWNED); the client mutex is taken and dropped
 *		here.
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	MDI_CLIENT_LOCK(ct);
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 *
 *		The node must be in the offline, init or initing state.  If
 *		references are outstanding the call waits on pi_ref_cv and
 *		gives up with MDI_BUSY when a 60 second wait times out.
 *		On success "pip" has been freed and must not be used again.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		(or whatever non-success value the vHCI's vo_pi_uninit()
 *		callback returned)
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check for the busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is offline, init or initing and the reference
	 * count is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout was reached without the reference
			 * count dropping to zero and being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    (void *)pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	/* Remember the PM hold so it can be dropped once uninit succeeds. */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock so the vHCI client lock can be taken first,
	 * then retake the client lock.
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/* A node still in the initing state skips the uninit callback. */
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip is freed by this call; do not touch it afterwards */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * The node was not freed; put it back into the vhcache.
	 * NOTE(review): this is done only for MDI_FAILURE; any other
	 * non-success value from vo_pi_uninit() leaves the node out of the
	 * vhcache - confirm that is intended.
	 */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node: destroy its kstats, unlink it
 *		from the client and pHCI lists, and release its mutex,
 *		condition variables, address string, property list and the
 *		node itself.  The caller must hold the client lock (asserted).
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;
	int	se_flag;
	int	kmem_flag;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		/* size matches the strlen() + 1 used by i_mdi_pi_alloc() */
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
3074 * Notes: 3075 * Caller should hold per-pHCI mutex 3076 */ 3077 static void 3078 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3079 { 3080 mdi_pathinfo_t *prev = NULL; 3081 mdi_pathinfo_t *path = NULL; 3082 3083 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3084 3085 MDI_PHCI_LOCK(ph); 3086 path = ph->ph_path_head; 3087 while (path != NULL) { 3088 if (path == pip) { 3089 break; 3090 } 3091 prev = path; 3092 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3093 } 3094 3095 if (path) { 3096 ph->ph_path_count--; 3097 if (prev) { 3098 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3099 } else { 3100 ph->ph_path_head = 3101 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3102 } 3103 if (ph->ph_path_tail == path) { 3104 ph->ph_path_tail = prev; 3105 } 3106 } 3107 3108 /* 3109 * Clear the pHCI link 3110 */ 3111 MDI_PI(pip)->pi_phci_link = NULL; 3112 MDI_PI(pip)->pi_phci = NULL; 3113 MDI_PHCI_UNLOCK(ph); 3114 } 3115 3116 /* 3117 * i_mdi_client_remove_path(): 3118 * Remove a mdi_pathinfo node from client path list. 
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	/*
	 * Unlike the pHCI variant, this routine does not take the client
	 * mutex itself: the caller must already hold it and must have
	 * entered the client devinfo node.
	 */
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* Do not leave ct_path_last pointing at the removed node. */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Move a mdi_pathinfo node to the requested state (online,
 *		standby, fault or offline).  The node is put into the
 *		matching transient state, the vHCI's vo_pi_state_change()
 *		callback is invoked, and the client state and client devinfo
 *		node are then updated to reflect the result.
 *
 *		A node that is still initing is first passed to the vHCI's
 *		vo_pi_init() callback.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY	(pHCI not ready, or an NDI online/offline
 *				operation reported busy)
 *		(or the non-success value returned by the vHCI callback)
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* The callback is made without the pathinfo lock held. */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Paired with MDI_PHCI_STABLE() at state_change_exit. */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, another state change is in progress; wait till the
	 * transient state is cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the client nor the pathinfo lock
				 * is held at this point.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* The vHCI callback is made with no MDI locks held. */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	/* Commit the new state on success, otherwise restore the old one. */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Changing the mdi_pathinfo node state will impact
			 * the client state.  Update the client and dev_info
			 * node state accordingly.  From here until the
			 * conversion switch below, rv holds an NDI_* code.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3483 * Return Values: 3484 * MDI_SUCCESS 3485 * MDI_FAILURE 3486 */ 3487 int 3488 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3489 { 3490 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3491 dev_info_t *cdip; 3492 int client_held = 0; 3493 int rv; 3494 3495 ASSERT(ct != NULL); 3496 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3497 if (rv != MDI_SUCCESS) 3498 return (rv); 3499 3500 MDI_PI_LOCK(pip); 3501 if (MDI_PI(pip)->pi_pm_held == 0) { 3502 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3503 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3504 i_mdi_pm_hold_pip(pip); 3505 client_held = 1; 3506 } 3507 MDI_PI_UNLOCK(pip); 3508 3509 if (client_held) { 3510 MDI_CLIENT_LOCK(ct); 3511 if (ct->ct_power_cnt == 0) { 3512 rv = i_mdi_power_all_phci(ct); 3513 } 3514 3515 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3516 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3517 i_mdi_pm_hold_client(ct, 1); 3518 MDI_CLIENT_UNLOCK(ct); 3519 } 3520 3521 /* 3522 * Create the per-path (pathinfo) IO and error kstats which 3523 * are reported via iostat(1m). 3524 * 3525 * Defer creating the per-path kstats if device is not yet 3526 * attached; the names of the kstats are constructed in part 3527 * using the devices instance number which is assigned during 3528 * process of attaching the client device. 3529 * 3530 * The framework post_attach handler, mdi_post_attach(), is 3531 * is responsible for initializing the client's pathinfo list 3532 * once successfully attached. 
3533 */ 3534 cdip = ct->ct_dip; 3535 ASSERT(cdip); 3536 if (cdip == NULL || !i_ddi_devi_attached(cdip)) 3537 return (rv); 3538 3539 MDI_CLIENT_LOCK(ct); 3540 rv = i_mdi_pi_kstat_create(pip); 3541 MDI_CLIENT_UNLOCK(ct); 3542 return (rv); 3543 } 3544 3545 /* 3546 * mdi_pi_standby(): 3547 * Place the mdi_pathinfo node in standby state 3548 * 3549 * Return Values: 3550 * MDI_SUCCESS 3551 * MDI_FAILURE 3552 */ 3553 int 3554 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3555 { 3556 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3557 } 3558 3559 /* 3560 * mdi_pi_fault(): 3561 * Place the mdi_pathinfo node in fault'ed state 3562 * Return Values: 3563 * MDI_SUCCESS 3564 * MDI_FAILURE 3565 */ 3566 int 3567 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3568 { 3569 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3570 } 3571 3572 /* 3573 * mdi_pi_offline(): 3574 * Offline a mdi_pathinfo node. 3575 * Return Values: 3576 * MDI_SUCCESS 3577 * MDI_FAILURE 3578 */ 3579 int 3580 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3581 { 3582 int ret, client_held = 0; 3583 mdi_client_t *ct; 3584 3585 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3586 3587 if (ret == MDI_SUCCESS) { 3588 MDI_PI_LOCK(pip); 3589 if (MDI_PI(pip)->pi_pm_held) { 3590 client_held = 1; 3591 } 3592 MDI_PI_UNLOCK(pip); 3593 3594 if (client_held) { 3595 ct = MDI_PI(pip)->pi_client; 3596 MDI_CLIENT_LOCK(ct); 3597 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3598 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3599 i_mdi_pm_rele_client(ct, 1); 3600 MDI_CLIENT_UNLOCK(ct); 3601 } 3602 } 3603 3604 return (ret); 3605 } 3606 3607 /* 3608 * i_mdi_pi_offline(): 3609 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3610 */ 3611 static int 3612 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3613 { 3614 dev_info_t *vdip = NULL; 3615 mdi_vhci_t *vh = NULL; 3616 mdi_client_t *ct = NULL; 3617 int (*f)(); 3618 int rv; 3619 3620 MDI_PI_LOCK(pip); 3621 ct = 
MDI_PI(pip)->pi_client; 3622 ASSERT(ct != NULL); 3623 3624 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3625 /* 3626 * Give a chance for pending I/Os to complete. 3627 */ 3628 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3629 "%d cmds still pending on path: %p\n", 3630 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3631 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3632 &MDI_PI(pip)->pi_mutex, 3633 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3634 /* 3635 * The timeout time reached without ref_cnt being zero 3636 * being signaled. 3637 */ 3638 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3639 "Timeout reached on path %p without the cond\n", 3640 (void *)pip)); 3641 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3642 "%d cmds still pending on path: %p\n", 3643 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3644 } 3645 } 3646 vh = ct->ct_vhci; 3647 vdip = vh->vh_dip; 3648 3649 /* 3650 * Notify vHCI that has registered this event 3651 */ 3652 ASSERT(vh->vh_ops); 3653 f = vh->vh_ops->vo_pi_state_change; 3654 3655 if (f != NULL) { 3656 MDI_PI_UNLOCK(pip); 3657 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3658 flags)) != MDI_SUCCESS) { 3659 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3660 "!vo_path_offline failed " 3661 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3662 } 3663 MDI_PI_LOCK(pip); 3664 } 3665 3666 /* 3667 * Set the mdi_pathinfo node state and clear the transient condition 3668 */ 3669 MDI_PI_SET_OFFLINE(pip); 3670 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3671 MDI_PI_UNLOCK(pip); 3672 3673 MDI_CLIENT_LOCK(ct); 3674 if (rv == MDI_SUCCESS) { 3675 if (ct->ct_unstable == 0) { 3676 dev_info_t *cdip = ct->ct_dip; 3677 3678 /* 3679 * Onlining the mdi_pathinfo node will impact the 3680 * client state Update the client and dev_info node 3681 * state accordingly 3682 */ 3683 i_mdi_client_update_state(ct); 3684 rv = NDI_SUCCESS; 3685 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3686 if (cdip && 3687 (i_ddi_node_state(cdip) >= 3688 
DS_INITIALIZED)) { 3689 MDI_CLIENT_UNLOCK(ct); 3690 rv = ndi_devi_offline(cdip, 0); 3691 MDI_CLIENT_LOCK(ct); 3692 if (rv != NDI_SUCCESS) { 3693 /* 3694 * ndi_devi_offline failed. 3695 * Reset client flags to 3696 * online. 3697 */ 3698 MDI_DEBUG(4, (CE_WARN, cdip, 3699 "!ndi_devi_offline: failed " 3700 " Error: %x", rv)); 3701 MDI_CLIENT_SET_ONLINE(ct); 3702 } 3703 } 3704 } 3705 /* 3706 * Convert to MDI error code 3707 */ 3708 switch (rv) { 3709 case NDI_SUCCESS: 3710 rv = MDI_SUCCESS; 3711 break; 3712 case NDI_BUSY: 3713 rv = MDI_BUSY; 3714 break; 3715 default: 3716 rv = MDI_FAILURE; 3717 break; 3718 } 3719 } 3720 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3721 i_mdi_report_path_state(ct, pip); 3722 } 3723 3724 MDI_CLIENT_UNLOCK(ct); 3725 3726 /* 3727 * Change in the mdi_pathinfo node state will impact the client state 3728 */ 3729 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3730 (void *)ct, (void *)pip)); 3731 return (rv); 3732 } 3733 3734 3735 /* 3736 * mdi_pi_get_addr(): 3737 * Get the unit address associated with a mdi_pathinfo node 3738 * 3739 * Return Values: 3740 * char * 3741 */ 3742 char * 3743 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3744 { 3745 if (pip == NULL) 3746 return (NULL); 3747 3748 return (MDI_PI(pip)->pi_addr); 3749 } 3750 3751 /* 3752 * mdi_pi_get_client(): 3753 * Get the client devinfo associated with a mdi_pathinfo node 3754 * 3755 * Return Values: 3756 * Handle to client device dev_info node 3757 */ 3758 dev_info_t * 3759 mdi_pi_get_client(mdi_pathinfo_t *pip) 3760 { 3761 dev_info_t *dip = NULL; 3762 if (pip) { 3763 dip = MDI_PI(pip)->pi_client->ct_dip; 3764 } 3765 return (dip); 3766 } 3767 3768 /* 3769 * mdi_pi_get_phci(): 3770 * Get the pHCI devinfo associated with the mdi_pathinfo node 3771 * Return Values: 3772 * Handle to dev_info node 3773 */ 3774 dev_info_t * 3775 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3776 { 3777 dev_info_t *dip = NULL; 3778 if (pip) { 3779 dip = MDI_PI(pip)->pi_phci->ph_dip; 3780 } 3781 return (dip); 
3782 } 3783 3784 /* 3785 * mdi_pi_get_client_private(): 3786 * Get the client private information associated with the 3787 * mdi_pathinfo node 3788 */ 3789 void * 3790 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3791 { 3792 void *cprivate = NULL; 3793 if (pip) { 3794 cprivate = MDI_PI(pip)->pi_cprivate; 3795 } 3796 return (cprivate); 3797 } 3798 3799 /* 3800 * mdi_pi_set_client_private(): 3801 * Set the client private information in the mdi_pathinfo node 3802 */ 3803 void 3804 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3805 { 3806 if (pip) { 3807 MDI_PI(pip)->pi_cprivate = priv; 3808 } 3809 } 3810 3811 /* 3812 * mdi_pi_get_phci_private(): 3813 * Get the pHCI private information associated with the 3814 * mdi_pathinfo node 3815 */ 3816 caddr_t 3817 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3818 { 3819 caddr_t pprivate = NULL; 3820 if (pip) { 3821 pprivate = MDI_PI(pip)->pi_pprivate; 3822 } 3823 return (pprivate); 3824 } 3825 3826 /* 3827 * mdi_pi_set_phci_private(): 3828 * Set the pHCI private information in the mdi_pathinfo node 3829 */ 3830 void 3831 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3832 { 3833 if (pip) { 3834 MDI_PI(pip)->pi_pprivate = priv; 3835 } 3836 } 3837 3838 /* 3839 * mdi_pi_get_state(): 3840 * Get the mdi_pathinfo node state. Transient states are internal 3841 * and not provided to the users 3842 */ 3843 mdi_pathinfo_state_t 3844 mdi_pi_get_state(mdi_pathinfo_t *pip) 3845 { 3846 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3847 3848 if (pip) { 3849 if (MDI_PI_IS_TRANSIENT(pip)) { 3850 /* 3851 * mdi_pathinfo is in state transition. Return the 3852 * last good state. 3853 */ 3854 state = MDI_PI_OLD_STATE(pip); 3855 } else { 3856 state = MDI_PI_STATE(pip); 3857 } 3858 } 3859 return (state); 3860 } 3861 3862 /* 3863 * Note that the following function needs to be the new interface for 3864 * mdi_pi_get_state when mpxio gets integrated to ON. 
3865 */ 3866 int 3867 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3868 uint32_t *ext_state) 3869 { 3870 *state = MDI_PATHINFO_STATE_INIT; 3871 3872 if (pip) { 3873 if (MDI_PI_IS_TRANSIENT(pip)) { 3874 /* 3875 * mdi_pathinfo is in state transition. Return the 3876 * last good state. 3877 */ 3878 *state = MDI_PI_OLD_STATE(pip); 3879 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3880 } else { 3881 *state = MDI_PI_STATE(pip); 3882 *ext_state = MDI_PI_EXT_STATE(pip); 3883 } 3884 } 3885 return (MDI_SUCCESS); 3886 } 3887 3888 /* 3889 * mdi_pi_get_preferred: 3890 * Get the preferred path flag 3891 */ 3892 int 3893 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3894 { 3895 if (pip) { 3896 return (MDI_PI(pip)->pi_preferred); 3897 } 3898 return (0); 3899 } 3900 3901 /* 3902 * mdi_pi_set_preferred: 3903 * Set the preferred path flag 3904 */ 3905 void 3906 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3907 { 3908 if (pip) { 3909 MDI_PI(pip)->pi_preferred = preferred; 3910 } 3911 } 3912 3913 /* 3914 * mdi_pi_set_state(): 3915 * Set the mdi_pathinfo node state 3916 */ 3917 void 3918 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3919 { 3920 uint32_t ext_state; 3921 3922 if (pip) { 3923 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3924 MDI_PI(pip)->pi_state = state; 3925 MDI_PI(pip)->pi_state |= ext_state; 3926 } 3927 } 3928 3929 /* 3930 * Property functions: 3931 */ 3932 int 3933 i_map_nvlist_error_to_mdi(int val) 3934 { 3935 int rv; 3936 3937 switch (val) { 3938 case 0: 3939 rv = DDI_PROP_SUCCESS; 3940 break; 3941 case EINVAL: 3942 case ENOTSUP: 3943 rv = DDI_PROP_INVAL_ARG; 3944 break; 3945 case ENOMEM: 3946 rv = DDI_PROP_NO_MEMORY; 3947 break; 3948 default: 3949 rv = DDI_PROP_NOT_FOUND; 3950 break; 3951 } 3952 return (rv); 3953 } 3954 3955 /* 3956 * mdi_pi_get_next_prop(): 3957 * Property walk function. 
The caller should hold mdi_pi_lock() 3958 * and release by calling mdi_pi_unlock() at the end of walk to 3959 * get a consistent value. 3960 */ 3961 nvpair_t * 3962 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3963 { 3964 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3965 return (NULL); 3966 } 3967 ASSERT(MDI_PI_LOCKED(pip)); 3968 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3969 } 3970 3971 /* 3972 * mdi_prop_remove(): 3973 * Remove the named property from the named list. 3974 */ 3975 int 3976 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3977 { 3978 if (pip == NULL) { 3979 return (DDI_PROP_NOT_FOUND); 3980 } 3981 ASSERT(!MDI_PI_LOCKED(pip)); 3982 MDI_PI_LOCK(pip); 3983 if (MDI_PI(pip)->pi_prop == NULL) { 3984 MDI_PI_UNLOCK(pip); 3985 return (DDI_PROP_NOT_FOUND); 3986 } 3987 if (name) { 3988 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3989 } else { 3990 char nvp_name[MAXNAMELEN]; 3991 nvpair_t *nvp; 3992 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3993 while (nvp) { 3994 nvpair_t *next; 3995 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3996 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3997 nvpair_name(nvp)); 3998 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3999 nvp_name); 4000 nvp = next; 4001 } 4002 } 4003 MDI_PI_UNLOCK(pip); 4004 return (DDI_PROP_SUCCESS); 4005 } 4006 4007 /* 4008 * mdi_prop_size(): 4009 * Get buffer size needed to pack the property data. 4010 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4011 * buffer size. 
4012 */ 4013 int 4014 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4015 { 4016 int rv; 4017 size_t bufsize; 4018 4019 *buflenp = 0; 4020 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4021 return (DDI_PROP_NOT_FOUND); 4022 } 4023 ASSERT(MDI_PI_LOCKED(pip)); 4024 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4025 &bufsize, NV_ENCODE_NATIVE); 4026 *buflenp = bufsize; 4027 return (i_map_nvlist_error_to_mdi(rv)); 4028 } 4029 4030 /* 4031 * mdi_prop_pack(): 4032 * pack the property list. The caller should hold the 4033 * mdi_pathinfo_t node to get a consistent data 4034 */ 4035 int 4036 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4037 { 4038 int rv; 4039 size_t bufsize; 4040 4041 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4042 return (DDI_PROP_NOT_FOUND); 4043 } 4044 4045 ASSERT(MDI_PI_LOCKED(pip)); 4046 4047 bufsize = buflen; 4048 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4049 NV_ENCODE_NATIVE, KM_SLEEP); 4050 4051 return (i_map_nvlist_error_to_mdi(rv)); 4052 } 4053 4054 /* 4055 * mdi_prop_update_byte(): 4056 * Create/Update a byte property 4057 */ 4058 int 4059 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4060 { 4061 int rv; 4062 4063 if (pip == NULL) { 4064 return (DDI_PROP_INVAL_ARG); 4065 } 4066 ASSERT(!MDI_PI_LOCKED(pip)); 4067 MDI_PI_LOCK(pip); 4068 if (MDI_PI(pip)->pi_prop == NULL) { 4069 MDI_PI_UNLOCK(pip); 4070 return (DDI_PROP_NOT_FOUND); 4071 } 4072 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4073 MDI_PI_UNLOCK(pip); 4074 return (i_map_nvlist_error_to_mdi(rv)); 4075 } 4076 4077 /* 4078 * mdi_prop_update_byte_array(): 4079 * Create/Update a byte array property 4080 */ 4081 int 4082 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4083 uint_t nelements) 4084 { 4085 int rv; 4086 4087 if (pip == NULL) { 4088 return (DDI_PROP_INVAL_ARG); 4089 } 4090 ASSERT(!MDI_PI_LOCKED(pip)); 4091 MDI_PI_LOCK(pip); 4092 if (MDI_PI(pip)->pi_prop == NULL) { 
4093 MDI_PI_UNLOCK(pip); 4094 return (DDI_PROP_NOT_FOUND); 4095 } 4096 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4097 MDI_PI_UNLOCK(pip); 4098 return (i_map_nvlist_error_to_mdi(rv)); 4099 } 4100 4101 /* 4102 * mdi_prop_update_int(): 4103 * Create/Update a 32 bit integer property 4104 */ 4105 int 4106 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4107 { 4108 int rv; 4109 4110 if (pip == NULL) { 4111 return (DDI_PROP_INVAL_ARG); 4112 } 4113 ASSERT(!MDI_PI_LOCKED(pip)); 4114 MDI_PI_LOCK(pip); 4115 if (MDI_PI(pip)->pi_prop == NULL) { 4116 MDI_PI_UNLOCK(pip); 4117 return (DDI_PROP_NOT_FOUND); 4118 } 4119 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4120 MDI_PI_UNLOCK(pip); 4121 return (i_map_nvlist_error_to_mdi(rv)); 4122 } 4123 4124 /* 4125 * mdi_prop_update_int64(): 4126 * Create/Update a 64 bit integer property 4127 */ 4128 int 4129 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4130 { 4131 int rv; 4132 4133 if (pip == NULL) { 4134 return (DDI_PROP_INVAL_ARG); 4135 } 4136 ASSERT(!MDI_PI_LOCKED(pip)); 4137 MDI_PI_LOCK(pip); 4138 if (MDI_PI(pip)->pi_prop == NULL) { 4139 MDI_PI_UNLOCK(pip); 4140 return (DDI_PROP_NOT_FOUND); 4141 } 4142 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4143 MDI_PI_UNLOCK(pip); 4144 return (i_map_nvlist_error_to_mdi(rv)); 4145 } 4146 4147 /* 4148 * mdi_prop_update_int_array(): 4149 * Create/Update a int array property 4150 */ 4151 int 4152 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4153 uint_t nelements) 4154 { 4155 int rv; 4156 4157 if (pip == NULL) { 4158 return (DDI_PROP_INVAL_ARG); 4159 } 4160 ASSERT(!MDI_PI_LOCKED(pip)); 4161 MDI_PI_LOCK(pip); 4162 if (MDI_PI(pip)->pi_prop == NULL) { 4163 MDI_PI_UNLOCK(pip); 4164 return (DDI_PROP_NOT_FOUND); 4165 } 4166 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4167 nelements); 4168 MDI_PI_UNLOCK(pip); 4169 return (i_map_nvlist_error_to_mdi(rv)); 
4170 } 4171 4172 /* 4173 * mdi_prop_update_string(): 4174 * Create/Update a string property 4175 */ 4176 int 4177 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4178 { 4179 int rv; 4180 4181 if (pip == NULL) { 4182 return (DDI_PROP_INVAL_ARG); 4183 } 4184 ASSERT(!MDI_PI_LOCKED(pip)); 4185 MDI_PI_LOCK(pip); 4186 if (MDI_PI(pip)->pi_prop == NULL) { 4187 MDI_PI_UNLOCK(pip); 4188 return (DDI_PROP_NOT_FOUND); 4189 } 4190 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4191 MDI_PI_UNLOCK(pip); 4192 return (i_map_nvlist_error_to_mdi(rv)); 4193 } 4194 4195 /* 4196 * mdi_prop_update_string_array(): 4197 * Create/Update a string array property 4198 */ 4199 int 4200 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4201 uint_t nelements) 4202 { 4203 int rv; 4204 4205 if (pip == NULL) { 4206 return (DDI_PROP_INVAL_ARG); 4207 } 4208 ASSERT(!MDI_PI_LOCKED(pip)); 4209 MDI_PI_LOCK(pip); 4210 if (MDI_PI(pip)->pi_prop == NULL) { 4211 MDI_PI_UNLOCK(pip); 4212 return (DDI_PROP_NOT_FOUND); 4213 } 4214 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4215 nelements); 4216 MDI_PI_UNLOCK(pip); 4217 return (i_map_nvlist_error_to_mdi(rv)); 4218 } 4219 4220 /* 4221 * mdi_prop_lookup_byte(): 4222 * Look for byte property identified by name. The data returned 4223 * is the actual property and valid as long as mdi_pathinfo_t node 4224 * is alive. 4225 */ 4226 int 4227 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4228 { 4229 int rv; 4230 4231 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4232 return (DDI_PROP_NOT_FOUND); 4233 } 4234 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4235 return (i_map_nvlist_error_to_mdi(rv)); 4236 } 4237 4238 4239 /* 4240 * mdi_prop_lookup_byte_array(): 4241 * Look for byte array property identified by name. The data 4242 * returned is the actual property and valid as long as 4243 * mdi_pathinfo_t node is alive. 
4244 */ 4245 int 4246 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4247 uint_t *nelements) 4248 { 4249 int rv; 4250 4251 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4252 return (DDI_PROP_NOT_FOUND); 4253 } 4254 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4255 nelements); 4256 return (i_map_nvlist_error_to_mdi(rv)); 4257 } 4258 4259 /* 4260 * mdi_prop_lookup_int(): 4261 * Look for int property identified by name. The data returned 4262 * is the actual property and valid as long as mdi_pathinfo_t 4263 * node is alive. 4264 */ 4265 int 4266 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4267 { 4268 int rv; 4269 4270 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4271 return (DDI_PROP_NOT_FOUND); 4272 } 4273 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4274 return (i_map_nvlist_error_to_mdi(rv)); 4275 } 4276 4277 /* 4278 * mdi_prop_lookup_int64(): 4279 * Look for int64 property identified by name. The data returned 4280 * is the actual property and valid as long as mdi_pathinfo_t node 4281 * is alive. 4282 */ 4283 int 4284 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4285 { 4286 int rv; 4287 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4288 return (DDI_PROP_NOT_FOUND); 4289 } 4290 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4291 return (i_map_nvlist_error_to_mdi(rv)); 4292 } 4293 4294 /* 4295 * mdi_prop_lookup_int_array(): 4296 * Look for int array property identified by name. The data 4297 * returned is the actual property and valid as long as 4298 * mdi_pathinfo_t node is alive. 
4299 */ 4300 int 4301 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4302 uint_t *nelements) 4303 { 4304 int rv; 4305 4306 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4307 return (DDI_PROP_NOT_FOUND); 4308 } 4309 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4310 (int32_t **)data, nelements); 4311 return (i_map_nvlist_error_to_mdi(rv)); 4312 } 4313 4314 /* 4315 * mdi_prop_lookup_string(): 4316 * Look for string property identified by name. The data 4317 * returned is the actual property and valid as long as 4318 * mdi_pathinfo_t node is alive. 4319 */ 4320 int 4321 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4322 { 4323 int rv; 4324 4325 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4326 return (DDI_PROP_NOT_FOUND); 4327 } 4328 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4329 return (i_map_nvlist_error_to_mdi(rv)); 4330 } 4331 4332 /* 4333 * mdi_prop_lookup_string_array(): 4334 * Look for string array property identified by name. The data 4335 * returned is the actual property and valid as long as 4336 * mdi_pathinfo_t node is alive. 4337 */ 4338 int 4339 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4340 uint_t *nelements) 4341 { 4342 int rv; 4343 4344 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4345 return (DDI_PROP_NOT_FOUND); 4346 } 4347 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4348 nelements); 4349 return (i_map_nvlist_error_to_mdi(rv)); 4350 } 4351 4352 /* 4353 * mdi_prop_free(): 4354 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4355 * functions return the pointer to actual property data and not a 4356 * copy of it. So the data returned is valid as long as 4357 * mdi_pathinfo_t node is valid. 
4358 */ 4359 /*ARGSUSED*/ 4360 int 4361 mdi_prop_free(void *data) 4362 { 4363 return (DDI_PROP_SUCCESS); 4364 } 4365 4366 /*ARGSUSED*/ 4367 static void 4368 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4369 { 4370 char *phci_path, *ct_path; 4371 char *ct_status; 4372 char *status; 4373 dev_info_t *dip = ct->ct_dip; 4374 char lb_buf[64]; 4375 4376 ASSERT(MDI_CLIENT_LOCKED(ct)); 4377 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4378 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4379 return; 4380 } 4381 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4382 ct_status = "optimal"; 4383 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4384 ct_status = "degraded"; 4385 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4386 ct_status = "failed"; 4387 } else { 4388 ct_status = "unknown"; 4389 } 4390 4391 if (MDI_PI_IS_OFFLINE(pip)) { 4392 status = "offline"; 4393 } else if (MDI_PI_IS_ONLINE(pip)) { 4394 status = "online"; 4395 } else if (MDI_PI_IS_STANDBY(pip)) { 4396 status = "standby"; 4397 } else if (MDI_PI_IS_FAULT(pip)) { 4398 status = "faulted"; 4399 } else { 4400 status = "unknown"; 4401 } 4402 4403 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4404 (void) snprintf(lb_buf, sizeof (lb_buf), 4405 "%s, region-size: %d", mdi_load_balance_lba, 4406 ct->ct_lb_args->region_size); 4407 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4408 (void) snprintf(lb_buf, sizeof (lb_buf), 4409 "%s", mdi_load_balance_none); 4410 } else { 4411 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4412 mdi_load_balance_rr); 4413 } 4414 4415 if (dip) { 4416 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4417 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4418 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4419 "path %s (%s%d) to target address: %s is %s" 4420 " Load balancing: %s\n", 4421 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4422 ddi_get_instance(dip), ct_status, 4423 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4424 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4425 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4426 MDI_PI(pip)->pi_addr, status, lb_buf); 4427 kmem_free(phci_path, MAXPATHLEN); 4428 kmem_free(ct_path, MAXPATHLEN); 4429 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4430 } 4431 } 4432 4433 #ifdef DEBUG 4434 /* 4435 * i_mdi_log(): 4436 * Utility function for error message management 4437 * 4438 */ 4439 /*PRINTFLIKE3*/ 4440 static void 4441 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4442 { 4443 char name[MAXNAMELEN]; 4444 char buf[MAXNAMELEN]; 4445 char *bp; 4446 va_list ap; 4447 int log_only = 0; 4448 int boot_only = 0; 4449 int console_only = 0; 4450 4451 if (dip) { 4452 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4453 ddi_node_name(dip), ddi_get_instance(dip)); 4454 } else { 4455 name[0] = 0; 4456 } 4457 4458 va_start(ap, fmt); 4459 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4460 va_end(ap); 4461 4462 switch (buf[0]) { 4463 case '!': 4464 bp = &buf[1]; 4465 log_only = 1; 4466 break; 4467 case '?': 4468 bp = &buf[1]; 4469 boot_only = 1; 4470 break; 4471 case '^': 4472 bp = &buf[1]; 4473 console_only = 1; 4474 break; 4475 default: 4476 bp = buf; 4477 break; 4478 } 4479 if (mdi_debug_logonly) { 4480 log_only = 1; 4481 boot_only = 0; 4482 console_only = 0; 4483 } 4484 4485 switch (level) { 4486 case CE_NOTE: 4487 level = CE_CONT; 4488 /* FALLTHROUGH */ 4489 case CE_CONT: 4490 case CE_WARN: 4491 case CE_PANIC: 4492 if (boot_only) { 4493 cmn_err(level, "?mdi: %s%s", name, bp); 4494 } else if (console_only) { 4495 cmn_err(level, "^mdi: %s%s", name, bp); 4496 } else if (log_only) { 4497 cmn_err(level, "!mdi: %s%s", name, bp); 4498 } else { 4499 cmn_err(level, "mdi: %s%s", name, bp); 4500 } 4501 break; 4502 default: 4503 cmn_err(level, "mdi: %s%s", name, bp); 4504 break; 4505 } 4506 } 4507 #endif /* DEBUG */ 4508 4509 void 4510 i_mdi_client_online(dev_info_t *ct_dip) 4511 { 4512 mdi_client_t *ct; 4513 4514 /* 4515 * Client online notification. 
Mark client state as online 4516 * restore our binding with dev_info node 4517 */ 4518 ct = i_devi_get_client(ct_dip); 4519 ASSERT(ct != NULL); 4520 MDI_CLIENT_LOCK(ct); 4521 MDI_CLIENT_SET_ONLINE(ct); 4522 /* catch for any memory leaks */ 4523 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4524 ct->ct_dip = ct_dip; 4525 4526 if (ct->ct_power_cnt == 0) 4527 (void) i_mdi_power_all_phci(ct); 4528 4529 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4530 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4531 i_mdi_pm_hold_client(ct, 1); 4532 4533 MDI_CLIENT_UNLOCK(ct); 4534 } 4535 4536 void 4537 i_mdi_phci_online(dev_info_t *ph_dip) 4538 { 4539 mdi_phci_t *ph; 4540 4541 /* pHCI online notification. Mark state accordingly */ 4542 ph = i_devi_get_phci(ph_dip); 4543 ASSERT(ph != NULL); 4544 MDI_PHCI_LOCK(ph); 4545 MDI_PHCI_SET_ONLINE(ph); 4546 MDI_PHCI_UNLOCK(ph); 4547 } 4548 4549 /* 4550 * mdi_devi_online(): 4551 * Online notification from NDI framework on pHCI/client 4552 * device online. 4553 * Return Values: 4554 * NDI_SUCCESS 4555 * MDI_FAILURE 4556 */ 4557 /*ARGSUSED*/ 4558 int 4559 mdi_devi_online(dev_info_t *dip, uint_t flags) 4560 { 4561 if (MDI_PHCI(dip)) { 4562 i_mdi_phci_online(dip); 4563 } 4564 4565 if (MDI_CLIENT(dip)) { 4566 i_mdi_client_online(dip); 4567 } 4568 return (NDI_SUCCESS); 4569 } 4570 4571 /* 4572 * mdi_devi_offline(): 4573 * Offline notification from NDI framework on pHCI/Client device 4574 * offline. 
4575 * 4576 * Return Values: 4577 * NDI_SUCCESS 4578 * NDI_FAILURE 4579 */ 4580 /*ARGSUSED*/ 4581 int 4582 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4583 { 4584 int rv = NDI_SUCCESS; 4585 4586 if (MDI_CLIENT(dip)) { 4587 rv = i_mdi_client_offline(dip, flags); 4588 if (rv != NDI_SUCCESS) 4589 return (rv); 4590 } 4591 4592 if (MDI_PHCI(dip)) { 4593 rv = i_mdi_phci_offline(dip, flags); 4594 4595 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4596 /* set client back online */ 4597 i_mdi_client_online(dip); 4598 } 4599 } 4600 4601 return (rv); 4602 } 4603 4604 /*ARGSUSED*/ 4605 static int 4606 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4607 { 4608 int rv = NDI_SUCCESS; 4609 mdi_phci_t *ph; 4610 mdi_client_t *ct; 4611 mdi_pathinfo_t *pip; 4612 mdi_pathinfo_t *next; 4613 mdi_pathinfo_t *failed_pip = NULL; 4614 dev_info_t *cdip; 4615 4616 /* 4617 * pHCI component offline notification 4618 * Make sure that this pHCI instance is free to be offlined. 4619 * If it is OK to proceed, Offline and remove all the child 4620 * mdi_pathinfo nodes. This process automatically offlines 4621 * corresponding client devices, for which this pHCI provides 4622 * critical services. 4623 */ 4624 ph = i_devi_get_phci(dip); 4625 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n", 4626 (void *)dip, (void *)ph)); 4627 if (ph == NULL) { 4628 return (rv); 4629 } 4630 4631 MDI_PHCI_LOCK(ph); 4632 4633 if (MDI_PHCI_IS_OFFLINE(ph)) { 4634 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", 4635 (void *)ph)); 4636 MDI_PHCI_UNLOCK(ph); 4637 return (NDI_SUCCESS); 4638 } 4639 4640 /* 4641 * Check to see if the pHCI can be offlined 4642 */ 4643 if (ph->ph_unstable) { 4644 MDI_DEBUG(1, (CE_WARN, dip, 4645 "!One or more target devices are in transient " 4646 "state. This device can not be removed at " 4647 "this moment. 
Please try again later.")); 4648 MDI_PHCI_UNLOCK(ph); 4649 return (NDI_BUSY); 4650 } 4651 4652 pip = ph->ph_path_head; 4653 while (pip != NULL) { 4654 MDI_PI_LOCK(pip); 4655 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4656 4657 /* 4658 * The mdi_pathinfo state is OK. Check the client state. 4659 * If failover in progress fail the pHCI from offlining 4660 */ 4661 ct = MDI_PI(pip)->pi_client; 4662 i_mdi_client_lock(ct, pip); 4663 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4664 (ct->ct_unstable)) { 4665 /* 4666 * Failover is in progress, Fail the DR 4667 */ 4668 MDI_DEBUG(1, (CE_WARN, dip, 4669 "!pHCI device (%s%d) is Busy. %s", 4670 ddi_driver_name(dip), ddi_get_instance(dip), 4671 "This device can not be removed at " 4672 "this moment. Please try again later.")); 4673 MDI_PI_UNLOCK(pip); 4674 i_mdi_client_unlock(ct); 4675 MDI_PHCI_UNLOCK(ph); 4676 return (NDI_BUSY); 4677 } 4678 MDI_PI_UNLOCK(pip); 4679 4680 /* 4681 * Check to see of we are removing the last path of this 4682 * client device... 4683 */ 4684 cdip = ct->ct_dip; 4685 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4686 (i_mdi_client_compute_state(ct, ph) == 4687 MDI_CLIENT_STATE_FAILED)) { 4688 i_mdi_client_unlock(ct); 4689 MDI_PHCI_UNLOCK(ph); 4690 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4691 /* 4692 * ndi_devi_offline() failed. 4693 * This pHCI provides the critical path 4694 * to one or more client devices. 4695 * Return busy. 4696 */ 4697 MDI_PHCI_LOCK(ph); 4698 MDI_DEBUG(1, (CE_WARN, dip, 4699 "!pHCI device (%s%d) is Busy. %s", 4700 ddi_driver_name(dip), ddi_get_instance(dip), 4701 "This device can not be removed at " 4702 "this moment. 
Please try again later.")); 4703 failed_pip = pip; 4704 break; 4705 } else { 4706 MDI_PHCI_LOCK(ph); 4707 pip = next; 4708 } 4709 } else { 4710 i_mdi_client_unlock(ct); 4711 pip = next; 4712 } 4713 } 4714 4715 if (failed_pip) { 4716 pip = ph->ph_path_head; 4717 while (pip != failed_pip) { 4718 MDI_PI_LOCK(pip); 4719 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4720 ct = MDI_PI(pip)->pi_client; 4721 i_mdi_client_lock(ct, pip); 4722 cdip = ct->ct_dip; 4723 switch (MDI_CLIENT_STATE(ct)) { 4724 case MDI_CLIENT_STATE_OPTIMAL: 4725 case MDI_CLIENT_STATE_DEGRADED: 4726 if (cdip) { 4727 MDI_PI_UNLOCK(pip); 4728 i_mdi_client_unlock(ct); 4729 MDI_PHCI_UNLOCK(ph); 4730 (void) ndi_devi_online(cdip, 0); 4731 MDI_PHCI_LOCK(ph); 4732 pip = next; 4733 continue; 4734 } 4735 break; 4736 4737 case MDI_CLIENT_STATE_FAILED: 4738 if (cdip) { 4739 MDI_PI_UNLOCK(pip); 4740 i_mdi_client_unlock(ct); 4741 MDI_PHCI_UNLOCK(ph); 4742 (void) ndi_devi_offline(cdip, 0); 4743 MDI_PHCI_LOCK(ph); 4744 pip = next; 4745 continue; 4746 } 4747 break; 4748 } 4749 MDI_PI_UNLOCK(pip); 4750 i_mdi_client_unlock(ct); 4751 pip = next; 4752 } 4753 MDI_PHCI_UNLOCK(ph); 4754 return (NDI_BUSY); 4755 } 4756 4757 /* 4758 * Mark the pHCI as offline 4759 */ 4760 MDI_PHCI_SET_OFFLINE(ph); 4761 4762 /* 4763 * Mark the child mdi_pathinfo nodes as transient 4764 */ 4765 pip = ph->ph_path_head; 4766 while (pip != NULL) { 4767 MDI_PI_LOCK(pip); 4768 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4769 MDI_PI_SET_OFFLINING(pip); 4770 MDI_PI_UNLOCK(pip); 4771 pip = next; 4772 } 4773 MDI_PHCI_UNLOCK(ph); 4774 /* 4775 * Give a chance for any pending commands to execute 4776 */ 4777 delay(1); 4778 MDI_PHCI_LOCK(ph); 4779 pip = ph->ph_path_head; 4780 while (pip != NULL) { 4781 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4782 (void) i_mdi_pi_offline(pip, flags); 4783 MDI_PI_LOCK(pip); 4784 ct = MDI_PI(pip)->pi_client; 4785 if (!MDI_PI_IS_OFFLINE(pip)) { 4786 MDI_DEBUG(1, (CE_WARN, dip, 4787 "!pHCI device (%s%d) 
is Busy. %s", 4788 ddi_driver_name(dip), ddi_get_instance(dip), 4789 "This device can not be removed at " 4790 "this moment. Please try again later.")); 4791 MDI_PI_UNLOCK(pip); 4792 MDI_PHCI_SET_ONLINE(ph); 4793 MDI_PHCI_UNLOCK(ph); 4794 return (NDI_BUSY); 4795 } 4796 MDI_PI_UNLOCK(pip); 4797 pip = next; 4798 } 4799 MDI_PHCI_UNLOCK(ph); 4800 4801 return (rv); 4802 } 4803 4804 /*ARGSUSED*/ 4805 static int 4806 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4807 { 4808 int rv = NDI_SUCCESS; 4809 mdi_client_t *ct; 4810 4811 /* 4812 * Client component to go offline. Make sure that we are 4813 * not in failing over state and update client state 4814 * accordingly 4815 */ 4816 ct = i_devi_get_client(dip); 4817 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 4818 (void *)dip, (void *)ct)); 4819 if (ct != NULL) { 4820 MDI_CLIENT_LOCK(ct); 4821 if (ct->ct_unstable) { 4822 /* 4823 * One or more paths are in transient state, 4824 * Dont allow offline of a client device 4825 */ 4826 MDI_DEBUG(1, (CE_WARN, dip, 4827 "!One or more paths to this device is " 4828 "in transient state. This device can not " 4829 "be removed at this moment. " 4830 "Please try again later.")); 4831 MDI_CLIENT_UNLOCK(ct); 4832 return (NDI_BUSY); 4833 } 4834 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4835 /* 4836 * Failover is in progress, Dont allow DR of 4837 * a client device 4838 */ 4839 MDI_DEBUG(1, (CE_WARN, dip, 4840 "!Client device (%s%d) is Busy. %s", 4841 ddi_driver_name(dip), ddi_get_instance(dip), 4842 "This device can not be removed at " 4843 "this moment. 
Please try again later.")); 4844 MDI_CLIENT_UNLOCK(ct); 4845 return (NDI_BUSY); 4846 } 4847 MDI_CLIENT_SET_OFFLINE(ct); 4848 4849 /* 4850 * Unbind our relationship with the dev_info node 4851 */ 4852 if (flags & NDI_DEVI_REMOVE) { 4853 ct->ct_dip = NULL; 4854 } 4855 MDI_CLIENT_UNLOCK(ct); 4856 } 4857 return (rv); 4858 } 4859 4860 /* 4861 * mdi_pre_attach(): 4862 * Pre attach() notification handler 4863 */ 4864 /*ARGSUSED*/ 4865 int 4866 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4867 { 4868 /* don't support old DDI_PM_RESUME */ 4869 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4870 (cmd == DDI_PM_RESUME)) 4871 return (DDI_FAILURE); 4872 4873 return (DDI_SUCCESS); 4874 } 4875 4876 /* 4877 * mdi_post_attach(): 4878 * Post attach() notification handler 4879 */ 4880 /*ARGSUSED*/ 4881 void 4882 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4883 { 4884 mdi_phci_t *ph; 4885 mdi_client_t *ct; 4886 mdi_pathinfo_t *pip; 4887 4888 if (MDI_PHCI(dip)) { 4889 ph = i_devi_get_phci(dip); 4890 ASSERT(ph != NULL); 4891 4892 MDI_PHCI_LOCK(ph); 4893 switch (cmd) { 4894 case DDI_ATTACH: 4895 MDI_DEBUG(2, (CE_NOTE, dip, 4896 "!pHCI post_attach: called %p\n", (void *)ph)); 4897 if (error == DDI_SUCCESS) { 4898 MDI_PHCI_SET_ATTACH(ph); 4899 } else { 4900 MDI_DEBUG(1, (CE_NOTE, dip, 4901 "!pHCI post_attach: failed error=%d\n", 4902 error)); 4903 MDI_PHCI_SET_DETACH(ph); 4904 } 4905 break; 4906 4907 case DDI_RESUME: 4908 MDI_DEBUG(2, (CE_NOTE, dip, 4909 "!pHCI post_resume: called %p\n", (void *)ph)); 4910 if (error == DDI_SUCCESS) { 4911 MDI_PHCI_SET_RESUME(ph); 4912 } else { 4913 MDI_DEBUG(1, (CE_NOTE, dip, 4914 "!pHCI post_resume: failed error=%d\n", 4915 error)); 4916 MDI_PHCI_SET_SUSPEND(ph); 4917 } 4918 break; 4919 } 4920 MDI_PHCI_UNLOCK(ph); 4921 } 4922 4923 if (MDI_CLIENT(dip)) { 4924 ct = i_devi_get_client(dip); 4925 ASSERT(ct != NULL); 4926 4927 MDI_CLIENT_LOCK(ct); 4928 switch (cmd) { 4929 case DDI_ATTACH: 4930 MDI_DEBUG(2, (CE_NOTE, 
dip, 4931 "!Client post_attach: called %p\n", (void *)ct)); 4932 if (error != DDI_SUCCESS) { 4933 MDI_DEBUG(1, (CE_NOTE, dip, 4934 "!Client post_attach: failed error=%d\n", 4935 error)); 4936 MDI_CLIENT_SET_DETACH(ct); 4937 MDI_DEBUG(4, (CE_WARN, dip, 4938 "mdi_post_attach i_mdi_pm_reset_client\n")); 4939 i_mdi_pm_reset_client(ct); 4940 break; 4941 } 4942 4943 /* 4944 * Client device has successfully attached. 4945 * Create kstats for any pathinfo structures 4946 * initially associated with this client. 4947 */ 4948 for (pip = ct->ct_path_head; pip != NULL; 4949 pip = (mdi_pathinfo_t *) 4950 MDI_PI(pip)->pi_client_link) { 4951 if (!MDI_PI_IS_OFFLINE(pip)) { 4952 (void) i_mdi_pi_kstat_create(pip); 4953 i_mdi_report_path_state(ct, pip); 4954 } 4955 } 4956 MDI_CLIENT_SET_ATTACH(ct); 4957 break; 4958 4959 case DDI_RESUME: 4960 MDI_DEBUG(2, (CE_NOTE, dip, 4961 "!Client post_attach: called %p\n", (void *)ct)); 4962 if (error == DDI_SUCCESS) { 4963 MDI_CLIENT_SET_RESUME(ct); 4964 } else { 4965 MDI_DEBUG(1, (CE_NOTE, dip, 4966 "!Client post_resume: failed error=%d\n", 4967 error)); 4968 MDI_CLIENT_SET_SUSPEND(ct); 4969 } 4970 break; 4971 } 4972 MDI_CLIENT_UNLOCK(ct); 4973 } 4974 } 4975 4976 /* 4977 * mdi_pre_detach(): 4978 * Pre detach notification handler 4979 */ 4980 /*ARGSUSED*/ 4981 int 4982 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4983 { 4984 int rv = DDI_SUCCESS; 4985 4986 if (MDI_CLIENT(dip)) { 4987 (void) i_mdi_client_pre_detach(dip, cmd); 4988 } 4989 4990 if (MDI_PHCI(dip)) { 4991 rv = i_mdi_phci_pre_detach(dip, cmd); 4992 } 4993 4994 return (rv); 4995 } 4996 4997 /*ARGSUSED*/ 4998 static int 4999 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5000 { 5001 int rv = DDI_SUCCESS; 5002 mdi_phci_t *ph; 5003 mdi_client_t *ct; 5004 mdi_pathinfo_t *pip; 5005 mdi_pathinfo_t *failed_pip = NULL; 5006 mdi_pathinfo_t *next; 5007 5008 ph = i_devi_get_phci(dip); 5009 if (ph == NULL) { 5010 return (rv); 5011 } 5012 5013 MDI_PHCI_LOCK(ph); 5014 switch 
(cmd) { 5015 case DDI_DETACH: 5016 MDI_DEBUG(2, (CE_NOTE, dip, 5017 "!pHCI pre_detach: called %p\n", (void *)ph)); 5018 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5019 /* 5020 * mdi_pathinfo nodes are still attached to 5021 * this pHCI. Fail the detach for this pHCI. 5022 */ 5023 MDI_DEBUG(2, (CE_WARN, dip, 5024 "!pHCI pre_detach: " 5025 "mdi_pathinfo nodes are still attached " 5026 "%p\n", (void *)ph)); 5027 rv = DDI_FAILURE; 5028 break; 5029 } 5030 MDI_PHCI_SET_DETACH(ph); 5031 break; 5032 5033 case DDI_SUSPEND: 5034 /* 5035 * pHCI is getting suspended. Since mpxio client 5036 * devices may not be suspended at this point, to avoid 5037 * a potential stack overflow, it is important to suspend 5038 * client devices before pHCI can be suspended. 5039 */ 5040 5041 MDI_DEBUG(2, (CE_NOTE, dip, 5042 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5043 /* 5044 * Suspend all the client devices accessible through this pHCI 5045 */ 5046 pip = ph->ph_path_head; 5047 while (pip != NULL && rv == DDI_SUCCESS) { 5048 dev_info_t *cdip; 5049 MDI_PI_LOCK(pip); 5050 next = 5051 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5052 ct = MDI_PI(pip)->pi_client; 5053 i_mdi_client_lock(ct, pip); 5054 cdip = ct->ct_dip; 5055 MDI_PI_UNLOCK(pip); 5056 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5057 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5058 i_mdi_client_unlock(ct); 5059 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5060 DDI_SUCCESS) { 5061 /* 5062 * Suspend of one of the client 5063 * device has failed. 5064 */ 5065 MDI_DEBUG(1, (CE_WARN, dip, 5066 "!Suspend of device (%s%d) failed.", 5067 ddi_driver_name(cdip), 5068 ddi_get_instance(cdip))); 5069 failed_pip = pip; 5070 break; 5071 } 5072 } else { 5073 i_mdi_client_unlock(ct); 5074 } 5075 pip = next; 5076 } 5077 5078 if (rv == DDI_SUCCESS) { 5079 /* 5080 * Suspend of client devices is complete. Proceed 5081 * with pHCI suspend. 
5082 */ 5083 MDI_PHCI_SET_SUSPEND(ph); 5084 } else { 5085 /* 5086 * Revert back all the suspended client device states 5087 * to converse. 5088 */ 5089 pip = ph->ph_path_head; 5090 while (pip != failed_pip) { 5091 dev_info_t *cdip; 5092 MDI_PI_LOCK(pip); 5093 next = 5094 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5095 ct = MDI_PI(pip)->pi_client; 5096 i_mdi_client_lock(ct, pip); 5097 cdip = ct->ct_dip; 5098 MDI_PI_UNLOCK(pip); 5099 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5100 i_mdi_client_unlock(ct); 5101 (void) devi_attach(cdip, DDI_RESUME); 5102 } else { 5103 i_mdi_client_unlock(ct); 5104 } 5105 pip = next; 5106 } 5107 } 5108 break; 5109 5110 default: 5111 rv = DDI_FAILURE; 5112 break; 5113 } 5114 MDI_PHCI_UNLOCK(ph); 5115 return (rv); 5116 } 5117 5118 /*ARGSUSED*/ 5119 static int 5120 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5121 { 5122 int rv = DDI_SUCCESS; 5123 mdi_client_t *ct; 5124 5125 ct = i_devi_get_client(dip); 5126 if (ct == NULL) { 5127 return (rv); 5128 } 5129 5130 MDI_CLIENT_LOCK(ct); 5131 switch (cmd) { 5132 case DDI_DETACH: 5133 MDI_DEBUG(2, (CE_NOTE, dip, 5134 "!Client pre_detach: called %p\n", (void *)ct)); 5135 MDI_CLIENT_SET_DETACH(ct); 5136 break; 5137 5138 case DDI_SUSPEND: 5139 MDI_DEBUG(2, (CE_NOTE, dip, 5140 "!Client pre_suspend: called %p\n", (void *)ct)); 5141 MDI_CLIENT_SET_SUSPEND(ct); 5142 break; 5143 5144 default: 5145 rv = DDI_FAILURE; 5146 break; 5147 } 5148 MDI_CLIENT_UNLOCK(ct); 5149 return (rv); 5150 } 5151 5152 /* 5153 * mdi_post_detach(): 5154 * Post detach notification handler 5155 */ 5156 /*ARGSUSED*/ 5157 void 5158 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5159 { 5160 /* 5161 * Detach/Suspend of mpxio component failed. 
Update our state 5162 * too 5163 */ 5164 if (MDI_PHCI(dip)) 5165 i_mdi_phci_post_detach(dip, cmd, error); 5166 5167 if (MDI_CLIENT(dip)) 5168 i_mdi_client_post_detach(dip, cmd, error); 5169 } 5170 5171 /*ARGSUSED*/ 5172 static void 5173 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5174 { 5175 mdi_phci_t *ph; 5176 5177 /* 5178 * Detach/Suspend of phci component failed. Update our state 5179 * too 5180 */ 5181 ph = i_devi_get_phci(dip); 5182 if (ph == NULL) { 5183 return; 5184 } 5185 5186 MDI_PHCI_LOCK(ph); 5187 /* 5188 * Detach of pHCI failed. Restore back converse 5189 * state 5190 */ 5191 switch (cmd) { 5192 case DDI_DETACH: 5193 MDI_DEBUG(2, (CE_NOTE, dip, 5194 "!pHCI post_detach: called %p\n", (void *)ph)); 5195 if (error != DDI_SUCCESS) 5196 MDI_PHCI_SET_ATTACH(ph); 5197 break; 5198 5199 case DDI_SUSPEND: 5200 MDI_DEBUG(2, (CE_NOTE, dip, 5201 "!pHCI post_suspend: called %p\n", (void *)ph)); 5202 if (error != DDI_SUCCESS) 5203 MDI_PHCI_SET_RESUME(ph); 5204 break; 5205 } 5206 MDI_PHCI_UNLOCK(ph); 5207 } 5208 5209 /*ARGSUSED*/ 5210 static void 5211 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5212 { 5213 mdi_client_t *ct; 5214 5215 ct = i_devi_get_client(dip); 5216 if (ct == NULL) { 5217 return; 5218 } 5219 MDI_CLIENT_LOCK(ct); 5220 /* 5221 * Detach of Client failed. 
Restore back converse 5222 * state 5223 */ 5224 switch (cmd) { 5225 case DDI_DETACH: 5226 MDI_DEBUG(2, (CE_NOTE, dip, 5227 "!Client post_detach: called %p\n", (void *)ct)); 5228 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5229 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5230 "i_mdi_pm_rele_client\n")); 5231 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5232 } else { 5233 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5234 "i_mdi_pm_reset_client\n")); 5235 i_mdi_pm_reset_client(ct); 5236 } 5237 if (error != DDI_SUCCESS) 5238 MDI_CLIENT_SET_ATTACH(ct); 5239 break; 5240 5241 case DDI_SUSPEND: 5242 MDI_DEBUG(2, (CE_NOTE, dip, 5243 "!Client post_suspend: called %p\n", (void *)ct)); 5244 if (error != DDI_SUCCESS) 5245 MDI_CLIENT_SET_RESUME(ct); 5246 break; 5247 } 5248 MDI_CLIENT_UNLOCK(ct); 5249 } 5250 5251 /* 5252 * create and install per-path (client - pHCI) statistics 5253 * I/O stats supported: nread, nwritten, reads, and writes 5254 * Error stats - hard errors, soft errors, & transport errors 5255 */ 5256 static int 5257 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip) 5258 { 5259 5260 dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip; 5261 dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip; 5262 char ksname[KSTAT_STRLEN]; 5263 mdi_pathinfo_t *cpip; 5264 const char *err_postfix = ",err"; 5265 kstat_t *kiosp, *kerrsp; 5266 struct pi_errs *nsp; 5267 struct mdi_pi_kstats *mdi_statp; 5268 5269 ASSERT(client != NULL && ppath != NULL); 5270 5271 ASSERT(MDI_CLIENT_LOCKED(MDI_PI(pip)->pi_client)); 5272 5273 if (MDI_PI(pip)->pi_kstats != NULL) 5274 return (MDI_SUCCESS); 5275 5276 for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL; 5277 cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) { 5278 if ((cpip == pip) || MDI_PI_IS_OFFLINE(pip)) 5279 continue; 5280 /* 5281 * We have found a different path with same parent 5282 * kstats for a given client-pHCI are common 5283 */ 5284 if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) && 5285 (MDI_PI(cpip)->pi_kstats 
!= NULL)) { 5286 MDI_PI(cpip)->pi_kstats->pi_kstat_ref++; 5287 MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats; 5288 return (MDI_SUCCESS); 5289 } 5290 } 5291 5292 /* 5293 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0" 5294 * clamp length of name against max length of error kstat name 5295 */ 5296 if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d", 5297 ddi_driver_name(client), ddi_get_instance(client), 5298 ddi_driver_name(ppath), ddi_get_instance(ppath)) > 5299 (KSTAT_STRLEN - strlen(err_postfix))) { 5300 return (MDI_FAILURE); 5301 } 5302 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5303 KSTAT_TYPE_IO, 1, 0)) == NULL) { 5304 return (MDI_FAILURE); 5305 } 5306 5307 (void) strcat(ksname, err_postfix); 5308 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5309 KSTAT_TYPE_NAMED, 5310 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5311 5312 if (kerrsp == NULL) { 5313 kstat_delete(kiosp); 5314 return (MDI_FAILURE); 5315 } 5316 5317 nsp = (struct pi_errs *)kerrsp->ks_data; 5318 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5319 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5320 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5321 KSTAT_DATA_UINT32); 5322 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5323 KSTAT_DATA_UINT32); 5324 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5325 KSTAT_DATA_UINT32); 5326 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5327 KSTAT_DATA_UINT32); 5328 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5329 KSTAT_DATA_UINT32); 5330 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5331 KSTAT_DATA_UINT32); 5332 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5333 KSTAT_DATA_UINT32); 5334 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5335 5336 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5337 mdi_statp->pi_kstat_ref = 1; 5338 mdi_statp->pi_kstat_iostats = 
kiosp; 5339 mdi_statp->pi_kstat_errstats = kerrsp; 5340 kstat_install(kiosp); 5341 kstat_install(kerrsp); 5342 MDI_PI(pip)->pi_kstats = mdi_statp; 5343 return (MDI_SUCCESS); 5344 } 5345 5346 /* 5347 * destroy per-path properties 5348 */ 5349 static void 5350 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5351 { 5352 5353 struct mdi_pi_kstats *mdi_statp; 5354 5355 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5356 return; 5357 5358 MDI_PI(pip)->pi_kstats = NULL; 5359 5360 /* 5361 * the kstat may be shared between multiple pathinfo nodes 5362 * decrement this pathinfo's usage, removing the kstats 5363 * themselves when the last pathinfo reference is removed. 5364 */ 5365 ASSERT(mdi_statp->pi_kstat_ref > 0); 5366 if (--mdi_statp->pi_kstat_ref != 0) 5367 return; 5368 5369 kstat_delete(mdi_statp->pi_kstat_iostats); 5370 kstat_delete(mdi_statp->pi_kstat_errstats); 5371 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5372 } 5373 5374 /* 5375 * update I/O paths KSTATS 5376 */ 5377 void 5378 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5379 { 5380 kstat_t *iostatp; 5381 size_t xfer_cnt; 5382 5383 ASSERT(pip != NULL); 5384 5385 /* 5386 * I/O can be driven across a path prior to having path 5387 * statistics available, i.e. probe(9e). 5388 */ 5389 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5390 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5391 xfer_cnt = bp->b_bcount - bp->b_resid; 5392 if (bp->b_flags & B_READ) { 5393 KSTAT_IO_PTR(iostatp)->reads++; 5394 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5395 } else { 5396 KSTAT_IO_PTR(iostatp)->writes++; 5397 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5398 } 5399 } 5400 } 5401 5402 /* 5403 * Enable the path(specific client/target/initiator) 5404 * Enabling a path means that MPxIO may select the enabled path for routing 5405 * future I/O requests, subject to other path state constraints. 
5406 */ 5407 int 5408 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5409 { 5410 mdi_phci_t *ph; 5411 5412 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5413 if (ph == NULL) { 5414 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5415 " failed. pip: %p ph = NULL\n", (void *)pip)); 5416 return (MDI_FAILURE); 5417 } 5418 5419 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5420 MDI_ENABLE_OP); 5421 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5422 " Returning success pip = %p. ph = %p\n", 5423 (void *)pip, (void *)ph)); 5424 return (MDI_SUCCESS); 5425 5426 } 5427 5428 /* 5429 * Disable the path (specific client/target/initiator) 5430 * Disabling a path means that MPxIO will not select the disabled path for 5431 * routing any new I/O requests. 5432 */ 5433 int 5434 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5435 { 5436 mdi_phci_t *ph; 5437 5438 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5439 if (ph == NULL) { 5440 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5441 " failed. pip: %p ph = NULL\n", (void *)pip)); 5442 return (MDI_FAILURE); 5443 } 5444 5445 (void) i_mdi_enable_disable_path(pip, 5446 ph->ph_vhci, flags, MDI_DISABLE_OP); 5447 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5448 "Returning success pip = %p. ph = %p", 5449 (void *)pip, (void *)ph)); 5450 return (MDI_SUCCESS); 5451 } 5452 5453 /* 5454 * disable the path to a particular pHCI (pHCI specified in the phci_path 5455 * argument) for a particular client (specified in the client_path argument). 5456 * Disabling a path means that MPxIO will not select the disabled path for 5457 * routing any new I/O requests. 
5458 * NOTE: this will be removed once the NWS files are changed to use the new 5459 * mdi_{enable,disable}_path interfaces 5460 */ 5461 int 5462 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5463 { 5464 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5465 } 5466 5467 /* 5468 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5469 * argument) for a particular client (specified in the client_path argument). 5470 * Enabling a path means that MPxIO may select the enabled path for routing 5471 * future I/O requests, subject to other path state constraints. 5472 * NOTE: this will be removed once the NWS files are changed to use the new 5473 * mdi_{enable,disable}_path interfaces 5474 */ 5475 5476 int 5477 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5478 { 5479 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5480 } 5481 5482 /* 5483 * Common routine for doing enable/disable. 5484 */ 5485 static mdi_pathinfo_t * 5486 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5487 int op) 5488 { 5489 int sync_flag = 0; 5490 int rv; 5491 mdi_pathinfo_t *next; 5492 int (*f)() = NULL; 5493 5494 f = vh->vh_ops->vo_pi_state_change; 5495 5496 sync_flag = (flags << 8) & 0xf00; 5497 5498 /* 5499 * Do a callback into the mdi consumer to let it 5500 * know that path is about to get enabled/disabled. 
5501 */ 5502 if (f != NULL) { 5503 rv = (*f)(vh->vh_dip, pip, 0, 5504 MDI_PI_EXT_STATE(pip), 5505 MDI_EXT_STATE_CHANGE | sync_flag | 5506 op | MDI_BEFORE_STATE_CHANGE); 5507 if (rv != MDI_SUCCESS) { 5508 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5509 "!vo_pi_state_change: failed rv = %x", rv)); 5510 } 5511 } 5512 MDI_PI_LOCK(pip); 5513 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5514 5515 switch (flags) { 5516 case USER_DISABLE: 5517 if (op == MDI_DISABLE_OP) { 5518 MDI_PI_SET_USER_DISABLE(pip); 5519 } else { 5520 MDI_PI_SET_USER_ENABLE(pip); 5521 } 5522 break; 5523 case DRIVER_DISABLE: 5524 if (op == MDI_DISABLE_OP) { 5525 MDI_PI_SET_DRV_DISABLE(pip); 5526 } else { 5527 MDI_PI_SET_DRV_ENABLE(pip); 5528 } 5529 break; 5530 case DRIVER_DISABLE_TRANSIENT: 5531 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5532 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5533 } else { 5534 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5535 } 5536 break; 5537 } 5538 MDI_PI_UNLOCK(pip); 5539 /* 5540 * Do a callback into the mdi consumer to let it 5541 * know that path is now enabled/disabled. 5542 */ 5543 if (f != NULL) { 5544 rv = (*f)(vh->vh_dip, pip, 0, 5545 MDI_PI_EXT_STATE(pip), 5546 MDI_EXT_STATE_CHANGE | sync_flag | 5547 op | MDI_AFTER_STATE_CHANGE); 5548 if (rv != MDI_SUCCESS) { 5549 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5550 "!vo_pi_state_change: failed rv = %x", rv)); 5551 } 5552 } 5553 return (next); 5554 } 5555 5556 /* 5557 * Common routine for doing enable/disable. 
5558 * NOTE: this will be removed once the NWS files are changed to use the new 5559 * mdi_{enable,disable}_path has been putback 5560 */ 5561 int 5562 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5563 { 5564 5565 mdi_phci_t *ph; 5566 mdi_vhci_t *vh = NULL; 5567 mdi_client_t *ct; 5568 mdi_pathinfo_t *next, *pip; 5569 int found_it; 5570 5571 ph = i_devi_get_phci(pdip); 5572 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5573 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5574 (void *)cdip)); 5575 if (ph == NULL) { 5576 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5577 "Op %d failed. ph = NULL\n", op)); 5578 return (MDI_FAILURE); 5579 } 5580 5581 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5582 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5583 "Op Invalid operation = %d\n", op)); 5584 return (MDI_FAILURE); 5585 } 5586 5587 vh = ph->ph_vhci; 5588 5589 if (cdip == NULL) { 5590 /* 5591 * Need to mark the Phci as enabled/disabled. 5592 */ 5593 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5594 "Op %d for the phci\n", op)); 5595 MDI_PHCI_LOCK(ph); 5596 switch (flags) { 5597 case USER_DISABLE: 5598 if (op == MDI_DISABLE_OP) { 5599 MDI_PHCI_SET_USER_DISABLE(ph); 5600 } else { 5601 MDI_PHCI_SET_USER_ENABLE(ph); 5602 } 5603 break; 5604 case DRIVER_DISABLE: 5605 if (op == MDI_DISABLE_OP) { 5606 MDI_PHCI_SET_DRV_DISABLE(ph); 5607 } else { 5608 MDI_PHCI_SET_DRV_ENABLE(ph); 5609 } 5610 break; 5611 case DRIVER_DISABLE_TRANSIENT: 5612 if (op == MDI_DISABLE_OP) { 5613 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5614 } else { 5615 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5616 } 5617 break; 5618 default: 5619 MDI_PHCI_UNLOCK(ph); 5620 MDI_DEBUG(1, (CE_NOTE, NULL, 5621 "!i_mdi_pi_enable_disable:" 5622 " Invalid flag argument= %d\n", flags)); 5623 } 5624 5625 /* 5626 * Phci has been disabled. Now try to enable/disable 5627 * path info's to each client. 
5628 */ 5629 pip = ph->ph_path_head; 5630 while (pip != NULL) { 5631 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5632 } 5633 MDI_PHCI_UNLOCK(ph); 5634 } else { 5635 5636 /* 5637 * Disable a specific client. 5638 */ 5639 ct = i_devi_get_client(cdip); 5640 if (ct == NULL) { 5641 MDI_DEBUG(1, (CE_NOTE, NULL, 5642 "!i_mdi_pi_enable_disable:" 5643 " failed. ct = NULL operation = %d\n", op)); 5644 return (MDI_FAILURE); 5645 } 5646 5647 MDI_CLIENT_LOCK(ct); 5648 pip = ct->ct_path_head; 5649 found_it = 0; 5650 while (pip != NULL) { 5651 MDI_PI_LOCK(pip); 5652 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5653 if (MDI_PI(pip)->pi_phci == ph) { 5654 MDI_PI_UNLOCK(pip); 5655 found_it = 1; 5656 break; 5657 } 5658 MDI_PI_UNLOCK(pip); 5659 pip = next; 5660 } 5661 5662 5663 MDI_CLIENT_UNLOCK(ct); 5664 if (found_it == 0) { 5665 MDI_DEBUG(1, (CE_NOTE, NULL, 5666 "!i_mdi_pi_enable_disable:" 5667 " failed. Could not find corresponding pip\n")); 5668 return (MDI_FAILURE); 5669 } 5670 5671 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5672 } 5673 5674 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5675 "Op %d Returning success pdip = %p cdip = %p\n", 5676 op, (void *)pdip, (void *)cdip)); 5677 return (MDI_SUCCESS); 5678 } 5679 5680 /* 5681 * Ensure phci powered up 5682 */ 5683 static void 5684 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5685 { 5686 dev_info_t *ph_dip; 5687 5688 ASSERT(pip != NULL); 5689 ASSERT(MDI_PI_LOCKED(pip)); 5690 5691 if (MDI_PI(pip)->pi_pm_held) { 5692 return; 5693 } 5694 5695 ph_dip = mdi_pi_get_phci(pip); 5696 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 5697 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5698 if (ph_dip == NULL) { 5699 return; 5700 } 5701 5702 MDI_PI_UNLOCK(pip); 5703 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5704 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5705 5706 pm_hold_power(ph_dip); 5707 5708 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5709 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 5710 MDI_PI_LOCK(pip); 5711 5712 MDI_PI(pip)->pi_pm_held = 1; 5713 } 5714 5715 /* 5716 * Allow phci powered down 5717 */ 5718 static void 5719 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5720 { 5721 dev_info_t *ph_dip = NULL; 5722 5723 ASSERT(pip != NULL); 5724 ASSERT(MDI_PI_LOCKED(pip)); 5725 5726 if (MDI_PI(pip)->pi_pm_held == 0) { 5727 return; 5728 } 5729 5730 ph_dip = mdi_pi_get_phci(pip); 5731 ASSERT(ph_dip != NULL); 5732 5733 MDI_PI_UNLOCK(pip); 5734 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 5735 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5736 5737 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5738 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5739 pm_rele_power(ph_dip); 5740 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5741 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5742 5743 MDI_PI_LOCK(pip); 5744 MDI_PI(pip)->pi_pm_held = 0; 5745 } 5746 5747 static void 5748 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5749 { 5750 ASSERT(MDI_CLIENT_LOCKED(ct)); 5751 5752 ct->ct_power_cnt += incr; 5753 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 5754 "ct_power_cnt = %d incr = %d\n", (void *)ct, 5755 ct->ct_power_cnt, incr)); 5756 ASSERT(ct->ct_power_cnt >= 0); 5757 } 5758 5759 static void 5760 i_mdi_rele_all_phci(mdi_client_t *ct) 5761 { 5762 mdi_pathinfo_t *pip; 5763 5764 ASSERT(MDI_CLIENT_LOCKED(ct)); 5765 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5766 while (pip != NULL) { 5767 mdi_hold_path(pip); 5768 MDI_PI_LOCK(pip); 5769 i_mdi_pm_rele_pip(pip); 5770 MDI_PI_UNLOCK(pip); 5771 mdi_rele_path(pip); 5772 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5773 } 5774 } 5775 5776 static void 5777 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5778 { 5779 ASSERT(MDI_CLIENT_LOCKED(ct)); 5780 5781 if (i_ddi_devi_attached(ct->ct_dip)) { 5782 ct->ct_power_cnt -= decr; 5783 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 5784 "ct_power_cnt = %d decr = %d\n", 5785 (void 
*)ct, ct->ct_power_cnt, decr)); 5786 } 5787 5788 ASSERT(ct->ct_power_cnt >= 0); 5789 if (ct->ct_power_cnt == 0) { 5790 i_mdi_rele_all_phci(ct); 5791 return; 5792 } 5793 } 5794 5795 static void 5796 i_mdi_pm_reset_client(mdi_client_t *ct) 5797 { 5798 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 5799 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 5800 ASSERT(MDI_CLIENT_LOCKED(ct)); 5801 ct->ct_power_cnt = 0; 5802 i_mdi_rele_all_phci(ct); 5803 ct->ct_powercnt_config = 0; 5804 ct->ct_powercnt_unconfig = 0; 5805 ct->ct_powercnt_reset = 1; 5806 } 5807 5808 static int 5809 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5810 { 5811 int ret; 5812 dev_info_t *ph_dip; 5813 5814 MDI_PI_LOCK(pip); 5815 i_mdi_pm_hold_pip(pip); 5816 5817 ph_dip = mdi_pi_get_phci(pip); 5818 MDI_PI_UNLOCK(pip); 5819 5820 /* bring all components of phci to full power */ 5821 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5822 "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip), 5823 ddi_get_instance(ph_dip), (void *)pip)); 5824 5825 ret = pm_powerup(ph_dip); 5826 5827 if (ret == DDI_FAILURE) { 5828 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5829 "pm_powerup FAILED for %s%d %p\n", 5830 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), 5831 (void *)pip)); 5832 5833 MDI_PI_LOCK(pip); 5834 i_mdi_pm_rele_pip(pip); 5835 MDI_PI_UNLOCK(pip); 5836 return (MDI_FAILURE); 5837 } 5838 5839 return (MDI_SUCCESS); 5840 } 5841 5842 static int 5843 i_mdi_power_all_phci(mdi_client_t *ct) 5844 { 5845 mdi_pathinfo_t *pip; 5846 int succeeded = 0; 5847 5848 ASSERT(MDI_CLIENT_LOCKED(ct)); 5849 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5850 while (pip != NULL) { 5851 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 5852 mdi_hold_path(pip); 5853 MDI_CLIENT_UNLOCK(ct); 5854 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5855 succeeded = 1; 5856 5857 ASSERT(ct == MDI_PI(pip)->pi_client); 5858 MDI_CLIENT_LOCK(ct); 5859 mdi_rele_path(pip); 5860 } 5861 pip = (mdi_pathinfo_t 
*)MDI_PI(pip)->pi_client_link; 5862 } 5863 5864 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5865 } 5866 5867 /* 5868 * mdi_bus_power(): 5869 * 1. Place the phci(s) into powered up state so that 5870 * client can do power management 5871 * 2. Ensure phci powered up as client power managing 5872 * Return Values: 5873 * MDI_SUCCESS 5874 * MDI_FAILURE 5875 */ 5876 int 5877 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5878 void *arg, void *result) 5879 { 5880 int ret = MDI_SUCCESS; 5881 pm_bp_child_pwrchg_t *bpc; 5882 mdi_client_t *ct; 5883 dev_info_t *cdip; 5884 pm_bp_has_changed_t *bphc; 5885 5886 /* 5887 * BUS_POWER_NOINVOL not supported 5888 */ 5889 if (op == BUS_POWER_NOINVOL) 5890 return (MDI_FAILURE); 5891 5892 /* 5893 * ignore other OPs. 5894 * return quickly to save cou cycles on the ct processing 5895 */ 5896 switch (op) { 5897 case BUS_POWER_PRE_NOTIFICATION: 5898 case BUS_POWER_POST_NOTIFICATION: 5899 bpc = (pm_bp_child_pwrchg_t *)arg; 5900 cdip = bpc->bpc_dip; 5901 break; 5902 case BUS_POWER_HAS_CHANGED: 5903 bphc = (pm_bp_has_changed_t *)arg; 5904 cdip = bphc->bphc_dip; 5905 break; 5906 default: 5907 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5908 } 5909 5910 ASSERT(MDI_CLIENT(cdip)); 5911 5912 ct = i_devi_get_client(cdip); 5913 if (ct == NULL) 5914 return (MDI_FAILURE); 5915 5916 /* 5917 * wait till the mdi_pathinfo node state change are processed 5918 */ 5919 MDI_CLIENT_LOCK(ct); 5920 switch (op) { 5921 case BUS_POWER_PRE_NOTIFICATION: 5922 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5923 "BUS_POWER_PRE_NOTIFICATION:" 5924 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5925 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5926 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5927 5928 /* serialize power level change per client */ 5929 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5930 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5931 5932 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5933 5934 if 
(ct->ct_power_cnt == 0) { 5935 ret = i_mdi_power_all_phci(ct); 5936 } 5937 5938 /* 5939 * if new_level > 0: 5940 * - hold phci(s) 5941 * - power up phci(s) if not already 5942 * ignore power down 5943 */ 5944 if (bpc->bpc_nlevel > 0) { 5945 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5946 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5947 "mdi_bus_power i_mdi_pm_hold_client\n")); 5948 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5949 } 5950 } 5951 break; 5952 case BUS_POWER_POST_NOTIFICATION: 5953 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5954 "BUS_POWER_POST_NOTIFICATION:" 5955 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5956 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5957 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5958 *(int *)result)); 5959 5960 if (*(int *)result == DDI_SUCCESS) { 5961 if (bpc->bpc_nlevel > 0) { 5962 MDI_CLIENT_SET_POWER_UP(ct); 5963 } else { 5964 MDI_CLIENT_SET_POWER_DOWN(ct); 5965 } 5966 } 5967 5968 /* release the hold we did in pre-notification */ 5969 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5970 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5971 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5972 "mdi_bus_power i_mdi_pm_rele_client\n")); 5973 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5974 } 5975 5976 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5977 /* another thread might started attaching */ 5978 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5979 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5980 "mdi_bus_power i_mdi_pm_rele_client\n")); 5981 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5982 /* detaching has been taken care in pm_post_unconfig */ 5983 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5984 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5985 "mdi_bus_power i_mdi_pm_reset_client\n")); 5986 i_mdi_pm_reset_client(ct); 5987 } 5988 } 5989 5990 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5991 cv_broadcast(&ct->ct_powerchange_cv); 5992 5993 break; 5994 5995 /* need to do more */ 5996 case BUS_POWER_HAS_CHANGED: 5997 MDI_DEBUG(4, 
(CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 5998 "BUS_POWER_HAS_CHANGED:" 5999 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6000 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6001 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6002 6003 if (bphc->bphc_nlevel > 0 && 6004 bphc->bphc_nlevel > bphc->bphc_olevel) { 6005 if (ct->ct_power_cnt == 0) { 6006 ret = i_mdi_power_all_phci(ct); 6007 } 6008 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6009 "mdi_bus_power i_mdi_pm_hold_client\n")); 6010 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6011 } 6012 6013 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6014 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6015 "mdi_bus_power i_mdi_pm_rele_client\n")); 6016 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6017 } 6018 break; 6019 } 6020 6021 MDI_CLIENT_UNLOCK(ct); 6022 return (ret); 6023 } 6024 6025 static int 6026 i_mdi_pm_pre_config_one(dev_info_t *child) 6027 { 6028 int ret = MDI_SUCCESS; 6029 mdi_client_t *ct; 6030 6031 ct = i_devi_get_client(child); 6032 if (ct == NULL) 6033 return (MDI_FAILURE); 6034 6035 MDI_CLIENT_LOCK(ct); 6036 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6037 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6038 6039 if (!MDI_CLIENT_IS_FAILED(ct)) { 6040 MDI_CLIENT_UNLOCK(ct); 6041 MDI_DEBUG(4, (CE_NOTE, child, 6042 "i_mdi_pm_pre_config_one already configured\n")); 6043 return (MDI_SUCCESS); 6044 } 6045 6046 if (ct->ct_powercnt_config) { 6047 MDI_CLIENT_UNLOCK(ct); 6048 MDI_DEBUG(4, (CE_NOTE, child, 6049 "i_mdi_pm_pre_config_one ALREADY held\n")); 6050 return (MDI_SUCCESS); 6051 } 6052 6053 if (ct->ct_power_cnt == 0) { 6054 ret = i_mdi_power_all_phci(ct); 6055 } 6056 MDI_DEBUG(4, (CE_NOTE, child, 6057 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 6058 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6059 ct->ct_powercnt_config = 1; 6060 ct->ct_powercnt_reset = 0; 6061 MDI_CLIENT_UNLOCK(ct); 6062 return (ret); 6063 } 6064 6065 static int 6066 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 
6067 { 6068 int ret = MDI_SUCCESS; 6069 dev_info_t *cdip; 6070 int circ; 6071 6072 ASSERT(MDI_VHCI(vdip)); 6073 6074 /* ndi_devi_config_one */ 6075 if (child) { 6076 ASSERT(DEVI_BUSY_OWNED(vdip)); 6077 return (i_mdi_pm_pre_config_one(child)); 6078 } 6079 6080 /* devi_config_common */ 6081 ndi_devi_enter(vdip, &circ); 6082 cdip = ddi_get_child(vdip); 6083 while (cdip) { 6084 dev_info_t *next = ddi_get_next_sibling(cdip); 6085 6086 ret = i_mdi_pm_pre_config_one(cdip); 6087 if (ret != MDI_SUCCESS) 6088 break; 6089 cdip = next; 6090 } 6091 ndi_devi_exit(vdip, circ); 6092 return (ret); 6093 } 6094 6095 static int 6096 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6097 { 6098 int ret = MDI_SUCCESS; 6099 mdi_client_t *ct; 6100 6101 ct = i_devi_get_client(child); 6102 if (ct == NULL) 6103 return (MDI_FAILURE); 6104 6105 MDI_CLIENT_LOCK(ct); 6106 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6107 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6108 6109 if (!i_ddi_devi_attached(ct->ct_dip)) { 6110 MDI_DEBUG(4, (CE_NOTE, child, 6111 "i_mdi_pm_pre_unconfig node detached already\n")); 6112 MDI_CLIENT_UNLOCK(ct); 6113 return (MDI_SUCCESS); 6114 } 6115 6116 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6117 (flags & NDI_AUTODETACH)) { 6118 MDI_DEBUG(4, (CE_NOTE, child, 6119 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6120 MDI_CLIENT_UNLOCK(ct); 6121 return (MDI_FAILURE); 6122 } 6123 6124 if (ct->ct_powercnt_unconfig) { 6125 MDI_DEBUG(4, (CE_NOTE, child, 6126 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6127 MDI_CLIENT_UNLOCK(ct); 6128 *held = 1; 6129 return (MDI_SUCCESS); 6130 } 6131 6132 if (ct->ct_power_cnt == 0) { 6133 ret = i_mdi_power_all_phci(ct); 6134 } 6135 MDI_DEBUG(4, (CE_NOTE, child, 6136 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6137 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6138 ct->ct_powercnt_unconfig = 1; 6139 ct->ct_powercnt_reset = 0; 6140 MDI_CLIENT_UNLOCK(ct); 6141 if (ret == MDI_SUCCESS) 6142 *held = 1; 6143 return (ret); 6144 } 6145 
6146 static int 6147 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6148 int flags) 6149 { 6150 int ret = MDI_SUCCESS; 6151 dev_info_t *cdip; 6152 int circ; 6153 6154 ASSERT(MDI_VHCI(vdip)); 6155 *held = 0; 6156 6157 /* ndi_devi_unconfig_one */ 6158 if (child) { 6159 ASSERT(DEVI_BUSY_OWNED(vdip)); 6160 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6161 } 6162 6163 /* devi_unconfig_common */ 6164 ndi_devi_enter(vdip, &circ); 6165 cdip = ddi_get_child(vdip); 6166 while (cdip) { 6167 dev_info_t *next = ddi_get_next_sibling(cdip); 6168 6169 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6170 cdip = next; 6171 } 6172 ndi_devi_exit(vdip, circ); 6173 6174 if (*held) 6175 ret = MDI_SUCCESS; 6176 6177 return (ret); 6178 } 6179 6180 static void 6181 i_mdi_pm_post_config_one(dev_info_t *child) 6182 { 6183 mdi_client_t *ct; 6184 6185 ct = i_devi_get_client(child); 6186 if (ct == NULL) 6187 return; 6188 6189 MDI_CLIENT_LOCK(ct); 6190 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6191 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6192 6193 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6194 MDI_DEBUG(4, (CE_NOTE, child, 6195 "i_mdi_pm_post_config_one NOT configured\n")); 6196 MDI_CLIENT_UNLOCK(ct); 6197 return; 6198 } 6199 6200 /* client has not been updated */ 6201 if (MDI_CLIENT_IS_FAILED(ct)) { 6202 MDI_DEBUG(4, (CE_NOTE, child, 6203 "i_mdi_pm_post_config_one NOT configured\n")); 6204 MDI_CLIENT_UNLOCK(ct); 6205 return; 6206 } 6207 6208 /* another thread might have powered it down or detached it */ 6209 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6210 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6211 (!i_ddi_devi_attached(ct->ct_dip) && 6212 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6213 MDI_DEBUG(4, (CE_NOTE, child, 6214 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6215 i_mdi_pm_reset_client(ct); 6216 } else { 6217 mdi_pathinfo_t *pip, *next; 6218 int valid_path_count = 0; 6219 6220 MDI_DEBUG(4, (CE_NOTE, child, 6221 "i_mdi_pm_post_config 
i_mdi_pm_rele_client\n")); 6222 pip = ct->ct_path_head; 6223 while (pip != NULL) { 6224 MDI_PI_LOCK(pip); 6225 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6226 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6227 valid_path_count ++; 6228 MDI_PI_UNLOCK(pip); 6229 pip = next; 6230 } 6231 i_mdi_pm_rele_client(ct, valid_path_count); 6232 } 6233 ct->ct_powercnt_config = 0; 6234 MDI_CLIENT_UNLOCK(ct); 6235 } 6236 6237 static void 6238 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6239 { 6240 int circ; 6241 dev_info_t *cdip; 6242 6243 ASSERT(MDI_VHCI(vdip)); 6244 6245 /* ndi_devi_config_one */ 6246 if (child) { 6247 ASSERT(DEVI_BUSY_OWNED(vdip)); 6248 i_mdi_pm_post_config_one(child); 6249 return; 6250 } 6251 6252 /* devi_config_common */ 6253 ndi_devi_enter(vdip, &circ); 6254 cdip = ddi_get_child(vdip); 6255 while (cdip) { 6256 dev_info_t *next = ddi_get_next_sibling(cdip); 6257 6258 i_mdi_pm_post_config_one(cdip); 6259 cdip = next; 6260 } 6261 ndi_devi_exit(vdip, circ); 6262 } 6263 6264 static void 6265 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6266 { 6267 mdi_client_t *ct; 6268 6269 ct = i_devi_get_client(child); 6270 if (ct == NULL) 6271 return; 6272 6273 MDI_CLIENT_LOCK(ct); 6274 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6275 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6276 6277 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6278 MDI_DEBUG(4, (CE_NOTE, child, 6279 "i_mdi_pm_post_unconfig NOT held\n")); 6280 MDI_CLIENT_UNLOCK(ct); 6281 return; 6282 } 6283 6284 /* failure detaching or another thread just attached it */ 6285 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6286 i_ddi_devi_attached(ct->ct_dip)) || 6287 (!i_ddi_devi_attached(ct->ct_dip) && 6288 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6289 MDI_DEBUG(4, (CE_NOTE, child, 6290 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6291 i_mdi_pm_reset_client(ct); 6292 } else { 6293 mdi_pathinfo_t *pip, *next; 6294 int valid_path_count = 0; 6295 6296 MDI_DEBUG(4, (CE_NOTE, child, 
6297 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6298 pip = ct->ct_path_head; 6299 while (pip != NULL) { 6300 MDI_PI_LOCK(pip); 6301 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6302 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6303 valid_path_count ++; 6304 MDI_PI_UNLOCK(pip); 6305 pip = next; 6306 } 6307 i_mdi_pm_rele_client(ct, valid_path_count); 6308 ct->ct_powercnt_unconfig = 0; 6309 } 6310 6311 MDI_CLIENT_UNLOCK(ct); 6312 } 6313 6314 static void 6315 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6316 { 6317 int circ; 6318 dev_info_t *cdip; 6319 6320 ASSERT(MDI_VHCI(vdip)); 6321 6322 if (!held) { 6323 MDI_DEBUG(4, (CE_NOTE, vdip, 6324 "i_mdi_pm_post_unconfig held = %d\n", held)); 6325 return; 6326 } 6327 6328 if (child) { 6329 ASSERT(DEVI_BUSY_OWNED(vdip)); 6330 i_mdi_pm_post_unconfig_one(child); 6331 return; 6332 } 6333 6334 ndi_devi_enter(vdip, &circ); 6335 cdip = ddi_get_child(vdip); 6336 while (cdip) { 6337 dev_info_t *next = ddi_get_next_sibling(cdip); 6338 6339 i_mdi_pm_post_unconfig_one(cdip); 6340 cdip = next; 6341 } 6342 ndi_devi_exit(vdip, circ); 6343 } 6344 6345 int 6346 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6347 { 6348 int circ, ret = MDI_SUCCESS; 6349 dev_info_t *client_dip = NULL; 6350 mdi_client_t *ct; 6351 6352 /* 6353 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6354 * Power up pHCI for the named client device. 6355 * Note: Before the client is enumerated under vhci by phci, 6356 * client_dip can be NULL. Then proceed to power up all the 6357 * pHCIs. 6358 */ 6359 if (devnm != NULL) { 6360 ndi_devi_enter(vdip, &circ); 6361 client_dip = ndi_devi_findchild(vdip, devnm); 6362 } 6363 6364 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6365 op, devnm ? 
devnm : "NULL", (void *)client_dip)); 6366 6367 switch (op) { 6368 case MDI_PM_PRE_CONFIG: 6369 ret = i_mdi_pm_pre_config(vdip, client_dip); 6370 break; 6371 6372 case MDI_PM_PRE_UNCONFIG: 6373 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6374 flags); 6375 break; 6376 6377 case MDI_PM_POST_CONFIG: 6378 i_mdi_pm_post_config(vdip, client_dip); 6379 break; 6380 6381 case MDI_PM_POST_UNCONFIG: 6382 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6383 break; 6384 6385 case MDI_PM_HOLD_POWER: 6386 case MDI_PM_RELE_POWER: 6387 ASSERT(args); 6388 6389 client_dip = (dev_info_t *)args; 6390 ASSERT(MDI_CLIENT(client_dip)); 6391 6392 ct = i_devi_get_client(client_dip); 6393 MDI_CLIENT_LOCK(ct); 6394 6395 if (op == MDI_PM_HOLD_POWER) { 6396 if (ct->ct_power_cnt == 0) { 6397 (void) i_mdi_power_all_phci(ct); 6398 MDI_DEBUG(4, (CE_NOTE, client_dip, 6399 "mdi_power i_mdi_pm_hold_client\n")); 6400 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6401 } 6402 } else { 6403 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6404 MDI_DEBUG(4, (CE_NOTE, client_dip, 6405 "mdi_power i_mdi_pm_rele_client\n")); 6406 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6407 } else { 6408 MDI_DEBUG(4, (CE_NOTE, client_dip, 6409 "mdi_power i_mdi_pm_reset_client\n")); 6410 i_mdi_pm_reset_client(ct); 6411 } 6412 } 6413 6414 MDI_CLIENT_UNLOCK(ct); 6415 break; 6416 6417 default: 6418 break; 6419 } 6420 6421 if (devnm) 6422 ndi_devi_exit(vdip, circ); 6423 6424 return (ret); 6425 } 6426 6427 int 6428 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6429 { 6430 mdi_vhci_t *vhci; 6431 6432 if (!MDI_VHCI(dip)) 6433 return (MDI_FAILURE); 6434 6435 if (mdi_class) { 6436 vhci = DEVI(dip)->devi_mdi_xhci; 6437 ASSERT(vhci); 6438 *mdi_class = vhci->vh_class; 6439 } 6440 6441 return (MDI_SUCCESS); 6442 } 6443 6444 int 6445 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6446 { 6447 mdi_phci_t *phci; 6448 6449 if (!MDI_PHCI(dip)) 6450 return (MDI_FAILURE); 6451 6452 if (mdi_class) { 
6453 phci = DEVI(dip)->devi_mdi_xhci; 6454 ASSERT(phci); 6455 *mdi_class = phci->ph_vhci->vh_class; 6456 } 6457 6458 return (MDI_SUCCESS); 6459 } 6460 6461 int 6462 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6463 { 6464 mdi_client_t *client; 6465 6466 if (!MDI_CLIENT(dip)) 6467 return (MDI_FAILURE); 6468 6469 if (mdi_class) { 6470 client = DEVI(dip)->devi_mdi_client; 6471 ASSERT(client); 6472 *mdi_class = client->ct_vhci->vh_class; 6473 } 6474 6475 return (MDI_SUCCESS); 6476 } 6477 6478 void * 6479 mdi_client_get_vhci_private(dev_info_t *dip) 6480 { 6481 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6482 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6483 mdi_client_t *ct; 6484 ct = i_devi_get_client(dip); 6485 return (ct->ct_vprivate); 6486 } 6487 return (NULL); 6488 } 6489 6490 void 6491 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6492 { 6493 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6494 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6495 mdi_client_t *ct; 6496 ct = i_devi_get_client(dip); 6497 ct->ct_vprivate = data; 6498 } 6499 } 6500 /* 6501 * mdi_pi_get_vhci_private(): 6502 * Get the vhci private information associated with the 6503 * mdi_pathinfo node 6504 */ 6505 void * 6506 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6507 { 6508 caddr_t vprivate = NULL; 6509 if (pip) { 6510 vprivate = MDI_PI(pip)->pi_vprivate; 6511 } 6512 return (vprivate); 6513 } 6514 6515 /* 6516 * mdi_pi_set_vhci_private(): 6517 * Set the vhci private information in the mdi_pathinfo node 6518 */ 6519 void 6520 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6521 { 6522 if (pip) { 6523 MDI_PI(pip)->pi_vprivate = priv; 6524 } 6525 } 6526 6527 /* 6528 * mdi_phci_get_vhci_private(): 6529 * Get the vhci private information associated with the 6530 * mdi_phci node 6531 */ 6532 void * 6533 mdi_phci_get_vhci_private(dev_info_t *dip) 6534 { 6535 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 
6536 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6537 mdi_phci_t *ph; 6538 ph = i_devi_get_phci(dip); 6539 return (ph->ph_vprivate); 6540 } 6541 return (NULL); 6542 } 6543 6544 /* 6545 * mdi_phci_set_vhci_private(): 6546 * Set the vhci private information in the mdi_phci node 6547 */ 6548 void 6549 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6550 { 6551 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6552 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6553 mdi_phci_t *ph; 6554 ph = i_devi_get_phci(dip); 6555 ph->ph_vprivate = priv; 6556 } 6557 } 6558 6559 /* 6560 * List of vhci class names: 6561 * A vhci class name must be in this list only if the corresponding vhci 6562 * driver intends to use the mdi provided bus config implementation 6563 * (i.e., mdi_vhci_bus_config()). 6564 */ 6565 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6566 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6567 6568 /* 6569 * Built-in list of phci drivers for every vhci class. 6570 * All phci drivers expect iscsi have root device support. 6571 */ 6572 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 6573 { "fp", 1 }, 6574 { "iscsi", 0 }, 6575 { "ibsrp", 1 } 6576 }; 6577 6578 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 6579 6580 /* 6581 * During boot time, the on-disk vhci cache for every vhci class is read 6582 * in the form of an nvlist and stored here. 6583 */ 6584 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6585 6586 /* nvpair names in vhci cache nvlist */ 6587 #define MDI_VHCI_CACHE_VERSION 1 6588 #define MDI_NVPNAME_VERSION "version" 6589 #define MDI_NVPNAME_PHCIS "phcis" 6590 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6591 6592 /* 6593 * Given vhci class name, return its on-disk vhci cache filename. 6594 * Memory for the returned filename which includes the full path is allocated 6595 * by this function. 
6596 */ 6597 static char * 6598 vhclass2vhcache_filename(char *vhclass) 6599 { 6600 char *filename; 6601 int len; 6602 static char *fmt = "/etc/devices/mdi_%s_cache"; 6603 6604 /* 6605 * fmt contains the on-disk vhci cache file name format; 6606 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 6607 */ 6608 6609 /* the -1 below is to account for "%s" in the format string */ 6610 len = strlen(fmt) + strlen(vhclass) - 1; 6611 filename = kmem_alloc(len, KM_SLEEP); 6612 (void) snprintf(filename, len, fmt, vhclass); 6613 ASSERT(len == (strlen(filename) + 1)); 6614 return (filename); 6615 } 6616 6617 /* 6618 * initialize the vhci cache related data structures and read the on-disk 6619 * vhci cached data into memory. 6620 */ 6621 static void 6622 setup_vhci_cache(mdi_vhci_t *vh) 6623 { 6624 mdi_vhci_config_t *vhc; 6625 mdi_vhci_cache_t *vhcache; 6626 int i; 6627 nvlist_t *nvl = NULL; 6628 6629 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6630 vh->vh_config = vhc; 6631 vhcache = &vhc->vhc_vhcache; 6632 6633 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6634 6635 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6636 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6637 6638 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6639 6640 /* 6641 * Create string hash; same as mod_hash_create_strhash() except that 6642 * we use NULL key destructor. 6643 */ 6644 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6645 mdi_bus_config_cache_hash_size, 6646 mod_hash_null_keydtor, mod_hash_null_valdtor, 6647 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6648 6649 setup_phci_driver_list(vh); 6650 6651 /* 6652 * The on-disk vhci cache is read during booting prior to the 6653 * lights-out period by mdi_read_devices_files(). 
6654 */ 6655 for (i = 0; i < N_VHCI_CLASSES; i++) { 6656 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6657 nvl = vhcache_nvl[i]; 6658 vhcache_nvl[i] = NULL; 6659 break; 6660 } 6661 } 6662 6663 /* 6664 * this is to cover the case of some one manually causing unloading 6665 * (or detaching) and reloading (or attaching) of a vhci driver. 6666 */ 6667 if (nvl == NULL && modrootloaded) 6668 nvl = read_on_disk_vhci_cache(vh->vh_class); 6669 6670 if (nvl != NULL) { 6671 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6672 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6673 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6674 else { 6675 cmn_err(CE_WARN, 6676 "%s: data file corrupted, will recreate\n", 6677 vhc->vhc_vhcache_filename); 6678 } 6679 rw_exit(&vhcache->vhcache_lock); 6680 nvlist_free(nvl); 6681 } 6682 6683 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6684 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6685 6686 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 6687 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 6688 } 6689 6690 /* 6691 * free all vhci cache related resources 6692 */ 6693 static int 6694 destroy_vhci_cache(mdi_vhci_t *vh) 6695 { 6696 mdi_vhci_config_t *vhc = vh->vh_config; 6697 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6698 mdi_vhcache_phci_t *cphci, *cphci_next; 6699 mdi_vhcache_client_t *cct, *cct_next; 6700 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6701 6702 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6703 return (MDI_FAILURE); 6704 6705 kmem_free(vhc->vhc_vhcache_filename, 6706 strlen(vhc->vhc_vhcache_filename) + 1); 6707 6708 if (vhc->vhc_phci_driver_list) 6709 free_phci_driver_list(vhc); 6710 6711 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6712 6713 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6714 cphci = cphci_next) { 6715 cphci_next = cphci->cphci_next; 6716 free_vhcache_phci(cphci); 6717 } 6718 6719 for (cct = vhcache->vhcache_client_head; cct != NULL; 
cct = cct_next) { 6720 cct_next = cct->cct_next; 6721 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6722 cpi_next = cpi->cpi_next; 6723 free_vhcache_pathinfo(cpi); 6724 } 6725 free_vhcache_client(cct); 6726 } 6727 6728 rw_destroy(&vhcache->vhcache_lock); 6729 6730 mutex_destroy(&vhc->vhc_lock); 6731 cv_destroy(&vhc->vhc_cv); 6732 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6733 return (MDI_SUCCESS); 6734 } 6735 6736 /* 6737 * Setup the list of phci drivers associated with the specified vhci class. 6738 * MDI uses this information to rebuild bus config cache if in case the 6739 * cache is not available or corrupted. 6740 */ 6741 static void 6742 setup_phci_driver_list(mdi_vhci_t *vh) 6743 { 6744 mdi_vhci_config_t *vhc = vh->vh_config; 6745 mdi_phci_driver_info_t *driver_list; 6746 char **driver_list1; 6747 uint_t ndrivers, ndrivers1; 6748 int i, j; 6749 6750 if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) { 6751 driver_list = scsi_phci_driver_list; 6752 ndrivers = sizeof (scsi_phci_driver_list) / 6753 sizeof (mdi_phci_driver_info_t); 6754 } else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) { 6755 driver_list = ib_phci_driver_list; 6756 ndrivers = sizeof (ib_phci_driver_list) / 6757 sizeof (mdi_phci_driver_info_t); 6758 } else { 6759 driver_list = NULL; 6760 ndrivers = 0; 6761 } 6762 6763 /* 6764 * The driver.conf file of a vhci driver can specify additional 6765 * phci drivers using a project private "phci-drivers" property. 
6766 */ 6767 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip, 6768 DDI_PROP_DONTPASS, "phci-drivers", &driver_list1, 6769 &ndrivers1) != DDI_PROP_SUCCESS) 6770 ndrivers1 = 0; 6771 6772 vhc->vhc_nphci_drivers = ndrivers + ndrivers1; 6773 if (vhc->vhc_nphci_drivers == 0) 6774 return; 6775 6776 vhc->vhc_phci_driver_list = kmem_alloc( 6777 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP); 6778 6779 for (i = 0; i < ndrivers; i++) { 6780 vhc->vhc_phci_driver_list[i].phdriver_name = 6781 i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP); 6782 vhc->vhc_phci_driver_list[i].phdriver_root_support = 6783 driver_list[i].phdriver_root_support; 6784 } 6785 6786 for (j = 0; j < ndrivers1; j++, i++) { 6787 vhc->vhc_phci_driver_list[i].phdriver_name = 6788 i_ddi_strdup(driver_list1[j], KM_SLEEP); 6789 vhc->vhc_phci_driver_list[i].phdriver_root_support = 1; 6790 } 6791 6792 if (ndrivers1) 6793 ddi_prop_free(driver_list1); 6794 } 6795 6796 /* 6797 * Free the memory allocated for the phci driver list 6798 */ 6799 static void 6800 free_phci_driver_list(mdi_vhci_config_t *vhc) 6801 { 6802 int i; 6803 6804 if (vhc->vhc_phci_driver_list == NULL) 6805 return; 6806 6807 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 6808 kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name, 6809 strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1); 6810 } 6811 6812 kmem_free(vhc->vhc_phci_driver_list, 6813 sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers); 6814 } 6815 6816 /* 6817 * Stop all vhci cache related async threads and free their resources. 
6818 */ 6819 static int 6820 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6821 { 6822 mdi_async_client_config_t *acc, *acc_next; 6823 6824 mutex_enter(&vhc->vhc_lock); 6825 vhc->vhc_flags |= MDI_VHC_EXIT; 6826 ASSERT(vhc->vhc_acc_thrcount >= 0); 6827 cv_broadcast(&vhc->vhc_cv); 6828 6829 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6830 vhc->vhc_acc_thrcount != 0) { 6831 mutex_exit(&vhc->vhc_lock); 6832 delay(1); 6833 mutex_enter(&vhc->vhc_lock); 6834 } 6835 6836 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6837 6838 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6839 acc_next = acc->acc_next; 6840 free_async_client_config(acc); 6841 } 6842 vhc->vhc_acc_list_head = NULL; 6843 vhc->vhc_acc_list_tail = NULL; 6844 vhc->vhc_acc_count = 0; 6845 6846 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6847 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6848 mutex_exit(&vhc->vhc_lock); 6849 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6850 vhcache_dirty(vhc); 6851 return (MDI_FAILURE); 6852 } 6853 } else 6854 mutex_exit(&vhc->vhc_lock); 6855 6856 if (callb_delete(vhc->vhc_cbid) != 0) 6857 return (MDI_FAILURE); 6858 6859 return (MDI_SUCCESS); 6860 } 6861 6862 /* 6863 * Stop vhci cache flush thread 6864 */ 6865 /* ARGSUSED */ 6866 static boolean_t 6867 stop_vhcache_flush_thread(void *arg, int code) 6868 { 6869 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 6870 6871 mutex_enter(&vhc->vhc_lock); 6872 vhc->vhc_flags |= MDI_VHC_EXIT; 6873 cv_broadcast(&vhc->vhc_cv); 6874 6875 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6876 mutex_exit(&vhc->vhc_lock); 6877 delay(1); 6878 mutex_enter(&vhc->vhc_lock); 6879 } 6880 6881 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6882 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6883 mutex_exit(&vhc->vhc_lock); 6884 (void) flush_vhcache(vhc, 1); 6885 } else 6886 mutex_exit(&vhc->vhc_lock); 6887 6888 return (B_TRUE); 6889 } 6890 6891 /* 6892 * Enqueue the vhcache phci (cphci) at the tail of the list 6893 */ 6894 static 
void 6895 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6896 { 6897 cphci->cphci_next = NULL; 6898 if (vhcache->vhcache_phci_head == NULL) 6899 vhcache->vhcache_phci_head = cphci; 6900 else 6901 vhcache->vhcache_phci_tail->cphci_next = cphci; 6902 vhcache->vhcache_phci_tail = cphci; 6903 } 6904 6905 /* 6906 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6907 */ 6908 static void 6909 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6910 mdi_vhcache_pathinfo_t *cpi) 6911 { 6912 cpi->cpi_next = NULL; 6913 if (cct->cct_cpi_head == NULL) 6914 cct->cct_cpi_head = cpi; 6915 else 6916 cct->cct_cpi_tail->cpi_next = cpi; 6917 cct->cct_cpi_tail = cpi; 6918 } 6919 6920 /* 6921 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6922 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6923 * flag set come at the beginning of the list. All cpis which have this 6924 * flag set come at the end of the list. 6925 */ 6926 static void 6927 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6928 mdi_vhcache_pathinfo_t *newcpi) 6929 { 6930 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6931 6932 if (cct->cct_cpi_head == NULL || 6933 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6934 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6935 else { 6936 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6937 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6938 prev_cpi = cpi, cpi = cpi->cpi_next) 6939 ; 6940 6941 if (prev_cpi == NULL) 6942 cct->cct_cpi_head = newcpi; 6943 else 6944 prev_cpi->cpi_next = newcpi; 6945 6946 newcpi->cpi_next = cpi; 6947 6948 if (cpi == NULL) 6949 cct->cct_cpi_tail = newcpi; 6950 } 6951 } 6952 6953 /* 6954 * Enqueue the vhcache client (cct) at the tail of the list 6955 */ 6956 static void 6957 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6958 mdi_vhcache_client_t *cct) 6959 { 6960 cct->cct_next = NULL; 6961 if (vhcache->vhcache_client_head == NULL) 6962 
vhcache->vhcache_client_head = cct; 6963 else 6964 vhcache->vhcache_client_tail->cct_next = cct; 6965 vhcache->vhcache_client_tail = cct; 6966 } 6967 6968 static void 6969 free_string_array(char **str, int nelem) 6970 { 6971 int i; 6972 6973 if (str) { 6974 for (i = 0; i < nelem; i++) { 6975 if (str[i]) 6976 kmem_free(str[i], strlen(str[i]) + 1); 6977 } 6978 kmem_free(str, sizeof (char *) * nelem); 6979 } 6980 } 6981 6982 static void 6983 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6984 { 6985 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6986 kmem_free(cphci, sizeof (*cphci)); 6987 } 6988 6989 static void 6990 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6991 { 6992 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6993 kmem_free(cpi, sizeof (*cpi)); 6994 } 6995 6996 static void 6997 free_vhcache_client(mdi_vhcache_client_t *cct) 6998 { 6999 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7000 kmem_free(cct, sizeof (*cct)); 7001 } 7002 7003 static char * 7004 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7005 { 7006 char *name_addr; 7007 int len; 7008 7009 len = strlen(ct_name) + strlen(ct_addr) + 2; 7010 name_addr = kmem_alloc(len, KM_SLEEP); 7011 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7012 7013 if (ret_len) 7014 *ret_len = len; 7015 return (name_addr); 7016 } 7017 7018 /* 7019 * Copy the contents of paddrnvl to vhci cache. 7020 * paddrnvl nvlist contains path information for a vhci client. 7021 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 
7022 */ 7023 static void 7024 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7025 mdi_vhcache_client_t *cct) 7026 { 7027 nvpair_t *nvp = NULL; 7028 mdi_vhcache_pathinfo_t *cpi; 7029 uint_t nelem; 7030 uint32_t *val; 7031 7032 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7033 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7034 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7035 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7036 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7037 ASSERT(nelem == 2); 7038 cpi->cpi_cphci = cphci_list[val[0]]; 7039 cpi->cpi_flags = val[1]; 7040 enqueue_tail_vhcache_pathinfo(cct, cpi); 7041 } 7042 } 7043 7044 /* 7045 * Copy the contents of caddrmapnvl to vhci cache. 7046 * caddrmapnvl nvlist contains vhci client address to phci client address 7047 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7048 * this nvlist. 7049 */ 7050 static void 7051 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7052 mdi_vhcache_phci_t *cphci_list[]) 7053 { 7054 nvpair_t *nvp = NULL; 7055 nvlist_t *paddrnvl; 7056 mdi_vhcache_client_t *cct; 7057 7058 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7059 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7060 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7061 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7062 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7063 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7064 /* the client must contain at least one path */ 7065 ASSERT(cct->cct_cpi_head != NULL); 7066 7067 enqueue_vhcache_client(vhcache, cct); 7068 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7069 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7070 } 7071 } 7072 7073 /* 7074 * Copy the contents of the main nvlist to vhci cache. 7075 * 7076 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 
7077 * The nvlist contains the mappings between the vhci client addresses and 7078 * their corresponding phci client addresses. 7079 * 7080 * The structure of the nvlist is as follows: 7081 * 7082 * Main nvlist: 7083 * NAME TYPE DATA 7084 * version int32 version number 7085 * phcis string array array of phci paths 7086 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7087 * 7088 * structure of c2paddrs_nvl: 7089 * NAME TYPE DATA 7090 * caddr1 nvlist_t paddrs_nvl1 7091 * caddr2 nvlist_t paddrs_nvl2 7092 * ... 7093 * where caddr1, caddr2, ... are vhci client name and addresses in the 7094 * form of "<clientname>@<clientaddress>". 7095 * (for example: "ssd@2000002037cd9f72"); 7096 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7097 * 7098 * structure of paddrs_nvl: 7099 * NAME TYPE DATA 7100 * pi_addr1 uint32_array (phci-id, cpi_flags) 7101 * pi_addr2 uint32_array (phci-id, cpi_flags) 7102 * ... 7103 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes 7104 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7105 * phci-ids are integers that identify PHCIs to which the 7106 * the bus specific address belongs to. These integers are used as an index 7107 * into to the phcis string array in the main nvlist to get the PHCI path. 
7108 */ 7109 static int 7110 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7111 { 7112 char **phcis, **phci_namep; 7113 uint_t nphcis; 7114 mdi_vhcache_phci_t *cphci, **cphci_list; 7115 nvlist_t *caddrmapnvl; 7116 int32_t ver; 7117 int i; 7118 size_t cphci_list_size; 7119 7120 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7121 7122 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7123 ver != MDI_VHCI_CACHE_VERSION) 7124 return (MDI_FAILURE); 7125 7126 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7127 &nphcis) != 0) 7128 return (MDI_SUCCESS); 7129 7130 ASSERT(nphcis > 0); 7131 7132 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7133 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7134 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7135 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7136 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7137 enqueue_vhcache_phci(vhcache, cphci); 7138 cphci_list[i] = cphci; 7139 } 7140 7141 ASSERT(vhcache->vhcache_phci_head != NULL); 7142 7143 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7144 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7145 7146 kmem_free(cphci_list, cphci_list_size); 7147 return (MDI_SUCCESS); 7148 } 7149 7150 /* 7151 * Build paddrnvl for the specified client using the information in the 7152 * vhci cache and add it to the caddrmapnnvl. 7153 * Returns 0 on success, errno on failure. 
7154 */ 7155 static int 7156 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7157 nvlist_t *caddrmapnvl) 7158 { 7159 mdi_vhcache_pathinfo_t *cpi; 7160 nvlist_t *nvl; 7161 int err; 7162 uint32_t val[2]; 7163 7164 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7165 7166 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7167 return (err); 7168 7169 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7170 val[0] = cpi->cpi_cphci->cphci_id; 7171 val[1] = cpi->cpi_flags; 7172 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7173 != 0) 7174 goto out; 7175 } 7176 7177 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7178 out: 7179 nvlist_free(nvl); 7180 return (err); 7181 } 7182 7183 /* 7184 * Build caddrmapnvl using the information in the vhci cache 7185 * and add it to the mainnvl. 7186 * Returns 0 on success, errno on failure. 7187 */ 7188 static int 7189 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7190 { 7191 mdi_vhcache_client_t *cct; 7192 nvlist_t *nvl; 7193 int err; 7194 7195 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7196 7197 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7198 return (err); 7199 7200 for (cct = vhcache->vhcache_client_head; cct != NULL; 7201 cct = cct->cct_next) { 7202 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7203 goto out; 7204 } 7205 7206 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7207 out: 7208 nvlist_free(nvl); 7209 return (err); 7210 } 7211 7212 /* 7213 * Build nvlist using the information in the vhci cache. 7214 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7215 * Returns nvl on success, NULL on failure. 
7216 */ 7217 static nvlist_t * 7218 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7219 { 7220 mdi_vhcache_phci_t *cphci; 7221 uint_t phci_count; 7222 char **phcis; 7223 nvlist_t *nvl; 7224 int err, i; 7225 7226 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7227 nvl = NULL; 7228 goto out; 7229 } 7230 7231 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7232 MDI_VHCI_CACHE_VERSION)) != 0) 7233 goto out; 7234 7235 rw_enter(&vhcache->vhcache_lock, RW_READER); 7236 if (vhcache->vhcache_phci_head == NULL) { 7237 rw_exit(&vhcache->vhcache_lock); 7238 return (nvl); 7239 } 7240 7241 phci_count = 0; 7242 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7243 cphci = cphci->cphci_next) 7244 cphci->cphci_id = phci_count++; 7245 7246 /* build phci pathname list */ 7247 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7248 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7249 cphci = cphci->cphci_next, i++) 7250 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7251 7252 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7253 phci_count); 7254 free_string_array(phcis, phci_count); 7255 7256 if (err == 0 && 7257 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7258 rw_exit(&vhcache->vhcache_lock); 7259 return (nvl); 7260 } 7261 7262 rw_exit(&vhcache->vhcache_lock); 7263 out: 7264 if (nvl) 7265 nvlist_free(nvl); 7266 return (NULL); 7267 } 7268 7269 /* 7270 * Lookup vhcache phci structure for the specified phci path. 7271 */ 7272 static mdi_vhcache_phci_t * 7273 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7274 { 7275 mdi_vhcache_phci_t *cphci; 7276 7277 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7278 7279 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7280 cphci = cphci->cphci_next) { 7281 if (strcmp(cphci->cphci_path, phci_path) == 0) 7282 return (cphci); 7283 } 7284 7285 return (NULL); 7286 } 7287 7288 /* 7289 * Lookup vhcache phci structure for the specified phci. 
7290 */ 7291 static mdi_vhcache_phci_t * 7292 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7293 { 7294 mdi_vhcache_phci_t *cphci; 7295 7296 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7297 7298 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7299 cphci = cphci->cphci_next) { 7300 if (cphci->cphci_phci == ph) 7301 return (cphci); 7302 } 7303 7304 return (NULL); 7305 } 7306 7307 /* 7308 * Add the specified phci to the vhci cache if not already present. 7309 */ 7310 static void 7311 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7312 { 7313 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7314 mdi_vhcache_phci_t *cphci; 7315 char *pathname; 7316 int cache_updated; 7317 7318 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7319 7320 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7321 (void) ddi_pathname(ph->ph_dip, pathname); 7322 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7323 != NULL) { 7324 cphci->cphci_phci = ph; 7325 cache_updated = 0; 7326 } else { 7327 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7328 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7329 cphci->cphci_phci = ph; 7330 enqueue_vhcache_phci(vhcache, cphci); 7331 cache_updated = 1; 7332 } 7333 7334 rw_exit(&vhcache->vhcache_lock); 7335 7336 /* 7337 * Since a new phci has been added, reset 7338 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7339 * during next vhcache_discover_paths(). 7340 */ 7341 mutex_enter(&vhc->vhc_lock); 7342 vhc->vhc_path_discovery_cutoff_time = 0; 7343 mutex_exit(&vhc->vhc_lock); 7344 7345 kmem_free(pathname, MAXPATHLEN); 7346 if (cache_updated) 7347 vhcache_dirty(vhc); 7348 } 7349 7350 /* 7351 * Remove the reference to the specified phci from the vhci cache. 
7352 */ 7353 static void 7354 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7355 { 7356 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7357 mdi_vhcache_phci_t *cphci; 7358 7359 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7360 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7361 /* do not remove the actual mdi_vhcache_phci structure */ 7362 cphci->cphci_phci = NULL; 7363 } 7364 rw_exit(&vhcache->vhcache_lock); 7365 } 7366 7367 static void 7368 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7369 mdi_vhcache_lookup_token_t *src) 7370 { 7371 if (src == NULL) { 7372 dst->lt_cct = NULL; 7373 dst->lt_cct_lookup_time = 0; 7374 } else { 7375 dst->lt_cct = src->lt_cct; 7376 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7377 } 7378 } 7379 7380 /* 7381 * Look up vhcache client for the specified client. 7382 */ 7383 static mdi_vhcache_client_t * 7384 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7385 mdi_vhcache_lookup_token_t *token) 7386 { 7387 mod_hash_val_t hv; 7388 char *name_addr; 7389 int len; 7390 7391 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7392 7393 /* 7394 * If no vhcache clean occurred since the last lookup, we can 7395 * simply return the cct from the last lookup operation. 7396 * It works because ccts are never freed except during the vhcache 7397 * cleanup operation. 
7398 */ 7399 if (token != NULL && 7400 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7401 return (token->lt_cct); 7402 7403 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7404 if (mod_hash_find(vhcache->vhcache_client_hash, 7405 (mod_hash_key_t)name_addr, &hv) == 0) { 7406 if (token) { 7407 token->lt_cct = (mdi_vhcache_client_t *)hv; 7408 token->lt_cct_lookup_time = lbolt64; 7409 } 7410 } else { 7411 if (token) { 7412 token->lt_cct = NULL; 7413 token->lt_cct_lookup_time = 0; 7414 } 7415 hv = NULL; 7416 } 7417 kmem_free(name_addr, len); 7418 return ((mdi_vhcache_client_t *)hv); 7419 } 7420 7421 /* 7422 * Add the specified path to the vhci cache if not already present. 7423 * Also add the vhcache client for the client corresponding to this path 7424 * if it doesn't already exist. 7425 */ 7426 static void 7427 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7428 { 7429 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7430 mdi_vhcache_client_t *cct; 7431 mdi_vhcache_pathinfo_t *cpi; 7432 mdi_phci_t *ph = pip->pi_phci; 7433 mdi_client_t *ct = pip->pi_client; 7434 int cache_updated = 0; 7435 7436 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7437 7438 /* if vhcache client for this pip doesn't already exist, add it */ 7439 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7440 NULL)) == NULL) { 7441 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7442 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7443 ct->ct_guid, NULL); 7444 enqueue_vhcache_client(vhcache, cct); 7445 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7446 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7447 cache_updated = 1; 7448 } 7449 7450 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7451 if (cpi->cpi_cphci->cphci_phci == ph && 7452 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7453 cpi->cpi_pip = pip; 7454 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7455 cpi->cpi_flags &= 7456 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7457 sort_vhcache_paths(cct); 7458 cache_updated = 1; 7459 } 7460 break; 7461 } 7462 } 7463 7464 if (cpi == NULL) { 7465 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7466 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7467 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7468 ASSERT(cpi->cpi_cphci != NULL); 7469 cpi->cpi_pip = pip; 7470 enqueue_vhcache_pathinfo(cct, cpi); 7471 cache_updated = 1; 7472 } 7473 7474 rw_exit(&vhcache->vhcache_lock); 7475 7476 if (cache_updated) 7477 vhcache_dirty(vhc); 7478 } 7479 7480 /* 7481 * Remove the reference to the specified path from the vhci cache. 7482 */ 7483 static void 7484 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7485 { 7486 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7487 mdi_client_t *ct = pip->pi_client; 7488 mdi_vhcache_client_t *cct; 7489 mdi_vhcache_pathinfo_t *cpi; 7490 7491 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7492 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7493 NULL)) != NULL) { 7494 for (cpi = cct->cct_cpi_head; cpi != NULL; 7495 cpi = cpi->cpi_next) { 7496 if (cpi->cpi_pip == pip) { 7497 cpi->cpi_pip = NULL; 7498 break; 7499 } 7500 } 7501 } 7502 rw_exit(&vhcache->vhcache_lock); 7503 } 7504 7505 /* 7506 * Flush the vhci cache to disk. 7507 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7508 */ 7509 static int 7510 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7511 { 7512 nvlist_t *nvl; 7513 int err; 7514 int rv; 7515 7516 /* 7517 * It is possible that the system may shutdown before 7518 * i_ddi_io_initialized (during stmsboot for example). To allow for 7519 * flushing the cache in this case do not check for 7520 * i_ddi_io_initialized when force flag is set. 
7521 */ 7522 if (force_flag == 0 && !i_ddi_io_initialized()) 7523 return (MDI_FAILURE); 7524 7525 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7526 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7527 nvlist_free(nvl); 7528 } else 7529 err = EFAULT; 7530 7531 rv = MDI_SUCCESS; 7532 mutex_enter(&vhc->vhc_lock); 7533 if (err != 0) { 7534 if (err == EROFS) { 7535 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7536 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7537 MDI_VHC_VHCACHE_DIRTY); 7538 } else { 7539 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7540 cmn_err(CE_CONT, "%s: update failed\n", 7541 vhc->vhc_vhcache_filename); 7542 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7543 } 7544 rv = MDI_FAILURE; 7545 } 7546 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7547 cmn_err(CE_CONT, 7548 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7549 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7550 } 7551 mutex_exit(&vhc->vhc_lock); 7552 7553 return (rv); 7554 } 7555 7556 /* 7557 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7558 * Exits itself if left idle for the idle timeout period. 
7559 */ 7560 static void 7561 vhcache_flush_thread(void *arg) 7562 { 7563 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7564 clock_t idle_time, quit_at_ticks; 7565 callb_cpr_t cprinfo; 7566 7567 /* number of seconds to sleep idle before exiting */ 7568 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7569 7570 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7571 "mdi_vhcache_flush"); 7572 mutex_enter(&vhc->vhc_lock); 7573 for (; ; ) { 7574 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7575 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7576 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7577 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7578 (void) cv_timedwait(&vhc->vhc_cv, 7579 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7580 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7581 } else { 7582 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7583 mutex_exit(&vhc->vhc_lock); 7584 7585 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7586 vhcache_dirty(vhc); 7587 7588 mutex_enter(&vhc->vhc_lock); 7589 } 7590 } 7591 7592 quit_at_ticks = ddi_get_lbolt() + idle_time; 7593 7594 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7595 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7596 ddi_get_lbolt() < quit_at_ticks) { 7597 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7598 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7599 quit_at_ticks); 7600 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7601 } 7602 7603 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7604 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7605 goto out; 7606 } 7607 7608 out: 7609 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7610 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7611 CALLB_CPR_EXIT(&cprinfo); 7612 } 7613 7614 /* 7615 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7616 */ 7617 static void 7618 vhcache_dirty(mdi_vhci_config_t *vhc) 7619 { 7620 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7621 int create_thread; 7622 7623 rw_enter(&vhcache->vhcache_lock, RW_READER); 7624 /* do not flush cache until the cache is fully built */ 7625 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7626 rw_exit(&vhcache->vhcache_lock); 7627 return; 7628 } 7629 rw_exit(&vhcache->vhcache_lock); 7630 7631 mutex_enter(&vhc->vhc_lock); 7632 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7633 mutex_exit(&vhc->vhc_lock); 7634 return; 7635 } 7636 7637 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7638 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7639 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7640 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7641 cv_broadcast(&vhc->vhc_cv); 7642 create_thread = 0; 7643 } else { 7644 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7645 create_thread = 1; 7646 } 7647 mutex_exit(&vhc->vhc_lock); 7648 7649 if (create_thread) 7650 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7651 0, &p0, TS_RUN, minclsyspri); 7652 } 7653 7654 /* 7655 * phci bus config structure - one for for each phci bus config operation that 7656 * we initiate on behalf of a vhci. 
7657 */ 7658 typedef struct mdi_phci_bus_config_s { 7659 char *phbc_phci_path; 7660 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7661 struct mdi_phci_bus_config_s *phbc_next; 7662 } mdi_phci_bus_config_t; 7663 7664 /* vhci bus config structure - one for each vhci bus config operation */ 7665 typedef struct mdi_vhci_bus_config_s { 7666 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7667 major_t vhbc_op_major; /* bus config op major */ 7668 uint_t vhbc_op_flags; /* bus config op flags */ 7669 kmutex_t vhbc_lock; 7670 kcondvar_t vhbc_cv; 7671 int vhbc_thr_count; 7672 } mdi_vhci_bus_config_t; 7673 7674 /* 7675 * bus config the specified phci 7676 */ 7677 static void 7678 bus_config_phci(void *arg) 7679 { 7680 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7681 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7682 dev_info_t *ph_dip; 7683 7684 /* 7685 * first configure all path components upto phci and then configure 7686 * the phci children. 7687 */ 7688 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7689 != NULL) { 7690 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7691 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7692 (void) ndi_devi_config_driver(ph_dip, 7693 vhbc->vhbc_op_flags, 7694 vhbc->vhbc_op_major); 7695 } else 7696 (void) ndi_devi_config(ph_dip, 7697 vhbc->vhbc_op_flags); 7698 7699 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7700 ndi_rele_devi(ph_dip); 7701 } 7702 7703 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7704 kmem_free(phbc, sizeof (*phbc)); 7705 7706 mutex_enter(&vhbc->vhbc_lock); 7707 vhbc->vhbc_thr_count--; 7708 if (vhbc->vhbc_thr_count == 0) 7709 cv_broadcast(&vhbc->vhbc_cv); 7710 mutex_exit(&vhbc->vhbc_lock); 7711 } 7712 7713 /* 7714 * Bus config all phcis associated with the vhci in parallel. 7715 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7716 */ 7717 static void 7718 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7719 ddi_bus_config_op_t op, major_t maj) 7720 { 7721 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7722 mdi_vhci_bus_config_t *vhbc; 7723 mdi_vhcache_phci_t *cphci; 7724 7725 rw_enter(&vhcache->vhcache_lock, RW_READER); 7726 if (vhcache->vhcache_phci_head == NULL) { 7727 rw_exit(&vhcache->vhcache_lock); 7728 return; 7729 } 7730 7731 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7732 7733 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7734 cphci = cphci->cphci_next) { 7735 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7736 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7737 KM_SLEEP); 7738 phbc->phbc_vhbusconfig = vhbc; 7739 phbc->phbc_next = phbc_head; 7740 phbc_head = phbc; 7741 vhbc->vhbc_thr_count++; 7742 } 7743 rw_exit(&vhcache->vhcache_lock); 7744 7745 vhbc->vhbc_op = op; 7746 vhbc->vhbc_op_major = maj; 7747 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7748 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7749 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7750 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7751 7752 /* now create threads to initiate bus config on all phcis in parallel */ 7753 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7754 phbc_next = phbc->phbc_next; 7755 if (mdi_mtc_off) 7756 bus_config_phci((void *)phbc); 7757 else 7758 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7759 0, &p0, TS_RUN, minclsyspri); 7760 } 7761 7762 mutex_enter(&vhbc->vhbc_lock); 7763 /* wait until all threads exit */ 7764 while (vhbc->vhbc_thr_count > 0) 7765 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7766 mutex_exit(&vhbc->vhbc_lock); 7767 7768 mutex_destroy(&vhbc->vhbc_lock); 7769 cv_destroy(&vhbc->vhbc_cv); 7770 kmem_free(vhbc, sizeof (*vhbc)); 7771 } 7772 7773 /* 7774 * Single threaded version of bus_config_all_phcis() 7775 */ 7776 static void 7777 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7778 
ddi_bus_config_op_t op, major_t maj) 7779 { 7780 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7781 7782 single_threaded_vhconfig_enter(vhc); 7783 bus_config_all_phcis(vhcache, flags, op, maj); 7784 single_threaded_vhconfig_exit(vhc); 7785 } 7786 7787 /* 7788 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7789 * The path includes the child component in addition to the phci path. 7790 */ 7791 static int 7792 bus_config_one_phci_child(char *path) 7793 { 7794 dev_info_t *ph_dip, *child; 7795 char *devnm; 7796 int rv = MDI_FAILURE; 7797 7798 /* extract the child component of the phci */ 7799 devnm = strrchr(path, '/'); 7800 *devnm++ = '\0'; 7801 7802 /* 7803 * first configure all path components upto phci and then 7804 * configure the phci child. 7805 */ 7806 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7807 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7808 NDI_SUCCESS) { 7809 /* 7810 * release the hold that ndi_devi_config_one() placed 7811 */ 7812 ndi_rele_devi(child); 7813 rv = MDI_SUCCESS; 7814 } 7815 7816 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7817 ndi_rele_devi(ph_dip); 7818 } 7819 7820 devnm--; 7821 *devnm = '/'; 7822 return (rv); 7823 } 7824 7825 /* 7826 * Build a list of phci client paths for the specified vhci client. 7827 * The list includes only those phci client paths which aren't configured yet. 7828 */ 7829 static mdi_phys_path_t * 7830 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7831 { 7832 mdi_vhcache_pathinfo_t *cpi; 7833 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7834 int config_path, len; 7835 7836 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7837 /* 7838 * include only those paths that aren't configured. 
7839 */ 7840 config_path = 0; 7841 if (cpi->cpi_pip == NULL) 7842 config_path = 1; 7843 else { 7844 MDI_PI_LOCK(cpi->cpi_pip); 7845 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7846 config_path = 1; 7847 MDI_PI_UNLOCK(cpi->cpi_pip); 7848 } 7849 7850 if (config_path) { 7851 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7852 len = strlen(cpi->cpi_cphci->cphci_path) + 7853 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7854 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7855 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7856 cpi->cpi_cphci->cphci_path, ct_name, 7857 cpi->cpi_addr); 7858 pp->phys_path_next = NULL; 7859 7860 if (pp_head == NULL) 7861 pp_head = pp; 7862 else 7863 pp_tail->phys_path_next = pp; 7864 pp_tail = pp; 7865 } 7866 } 7867 7868 return (pp_head); 7869 } 7870 7871 /* 7872 * Free the memory allocated for phci client path list. 7873 */ 7874 static void 7875 free_phclient_path_list(mdi_phys_path_t *pp_head) 7876 { 7877 mdi_phys_path_t *pp, *pp_next; 7878 7879 for (pp = pp_head; pp != NULL; pp = pp_next) { 7880 pp_next = pp->phys_path_next; 7881 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7882 kmem_free(pp, sizeof (*pp)); 7883 } 7884 } 7885 7886 /* 7887 * Allocated async client structure and initialize with the specified values. 7888 */ 7889 static mdi_async_client_config_t * 7890 alloc_async_client_config(char *ct_name, char *ct_addr, 7891 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7892 { 7893 mdi_async_client_config_t *acc; 7894 7895 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7896 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7897 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7898 acc->acc_phclient_path_list_head = pp_head; 7899 init_vhcache_lookup_token(&acc->acc_token, tok); 7900 acc->acc_next = NULL; 7901 return (acc); 7902 } 7903 7904 /* 7905 * Free the memory allocated for the async client structure and their members. 
7906 */ 7907 static void 7908 free_async_client_config(mdi_async_client_config_t *acc) 7909 { 7910 if (acc->acc_phclient_path_list_head) 7911 free_phclient_path_list(acc->acc_phclient_path_list_head); 7912 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7913 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7914 kmem_free(acc, sizeof (*acc)); 7915 } 7916 7917 /* 7918 * Sort vhcache pathinfos (cpis) of the specified client. 7919 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7920 * flag set come at the beginning of the list. All cpis which have this 7921 * flag set come at the end of the list. 7922 */ 7923 static void 7924 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7925 { 7926 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7927 7928 cpi_head = cct->cct_cpi_head; 7929 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7930 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7931 cpi_next = cpi->cpi_next; 7932 enqueue_vhcache_pathinfo(cct, cpi); 7933 } 7934 } 7935 7936 /* 7937 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7938 * every vhcache pathinfo of the specified client. If not adjust the flag 7939 * setting appropriately. 7940 * 7941 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7942 * on-disk vhci cache. So every time this flag is updated the cache must be 7943 * flushed. 7944 */ 7945 static void 7946 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7947 mdi_vhcache_lookup_token_t *tok) 7948 { 7949 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7950 mdi_vhcache_client_t *cct; 7951 mdi_vhcache_pathinfo_t *cpi; 7952 7953 rw_enter(&vhcache->vhcache_lock, RW_READER); 7954 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7955 == NULL) { 7956 rw_exit(&vhcache->vhcache_lock); 7957 return; 7958 } 7959 7960 /* 7961 * to avoid unnecessary on-disk cache updates, first check if an 7962 * update is really needed. 
If no update is needed simply return. 7963 */ 7964 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7965 if ((cpi->cpi_pip != NULL && 7966 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7967 (cpi->cpi_pip == NULL && 7968 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7969 break; 7970 } 7971 } 7972 if (cpi == NULL) { 7973 rw_exit(&vhcache->vhcache_lock); 7974 return; 7975 } 7976 7977 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7978 rw_exit(&vhcache->vhcache_lock); 7979 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7980 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7981 tok)) == NULL) { 7982 rw_exit(&vhcache->vhcache_lock); 7983 return; 7984 } 7985 } 7986 7987 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7988 if (cpi->cpi_pip != NULL) 7989 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7990 else 7991 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7992 } 7993 sort_vhcache_paths(cct); 7994 7995 rw_exit(&vhcache->vhcache_lock); 7996 vhcache_dirty(vhc); 7997 } 7998 7999 /* 8000 * Configure all specified paths of the client. 8001 */ 8002 static void 8003 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8004 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8005 { 8006 mdi_phys_path_t *pp; 8007 8008 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8009 (void) bus_config_one_phci_child(pp->phys_path); 8010 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8011 } 8012 8013 /* 8014 * Dequeue elements from vhci async client config list and bus configure 8015 * their corresponding phci clients. 
8016 */ 8017 static void 8018 config_client_paths_thread(void *arg) 8019 { 8020 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8021 mdi_async_client_config_t *acc; 8022 clock_t quit_at_ticks; 8023 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8024 callb_cpr_t cprinfo; 8025 8026 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8027 "mdi_config_client_paths"); 8028 8029 for (; ; ) { 8030 quit_at_ticks = ddi_get_lbolt() + idle_time; 8031 8032 mutex_enter(&vhc->vhc_lock); 8033 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8034 vhc->vhc_acc_list_head == NULL && 8035 ddi_get_lbolt() < quit_at_ticks) { 8036 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8037 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8038 quit_at_ticks); 8039 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8040 } 8041 8042 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8043 vhc->vhc_acc_list_head == NULL) 8044 goto out; 8045 8046 acc = vhc->vhc_acc_list_head; 8047 vhc->vhc_acc_list_head = acc->acc_next; 8048 if (vhc->vhc_acc_list_head == NULL) 8049 vhc->vhc_acc_list_tail = NULL; 8050 vhc->vhc_acc_count--; 8051 mutex_exit(&vhc->vhc_lock); 8052 8053 config_client_paths_sync(vhc, acc->acc_ct_name, 8054 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8055 &acc->acc_token); 8056 8057 free_async_client_config(acc); 8058 } 8059 8060 out: 8061 vhc->vhc_acc_thrcount--; 8062 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8063 CALLB_CPR_EXIT(&cprinfo); 8064 } 8065 8066 /* 8067 * Arrange for all the phci client paths (pp_head) for the specified client 8068 * to be bus configured asynchronously by a thread. 
8069 */ 8070 static void 8071 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8072 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8073 { 8074 mdi_async_client_config_t *acc, *newacc; 8075 int create_thread; 8076 8077 if (pp_head == NULL) 8078 return; 8079 8080 if (mdi_mtc_off) { 8081 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8082 free_phclient_path_list(pp_head); 8083 return; 8084 } 8085 8086 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8087 ASSERT(newacc); 8088 8089 mutex_enter(&vhc->vhc_lock); 8090 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8091 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8092 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8093 free_async_client_config(newacc); 8094 mutex_exit(&vhc->vhc_lock); 8095 return; 8096 } 8097 } 8098 8099 if (vhc->vhc_acc_list_head == NULL) 8100 vhc->vhc_acc_list_head = newacc; 8101 else 8102 vhc->vhc_acc_list_tail->acc_next = newacc; 8103 vhc->vhc_acc_list_tail = newacc; 8104 vhc->vhc_acc_count++; 8105 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8106 cv_broadcast(&vhc->vhc_cv); 8107 create_thread = 0; 8108 } else { 8109 vhc->vhc_acc_thrcount++; 8110 create_thread = 1; 8111 } 8112 mutex_exit(&vhc->vhc_lock); 8113 8114 if (create_thread) 8115 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8116 0, &p0, TS_RUN, minclsyspri); 8117 } 8118 8119 /* 8120 * Return number of online paths for the specified client. 
8121 */ 8122 static int 8123 nonline_paths(mdi_vhcache_client_t *cct) 8124 { 8125 mdi_vhcache_pathinfo_t *cpi; 8126 int online_count = 0; 8127 8128 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8129 if (cpi->cpi_pip != NULL) { 8130 MDI_PI_LOCK(cpi->cpi_pip); 8131 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8132 online_count++; 8133 MDI_PI_UNLOCK(cpi->cpi_pip); 8134 } 8135 } 8136 8137 return (online_count); 8138 } 8139 8140 /* 8141 * Bus configure all paths for the specified vhci client. 8142 * If at least one path for the client is already online, the remaining paths 8143 * will be configured asynchronously. Otherwise, it synchronously configures 8144 * the paths until at least one path is online and then rest of the paths 8145 * will be configured asynchronously. 8146 */ 8147 static void 8148 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8149 { 8150 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8151 mdi_phys_path_t *pp_head, *pp; 8152 mdi_vhcache_client_t *cct; 8153 mdi_vhcache_lookup_token_t tok; 8154 8155 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8156 8157 init_vhcache_lookup_token(&tok, NULL); 8158 8159 if (ct_name == NULL || ct_addr == NULL || 8160 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8161 == NULL || 8162 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8163 rw_exit(&vhcache->vhcache_lock); 8164 return; 8165 } 8166 8167 /* if at least one path is online, configure the rest asynchronously */ 8168 if (nonline_paths(cct) > 0) { 8169 rw_exit(&vhcache->vhcache_lock); 8170 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8171 return; 8172 } 8173 8174 rw_exit(&vhcache->vhcache_lock); 8175 8176 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8177 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8178 rw_enter(&vhcache->vhcache_lock, RW_READER); 8179 8180 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8181 ct_addr, &tok)) 
== NULL) { 8182 rw_exit(&vhcache->vhcache_lock); 8183 goto out; 8184 } 8185 8186 if (nonline_paths(cct) > 0 && 8187 pp->phys_path_next != NULL) { 8188 rw_exit(&vhcache->vhcache_lock); 8189 config_client_paths_async(vhc, ct_name, ct_addr, 8190 pp->phys_path_next, &tok); 8191 pp->phys_path_next = NULL; 8192 goto out; 8193 } 8194 8195 rw_exit(&vhcache->vhcache_lock); 8196 } 8197 } 8198 8199 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8200 out: 8201 free_phclient_path_list(pp_head); 8202 } 8203 8204 static void 8205 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8206 { 8207 mutex_enter(&vhc->vhc_lock); 8208 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8209 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8210 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8211 mutex_exit(&vhc->vhc_lock); 8212 } 8213 8214 static void 8215 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8216 { 8217 mutex_enter(&vhc->vhc_lock); 8218 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8219 cv_broadcast(&vhc->vhc_cv); 8220 mutex_exit(&vhc->vhc_lock); 8221 } 8222 8223 /* 8224 * Attach the phci driver instances associated with the vhci: 8225 * If root is mounted attach all phci driver instances. 8226 * If root is not mounted, attach the instances of only those phci 8227 * drivers that have the root support. 8228 */ 8229 static void 8230 attach_phci_drivers(mdi_vhci_config_t *vhc) 8231 { 8232 int i; 8233 major_t m; 8234 8235 for (i = 0; i < vhc->vhc_nphci_drivers; i++) { 8236 if (modrootloaded == 0 && 8237 vhc->vhc_phci_driver_list[i].phdriver_root_support == 0) 8238 continue; 8239 8240 m = ddi_name_to_major( 8241 vhc->vhc_phci_driver_list[i].phdriver_name); 8242 if (m != (major_t)-1) { 8243 if (ddi_hold_installed_driver(m) != NULL) 8244 ddi_rele_driver(m); 8245 } 8246 } 8247 } 8248 8249 /* 8250 * Build vhci cache: 8251 * 8252 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8253 * the phci driver instances. During this process the cache gets built. 
8254 * 8255 * Cache is built fully if the root is mounted. 8256 * If the root is not mounted, phci drivers that do not have root support 8257 * are not attached. As a result the cache is built partially. The entries 8258 * in the cache reflect only those phci drivers that have root support. 8259 */ 8260 static int 8261 build_vhci_cache(mdi_vhci_config_t *vhc) 8262 { 8263 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8264 8265 single_threaded_vhconfig_enter(vhc); 8266 8267 rw_enter(&vhcache->vhcache_lock, RW_READER); 8268 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8269 rw_exit(&vhcache->vhcache_lock); 8270 single_threaded_vhconfig_exit(vhc); 8271 return (0); 8272 } 8273 rw_exit(&vhcache->vhcache_lock); 8274 8275 attach_phci_drivers(vhc); 8276 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8277 BUS_CONFIG_ALL, (major_t)-1); 8278 8279 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8280 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8281 rw_exit(&vhcache->vhcache_lock); 8282 8283 single_threaded_vhconfig_exit(vhc); 8284 vhcache_dirty(vhc); 8285 return (1); 8286 } 8287 8288 /* 8289 * Determine if discovery of paths is needed. 8290 */ 8291 static int 8292 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8293 { 8294 int rv = 1; 8295 8296 mutex_enter(&vhc->vhc_lock); 8297 if (i_ddi_io_initialized() == 0) { 8298 if (vhc->vhc_path_discovery_boot > 0) { 8299 vhc->vhc_path_discovery_boot--; 8300 goto out; 8301 } 8302 } else { 8303 if (vhc->vhc_path_discovery_postboot > 0) { 8304 vhc->vhc_path_discovery_postboot--; 8305 goto out; 8306 } 8307 } 8308 8309 /* 8310 * Do full path discovery at most once per mdi_path_discovery_interval. 8311 * This is to avoid a series of full path discoveries when opening 8312 * stale /dev/[r]dsk links. 
8313 */ 8314 if (mdi_path_discovery_interval != -1 && 8315 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8316 goto out; 8317 8318 rv = 0; 8319 out: 8320 mutex_exit(&vhc->vhc_lock); 8321 return (rv); 8322 } 8323 8324 /* 8325 * Discover all paths: 8326 * 8327 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8328 * driver instances. During this process all paths will be discovered. 8329 */ 8330 static int 8331 vhcache_discover_paths(mdi_vhci_config_t *vhc) 8332 { 8333 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8334 int rv = 0; 8335 8336 single_threaded_vhconfig_enter(vhc); 8337 8338 if (vhcache_do_discovery(vhc)) { 8339 attach_phci_drivers(vhc); 8340 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8341 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8342 8343 mutex_enter(&vhc->vhc_lock); 8344 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8345 mdi_path_discovery_interval * TICKS_PER_SECOND; 8346 mutex_exit(&vhc->vhc_lock); 8347 rv = 1; 8348 } 8349 8350 single_threaded_vhconfig_exit(vhc); 8351 return (rv); 8352 } 8353 8354 /* 8355 * Generic vhci bus config implementation: 8356 * 8357 * Parameters 8358 * vdip vhci dip 8359 * flags bus config flags 8360 * op bus config operation 8361 * The remaining parameters are bus config operation specific 8362 * 8363 * for BUS_CONFIG_ONE 8364 * arg pointer to name@addr 8365 * child upon successful return from this function, *child will be 8366 * set to the configured and held devinfo child node of vdip. 8367 * ct_addr pointer to client address (i.e. GUID) 8368 * 8369 * for BUS_CONFIG_DRIVER 8370 * arg major number of the driver 8371 * child and ct_addr parameters are ignored 8372 * 8373 * for BUS_CONFIG_ALL 8374 * arg, child, and ct_addr parameters are ignored 8375 * 8376 * Note that for the rest of the bus config operations, this function simply 8377 * calls the framework provided default bus config routine. 
8378 */ 8379 int 8380 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8381 void *arg, dev_info_t **child, char *ct_addr) 8382 { 8383 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8384 mdi_vhci_config_t *vhc = vh->vh_config; 8385 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8386 int rv = 0; 8387 int params_valid = 0; 8388 char *cp; 8389 8390 /* 8391 * To bus config vhcis we relay operation, possibly using another 8392 * thread, to phcis. The phci driver then interacts with MDI to cause 8393 * vhci child nodes to be enumerated under the vhci node. Adding a 8394 * vhci child requires an ndi_devi_enter of the vhci. Since another 8395 * thread may be adding the child, to avoid deadlock we can't wait 8396 * for the relayed operations to complete if we have already entered 8397 * the vhci node. 8398 */ 8399 if (DEVI_BUSY_OWNED(vdip)) { 8400 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8401 "vhci dip is busy owned %p\n", (void *)vdip)); 8402 goto default_bus_config; 8403 } 8404 8405 rw_enter(&vhcache->vhcache_lock, RW_READER); 8406 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8407 rw_exit(&vhcache->vhcache_lock); 8408 rv = build_vhci_cache(vhc); 8409 rw_enter(&vhcache->vhcache_lock, RW_READER); 8410 } 8411 8412 switch (op) { 8413 case BUS_CONFIG_ONE: 8414 if (arg != NULL && ct_addr != NULL) { 8415 /* extract node name */ 8416 cp = (char *)arg; 8417 while (*cp != '\0' && *cp != '@') 8418 cp++; 8419 if (*cp == '@') { 8420 params_valid = 1; 8421 *cp = '\0'; 8422 config_client_paths(vhc, (char *)arg, ct_addr); 8423 /* config_client_paths() releases cache_lock */ 8424 *cp = '@'; 8425 break; 8426 } 8427 } 8428 8429 rw_exit(&vhcache->vhcache_lock); 8430 break; 8431 8432 case BUS_CONFIG_DRIVER: 8433 rw_exit(&vhcache->vhcache_lock); 8434 if (rv == 0) 8435 st_bus_config_all_phcis(vhc, flags, op, 8436 (major_t)(uintptr_t)arg); 8437 break; 8438 8439 case BUS_CONFIG_ALL: 8440 rw_exit(&vhcache->vhcache_lock); 8441 if (rv == 0) 8442 
st_bus_config_all_phcis(vhc, flags, op, -1); 8443 break; 8444 8445 default: 8446 rw_exit(&vhcache->vhcache_lock); 8447 break; 8448 } 8449 8450 8451 default_bus_config: 8452 /* 8453 * All requested child nodes are enumerated under the vhci. 8454 * Now configure them. 8455 */ 8456 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8457 NDI_SUCCESS) { 8458 return (MDI_SUCCESS); 8459 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8460 /* discover all paths and try configuring again */ 8461 if (vhcache_discover_paths(vhc) && 8462 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8463 NDI_SUCCESS) 8464 return (MDI_SUCCESS); 8465 } 8466 8467 return (MDI_FAILURE); 8468 } 8469 8470 /* 8471 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8472 */ 8473 static nvlist_t * 8474 read_on_disk_vhci_cache(char *vhci_class) 8475 { 8476 nvlist_t *nvl; 8477 int err; 8478 char *filename; 8479 8480 filename = vhclass2vhcache_filename(vhci_class); 8481 8482 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8483 kmem_free(filename, strlen(filename) + 1); 8484 return (nvl); 8485 } else if (err == EIO) 8486 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8487 else if (err == EINVAL) 8488 cmn_err(CE_WARN, 8489 "%s: data file corrupted, will recreate\n", filename); 8490 8491 kmem_free(filename, strlen(filename) + 1); 8492 return (NULL); 8493 } 8494 8495 /* 8496 * Read on-disk vhci cache into nvlists for all vhci classes. 8497 * Called during booting by i_ddi_read_devices_files(). 8498 */ 8499 void 8500 mdi_read_devices_files(void) 8501 { 8502 int i; 8503 8504 for (i = 0; i < N_VHCI_CLASSES; i++) 8505 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8506 } 8507 8508 /* 8509 * Remove all stale entries from vhci cache. 
8510 */ 8511 static void 8512 clean_vhcache(mdi_vhci_config_t *vhc) 8513 { 8514 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8515 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8516 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8517 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8518 8519 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8520 8521 cct_head = vhcache->vhcache_client_head; 8522 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8523 for (cct = cct_head; cct != NULL; cct = cct_next) { 8524 cct_next = cct->cct_next; 8525 8526 cpi_head = cct->cct_cpi_head; 8527 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8528 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8529 cpi_next = cpi->cpi_next; 8530 if (cpi->cpi_pip != NULL) { 8531 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8532 enqueue_tail_vhcache_pathinfo(cct, cpi); 8533 } else 8534 free_vhcache_pathinfo(cpi); 8535 } 8536 8537 if (cct->cct_cpi_head != NULL) 8538 enqueue_vhcache_client(vhcache, cct); 8539 else { 8540 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8541 (mod_hash_key_t)cct->cct_name_addr); 8542 free_vhcache_client(cct); 8543 } 8544 } 8545 8546 cphci_head = vhcache->vhcache_phci_head; 8547 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8548 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8549 cphci_next = cphci->cphci_next; 8550 if (cphci->cphci_phci != NULL) 8551 enqueue_vhcache_phci(vhcache, cphci); 8552 else 8553 free_vhcache_phci(cphci); 8554 } 8555 8556 vhcache->vhcache_clean_time = lbolt64; 8557 rw_exit(&vhcache->vhcache_lock); 8558 vhcache_dirty(vhc); 8559 } 8560 8561 /* 8562 * Remove all stale entries from vhci cache. 
8563 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8564 */ 8565 void 8566 mdi_clean_vhcache(void) 8567 { 8568 mdi_vhci_t *vh; 8569 8570 mutex_enter(&mdi_mutex); 8571 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8572 vh->vh_refcnt++; 8573 mutex_exit(&mdi_mutex); 8574 clean_vhcache(vh->vh_config); 8575 mutex_enter(&mdi_mutex); 8576 vh->vh_refcnt--; 8577 } 8578 mutex_exit(&mdi_mutex); 8579 } 8580 8581 /* 8582 * mdi_vhci_walk_clients(): 8583 * Walker routine to traverse client dev_info nodes 8584 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8585 * below the client, including nexus devices, which we dont want. 8586 * So we just traverse the immediate siblings, starting from 1st client. 8587 */ 8588 void 8589 mdi_vhci_walk_clients(dev_info_t *vdip, 8590 int (*f)(dev_info_t *, void *), void *arg) 8591 { 8592 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8593 dev_info_t *cdip; 8594 mdi_client_t *ct; 8595 8596 MDI_VHCI_CLIENT_LOCK(vh); 8597 cdip = ddi_get_child(vdip); 8598 while (cdip) { 8599 ct = i_devi_get_client(cdip); 8600 MDI_CLIENT_LOCK(ct); 8601 8602 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 8603 cdip = ddi_get_next_sibling(cdip); 8604 else 8605 cdip = NULL; 8606 8607 MDI_CLIENT_UNLOCK(ct); 8608 } 8609 MDI_VHCI_CLIENT_UNLOCK(vh); 8610 } 8611 8612 /* 8613 * mdi_vhci_walk_phcis(): 8614 * Walker routine to traverse phci dev_info nodes 8615 */ 8616 void 8617 mdi_vhci_walk_phcis(dev_info_t *vdip, 8618 int (*f)(dev_info_t *, void *), void *arg) 8619 { 8620 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8621 mdi_phci_t *ph, *next; 8622 8623 MDI_VHCI_PHCI_LOCK(vh); 8624 ph = vh->vh_phci_head; 8625 while (ph) { 8626 MDI_PHCI_LOCK(ph); 8627 8628 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 8629 next = ph->ph_next; 8630 else 8631 next = NULL; 8632 8633 MDI_PHCI_UNLOCK(ph); 8634 ph = next; 8635 } 8636 MDI_VHCI_PHCI_UNLOCK(vh); 8637 } 8638 8639 8640 /* 8641 * mdi_walk_vhcis(): 8642 * Walker routine to traverse vhci 
dev_info nodes 8643 */ 8644 void 8645 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8646 { 8647 mdi_vhci_t *vh = NULL; 8648 8649 mutex_enter(&mdi_mutex); 8650 /* 8651 * Scan for already registered vhci 8652 */ 8653 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8654 vh->vh_refcnt++; 8655 mutex_exit(&mdi_mutex); 8656 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8657 mutex_enter(&mdi_mutex); 8658 vh->vh_refcnt--; 8659 break; 8660 } else { 8661 mutex_enter(&mdi_mutex); 8662 vh->vh_refcnt--; 8663 } 8664 } 8665 8666 mutex_exit(&mdi_mutex); 8667 } 8668 8669 /* 8670 * i_mdi_log_sysevent(): 8671 * Logs events for pickup by syseventd 8672 */ 8673 static void 8674 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8675 { 8676 char *path_name; 8677 nvlist_t *attr_list; 8678 8679 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8680 KM_SLEEP) != DDI_SUCCESS) { 8681 goto alloc_failed; 8682 } 8683 8684 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8685 (void) ddi_pathname(dip, path_name); 8686 8687 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8688 ddi_driver_name(dip)) != DDI_SUCCESS) { 8689 goto error; 8690 } 8691 8692 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8693 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8694 goto error; 8695 } 8696 8697 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8698 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8699 goto error; 8700 } 8701 8702 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8703 path_name) != DDI_SUCCESS) { 8704 goto error; 8705 } 8706 8707 if (nvlist_add_string(attr_list, DDI_CLASS, 8708 ph_vh_class) != DDI_SUCCESS) { 8709 goto error; 8710 } 8711 8712 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8713 attr_list, NULL, DDI_SLEEP); 8714 8715 error: 8716 kmem_free(path_name, MAXPATHLEN); 8717 nvlist_free(attr_list); 8718 return; 8719 8720 alloc_failed: 8721 MDI_DEBUG(1, (CE_WARN, dip, 8722 "!i_mdi_log_sysevent: Unable to send sysevent")); 8723 } 
8724