1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(level, stmnt) \ 77 if (mdi_debug >= (level)) i_mdi_log stmnt 78 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 79 #else /* !DEBUG */ 80 #define MDI_DEBUG(level, stmnt) 81 #endif /* DEBUG */ 82 83 extern pri_t minclsyspri; 84 extern int modrootloaded; 85 86 /* 87 * Global mutex: 88 * Protects vHCI list and structure members. 
89 */ 90 kmutex_t mdi_mutex; 91 92 /* 93 * Registered vHCI class driver lists 94 */ 95 int mdi_vhci_count; 96 mdi_vhci_t *mdi_vhci_head; 97 mdi_vhci_t *mdi_vhci_tail; 98 99 /* 100 * Client Hash Table size 101 */ 102 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 103 104 /* 105 * taskq interface definitions 106 */ 107 #define MDI_TASKQ_N_THREADS 8 108 #define MDI_TASKQ_PRI minclsyspri 109 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 110 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 111 112 taskq_t *mdi_taskq; 113 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 114 115 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 116 117 /* 118 * The data should be "quiet" for this interval (in seconds) before the 119 * vhci cached data is flushed to the disk. 120 */ 121 static int mdi_vhcache_flush_delay = 10; 122 123 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 124 static int mdi_vhcache_flush_daemon_idle_time = 60; 125 126 /* 127 * MDI falls back to discovery of all paths when a bus_config_one fails. 128 * The following parameters can be used to tune this operation. 129 * 130 * mdi_path_discovery_boot 131 * Number of times path discovery will be attempted during early boot. 132 * Probably there is no reason to ever set this value to greater than one. 133 * 134 * mdi_path_discovery_postboot 135 * Number of times path discovery will be attempted after early boot. 136 * Set it to a minimum of two to allow for discovery of iscsi paths which 137 * may happen very late during booting. 138 * 139 * mdi_path_discovery_interval 140 * Minimum number of seconds MDI will wait between successive discovery 141 * of all paths. Set it to -1 to disable discovery of all paths. 
142 */ 143 static int mdi_path_discovery_boot = 1; 144 static int mdi_path_discovery_postboot = 2; 145 static int mdi_path_discovery_interval = 10; 146 147 /* 148 * number of seconds the asynchronous configuration thread will sleep idle 149 * before exiting. 150 */ 151 static int mdi_async_config_idle_time = 600; 152 153 static int mdi_bus_config_cache_hash_size = 256; 154 155 /* turns off multithreaded configuration for certain operations */ 156 static int mdi_mtc_off = 0; 157 158 /* 159 * MDI component property name/value string definitions 160 */ 161 const char *mdi_component_prop = "mpxio-component"; 162 const char *mdi_component_prop_vhci = "vhci"; 163 const char *mdi_component_prop_phci = "phci"; 164 const char *mdi_component_prop_client = "client"; 165 166 /* 167 * MDI client global unique identifier property name 168 */ 169 const char *mdi_client_guid_prop = "client-guid"; 170 171 /* 172 * MDI client load balancing property name/value string definitions 173 */ 174 const char *mdi_load_balance = "load-balance"; 175 const char *mdi_load_balance_none = "none"; 176 const char *mdi_load_balance_rr = "round-robin"; 177 const char *mdi_load_balance_lba = "logical-block"; 178 179 /* 180 * Obsolete vHCI class definition; to be removed after Leadville update 181 */ 182 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 183 184 static char vhci_greeting[] = 185 "\tThere already exists one vHCI driver for class %s\n" 186 "\tOnly one vHCI driver for each class is allowed\n"; 187 188 /* 189 * Static function prototypes 190 */ 191 static int i_mdi_phci_offline(dev_info_t *, uint_t); 192 static int i_mdi_client_offline(dev_info_t *, uint_t); 193 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 194 static void i_mdi_phci_post_detach(dev_info_t *, 195 ddi_detach_cmd_t, int); 196 static int i_mdi_client_pre_detach(dev_info_t *, 197 ddi_detach_cmd_t); 198 static void i_mdi_client_post_detach(dev_info_t *, 199 ddi_detach_cmd_t, int); 200 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 201 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 202 static int i_mdi_lba_lb(mdi_client_t *ct, 203 mdi_pathinfo_t **ret_pip, struct buf *buf); 204 static void i_mdi_pm_hold_client(mdi_client_t *, int); 205 static void i_mdi_pm_rele_client(mdi_client_t *, int); 206 static void i_mdi_pm_reset_client(mdi_client_t *); 207 static int i_mdi_power_all_phci(mdi_client_t *); 208 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 209 210 211 /* 212 * Internal mdi_pathinfo node functions 213 */ 214 static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); 215 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 216 217 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 218 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 219 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 220 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 221 static void i_mdi_phci_unlock(mdi_phci_t *); 222 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 223 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 224 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 225 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 226 mdi_client_t *); 227 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 228 static void i_mdi_client_remove_path(mdi_client_t *, 229 mdi_pathinfo_t *); 230 231 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 232 mdi_pathinfo_state_t, int); 233 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 234 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 235 char **, int); 236 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 237 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 238 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 239 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 240 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 241 
static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 242 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 243 static void i_mdi_client_update_state(mdi_client_t *); 244 static int i_mdi_client_compute_state(mdi_client_t *, 245 mdi_phci_t *); 246 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 247 static void i_mdi_client_unlock(mdi_client_t *); 248 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 249 static mdi_client_t *i_devi_get_client(dev_info_t *); 250 /* 251 * NOTE: this will be removed once the NWS files are changed to use the new 252 * mdi_{enable,disable}_path interfaces 253 */ 254 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 255 int, int); 256 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 257 mdi_vhci_t *vh, int flags, int op); 258 /* 259 * Failover related function prototypes 260 */ 261 static int i_mdi_failover(void *); 262 263 /* 264 * misc internal functions 265 */ 266 static int i_mdi_get_hash_key(char *); 267 static int i_map_nvlist_error_to_mdi(int); 268 static void i_mdi_report_path_state(mdi_client_t *, 269 mdi_pathinfo_t *); 270 271 static void setup_vhci_cache(mdi_vhci_t *); 272 static int destroy_vhci_cache(mdi_vhci_t *); 273 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 274 static boolean_t stop_vhcache_flush_thread(void *, int); 275 static void free_string_array(char **, int); 276 static void free_vhcache_phci(mdi_vhcache_phci_t *); 277 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 278 static void free_vhcache_client(mdi_vhcache_client_t *); 279 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 280 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 281 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 282 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 283 static void vhcache_pi_add(mdi_vhci_config_t *, 284 struct mdi_pathinfo *); 285 static void 
vhcache_pi_remove(mdi_vhci_config_t *, 286 struct mdi_pathinfo *); 287 static void free_phclient_path_list(mdi_phys_path_t *); 288 static void sort_vhcache_paths(mdi_vhcache_client_t *); 289 static int flush_vhcache(mdi_vhci_config_t *, int); 290 static void vhcache_dirty(mdi_vhci_config_t *); 291 static void free_async_client_config(mdi_async_client_config_t *); 292 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 293 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 294 static nvlist_t *read_on_disk_vhci_cache(char *); 295 extern int fread_nvlist(char *, nvlist_t **); 296 extern int fwrite_nvlist(char *, nvlist_t *); 297 298 /* called once when first vhci registers with mdi */ 299 static void 300 i_mdi_init() 301 { 302 static int initialized = 0; 303 304 if (initialized) 305 return; 306 initialized = 1; 307 308 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 309 /* 310 * Create our taskq resources 311 */ 312 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 313 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 314 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 315 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 316 } 317 318 /* 319 * mdi_get_component_type(): 320 * Return mpxio component type 321 * Return Values: 322 * MDI_COMPONENT_NONE 323 * MDI_COMPONENT_VHCI 324 * MDI_COMPONENT_PHCI 325 * MDI_COMPONENT_CLIENT 326 * XXX This doesn't work under multi-level MPxIO and should be 327 * removed when clients migrate mdi_component_is_*() interfaces. 328 */ 329 int 330 mdi_get_component_type(dev_info_t *dip) 331 { 332 return (DEVI(dip)->devi_mdi_component); 333 } 334 335 /* 336 * mdi_vhci_register(): 337 * Register a vHCI module with the mpxio framework 338 * mdi_vhci_register() is called by vHCI drivers to register the 339 * 'class_driver' vHCI driver and its MDI entrypoints with the 340 * mpxio framework. The vHCI driver must call this interface as 341 * part of its attach(9e) handler. 
342 * Competing threads may try to attach mdi_vhci_register() as 343 * the vHCI drivers are loaded and attached as a result of pHCI 344 * driver instance registration (mdi_phci_register()) with the 345 * framework. 346 * Return Values: 347 * MDI_SUCCESS 348 * MDI_FAILURE 349 */ 350 /*ARGSUSED*/ 351 int 352 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 353 int flags) 354 { 355 mdi_vhci_t *vh = NULL; 356 357 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 358 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 359 360 i_mdi_init(); 361 362 mutex_enter(&mdi_mutex); 363 /* 364 * Scan for already registered vhci 365 */ 366 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 367 if (strcmp(vh->vh_class, class) == 0) { 368 /* 369 * vHCI has already been created. Check for valid 370 * vHCI ops registration. We only support one vHCI 371 * module per class 372 */ 373 if (vh->vh_ops != NULL) { 374 mutex_exit(&mdi_mutex); 375 cmn_err(CE_NOTE, vhci_greeting, class); 376 return (MDI_FAILURE); 377 } 378 break; 379 } 380 } 381 382 /* 383 * if not yet created, create the vHCI component 384 */ 385 if (vh == NULL) { 386 struct client_hash *hash = NULL; 387 char *load_balance; 388 389 /* 390 * Allocate and initialize the mdi extensions 391 */ 392 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 393 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 394 KM_SLEEP); 395 vh->vh_client_table = hash; 396 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 397 (void) strcpy(vh->vh_class, class); 398 vh->vh_lb = LOAD_BALANCE_RR; 399 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 400 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 401 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 402 vh->vh_lb = LOAD_BALANCE_NONE; 403 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 404 == 0) { 405 vh->vh_lb = LOAD_BALANCE_LBA; 406 } 407 ddi_prop_free(load_balance); 408 } 409 410 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 411 
mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 412 413 /* 414 * Store the vHCI ops vectors 415 */ 416 vh->vh_dip = vdip; 417 vh->vh_ops = vops; 418 419 setup_vhci_cache(vh); 420 421 if (mdi_vhci_head == NULL) { 422 mdi_vhci_head = vh; 423 } 424 if (mdi_vhci_tail) { 425 mdi_vhci_tail->vh_next = vh; 426 } 427 mdi_vhci_tail = vh; 428 mdi_vhci_count++; 429 } 430 431 /* 432 * Claim the devfs node as a vhci component 433 */ 434 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 435 436 /* 437 * Initialize our back reference from dev_info node 438 */ 439 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 440 mutex_exit(&mdi_mutex); 441 return (MDI_SUCCESS); 442 } 443 444 /* 445 * mdi_vhci_unregister(): 446 * Unregister a vHCI module from mpxio framework 447 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 448 * of a vhci to unregister it from the framework. 449 * Return Values: 450 * MDI_SUCCESS 451 * MDI_FAILURE 452 */ 453 /*ARGSUSED*/ 454 int 455 mdi_vhci_unregister(dev_info_t *vdip, int flags) 456 { 457 mdi_vhci_t *found, *vh, *prev = NULL; 458 459 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 460 461 /* 462 * Check for invalid VHCI 463 */ 464 if ((vh = i_devi_get_vhci(vdip)) == NULL) 465 return (MDI_FAILURE); 466 467 /* 468 * Scan the list of registered vHCIs for a match 469 */ 470 mutex_enter(&mdi_mutex); 471 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 472 if (found == vh) 473 break; 474 prev = found; 475 } 476 477 if (found == NULL) { 478 mutex_exit(&mdi_mutex); 479 return (MDI_FAILURE); 480 } 481 482 /* 483 * Check the vHCI, pHCI and client count. All the pHCIs and clients 484 * should have been unregistered, before a vHCI can be 485 * unregistered. 
486 */ 487 MDI_VHCI_PHCI_LOCK(vh); 488 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 489 MDI_VHCI_PHCI_UNLOCK(vh); 490 mutex_exit(&mdi_mutex); 491 return (MDI_FAILURE); 492 } 493 MDI_VHCI_PHCI_UNLOCK(vh); 494 495 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 496 mutex_exit(&mdi_mutex); 497 return (MDI_FAILURE); 498 } 499 500 /* 501 * Remove the vHCI from the global list 502 */ 503 if (vh == mdi_vhci_head) { 504 mdi_vhci_head = vh->vh_next; 505 } else { 506 prev->vh_next = vh->vh_next; 507 } 508 if (vh == mdi_vhci_tail) { 509 mdi_vhci_tail = prev; 510 } 511 mdi_vhci_count--; 512 mutex_exit(&mdi_mutex); 513 514 vh->vh_ops = NULL; 515 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 516 DEVI(vdip)->devi_mdi_xhci = NULL; 517 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 518 kmem_free(vh->vh_client_table, 519 mdi_client_table_size * sizeof (struct client_hash)); 520 mutex_destroy(&vh->vh_phci_mutex); 521 mutex_destroy(&vh->vh_client_mutex); 522 523 kmem_free(vh, sizeof (mdi_vhci_t)); 524 return (MDI_SUCCESS); 525 } 526 527 /* 528 * i_mdi_vhci_class2vhci(): 529 * Look for a matching vHCI module given a vHCI class name 530 * Return Values: 531 * Handle to a vHCI component 532 * NULL 533 */ 534 static mdi_vhci_t * 535 i_mdi_vhci_class2vhci(char *class) 536 { 537 mdi_vhci_t *vh = NULL; 538 539 ASSERT(!MUTEX_HELD(&mdi_mutex)); 540 541 mutex_enter(&mdi_mutex); 542 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 543 if (strcmp(vh->vh_class, class) == 0) { 544 break; 545 } 546 } 547 mutex_exit(&mdi_mutex); 548 return (vh); 549 } 550 551 /* 552 * i_devi_get_vhci(): 553 * Utility function to get the handle to a vHCI component 554 * Return Values: 555 * Handle to a vHCI component 556 * NULL 557 */ 558 mdi_vhci_t * 559 i_devi_get_vhci(dev_info_t *vdip) 560 { 561 mdi_vhci_t *vh = NULL; 562 if (MDI_VHCI(vdip)) { 563 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 564 } 565 return (vh); 566 } 567 568 /* 569 * mdi_phci_register(): 570 * Register a pHCI 
module with mpxio framework 571 * mdi_phci_register() is called by pHCI drivers to register with 572 * the mpxio framework and a specific 'class_driver' vHCI. The 573 * pHCI driver must call this interface as part of its attach(9e) 574 * handler. 575 * Return Values: 576 * MDI_SUCCESS 577 * MDI_FAILURE 578 */ 579 /*ARGSUSED*/ 580 int 581 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 582 { 583 mdi_phci_t *ph; 584 mdi_vhci_t *vh; 585 char *data; 586 char *pathname; 587 588 /* 589 * Some subsystems, like fcp, perform pHCI registration from a 590 * different thread than the one doing the pHCI attach(9E) - the 591 * driver attach code is waiting for this other thread to complete. 592 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 593 * (indicating that some thread has done an ndi_devi_enter of parent) 594 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 595 */ 596 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 597 598 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 599 (void) ddi_pathname(pdip, pathname); 600 601 /* 602 * Check for mpxio-disable property. Enable mpxio if the property is 603 * missing or not set to "yes". 604 * If the property is set to "yes" then emit a brief message. 
605 */ 606 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 607 &data) == DDI_SUCCESS)) { 608 if (strcmp(data, "yes") == 0) { 609 MDI_DEBUG(1, (CE_CONT, pdip, 610 "?%s (%s%d) multipath capabilities " 611 "disabled via %s.conf.\n", pathname, 612 ddi_driver_name(pdip), ddi_get_instance(pdip), 613 ddi_driver_name(pdip))); 614 ddi_prop_free(data); 615 kmem_free(pathname, MAXPATHLEN); 616 return (MDI_FAILURE); 617 } 618 ddi_prop_free(data); 619 } 620 621 kmem_free(pathname, MAXPATHLEN); 622 623 /* 624 * Search for a matching vHCI 625 */ 626 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 627 if (vh == NULL) { 628 return (MDI_FAILURE); 629 } 630 631 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 632 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 633 ph->ph_dip = pdip; 634 ph->ph_vhci = vh; 635 ph->ph_next = NULL; 636 ph->ph_unstable = 0; 637 ph->ph_vprivate = 0; 638 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 639 640 MDI_PHCI_LOCK(ph); 641 MDI_PHCI_SET_POWER_UP(ph); 642 MDI_PHCI_UNLOCK(ph); 643 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 644 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 645 646 vhcache_phci_add(vh->vh_config, ph); 647 648 MDI_VHCI_PHCI_LOCK(vh); 649 if (vh->vh_phci_head == NULL) { 650 vh->vh_phci_head = ph; 651 } 652 if (vh->vh_phci_tail) { 653 vh->vh_phci_tail->ph_next = ph; 654 } 655 vh->vh_phci_tail = ph; 656 vh->vh_phci_count++; 657 MDI_VHCI_PHCI_UNLOCK(vh); 658 659 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 660 return (MDI_SUCCESS); 661 } 662 663 /* 664 * mdi_phci_unregister(): 665 * Unregister a pHCI module from mpxio framework 666 * mdi_phci_unregister() is called by the pHCI drivers from their 667 * detach(9E) handler to unregister their instances from the 668 * framework. 
669 * Return Values: 670 * MDI_SUCCESS 671 * MDI_FAILURE 672 */ 673 /*ARGSUSED*/ 674 int 675 mdi_phci_unregister(dev_info_t *pdip, int flags) 676 { 677 mdi_vhci_t *vh; 678 mdi_phci_t *ph; 679 mdi_phci_t *tmp; 680 mdi_phci_t *prev = NULL; 681 682 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 683 684 ph = i_devi_get_phci(pdip); 685 if (ph == NULL) { 686 MDI_DEBUG(1, (CE_WARN, pdip, 687 "!pHCI unregister: Not a valid pHCI")); 688 return (MDI_FAILURE); 689 } 690 691 vh = ph->ph_vhci; 692 ASSERT(vh != NULL); 693 if (vh == NULL) { 694 MDI_DEBUG(1, (CE_WARN, pdip, 695 "!pHCI unregister: Not a valid vHCI")); 696 return (MDI_FAILURE); 697 } 698 699 MDI_VHCI_PHCI_LOCK(vh); 700 tmp = vh->vh_phci_head; 701 while (tmp) { 702 if (tmp == ph) { 703 break; 704 } 705 prev = tmp; 706 tmp = tmp->ph_next; 707 } 708 709 if (ph == vh->vh_phci_head) { 710 vh->vh_phci_head = ph->ph_next; 711 } else { 712 prev->ph_next = ph->ph_next; 713 } 714 715 if (ph == vh->vh_phci_tail) { 716 vh->vh_phci_tail = prev; 717 } 718 719 vh->vh_phci_count--; 720 MDI_VHCI_PHCI_UNLOCK(vh); 721 722 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 723 ESC_DDI_INITIATOR_UNREGISTER); 724 vhcache_phci_remove(vh->vh_config, ph); 725 cv_destroy(&ph->ph_unstable_cv); 726 mutex_destroy(&ph->ph_mutex); 727 kmem_free(ph, sizeof (mdi_phci_t)); 728 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 729 DEVI(pdip)->devi_mdi_xhci = NULL; 730 return (MDI_SUCCESS); 731 } 732 733 /* 734 * i_devi_get_phci(): 735 * Utility function to return the phci extensions. 736 */ 737 static mdi_phci_t * 738 i_devi_get_phci(dev_info_t *pdip) 739 { 740 mdi_phci_t *ph = NULL; 741 if (MDI_PHCI(pdip)) { 742 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 743 } 744 return (ph); 745 } 746 747 /* 748 * Single thread mdi entry into devinfo node for modifying its children. 749 * If necessary we perform an ndi_devi_enter of the vHCI before doing 750 * an ndi_devi_enter of 'dip'. 
We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a thread that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and then enter down the path
	 * to the pHCI. If pHCI is detaching then we piggyback this call's
	 * enter of the vHCI on the framework's vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should check DEVI_IS_DETACHING under an enter of the parent to
	 * avoid race with detach - but we can't do that because the
	 * framework has already entered the parent, so we have some
	 * complexity instead.
	 *
	 * A vcircular of -1 records that no vHCI enter is held by this call.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else {
			delay(1);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);

	/*
	 * Pack both values into the caller's int: vHCI value in the upper
	 * 16 bits, pHCI value in the lower 16 bits.  The shift is done on
	 * an unsigned copy because vcircular may be -1 and left-shifting a
	 * negative signed value is undefined behavior in C.
	 */
	*circular = (int)(((uint_t)vcircular << 16) |
	    ((uint_t)pcircular & 0xFFFF));
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
801 */ 802 void 803 mdi_devi_exit(dev_info_t *phci_dip, int circular) 804 { 805 dev_info_t *vdip; 806 int vcircular, pcircular; 807 808 /* Verify calling context */ 809 ASSERT(MDI_PHCI(phci_dip)); 810 vdip = mdi_devi_get_vdip(phci_dip); 811 ASSERT(vdip); /* A pHCI always has a vHCI */ 812 813 /* extract two circular recursion values from single int */ 814 pcircular = (short)(circular & 0xFFFF); 815 vcircular = (short)((circular >> 16) & 0xFFFF); 816 817 ndi_devi_exit(phci_dip, pcircular); 818 if (vcircular != -1) 819 ndi_devi_exit(vdip, vcircular); 820 } 821 822 /* 823 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 824 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 825 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 826 * with vHCI power management code during path online/offline. Each 827 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 828 * occur within the scope of an active mdi_devi_enter that establishes the 829 * circular value. 
830 */ 831 void 832 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 833 { 834 int pcircular; 835 836 /* Verify calling context */ 837 ASSERT(MDI_PHCI(phci_dip)); 838 839 pcircular = (short)(circular & 0xFFFF); 840 ndi_devi_exit(phci_dip, pcircular); 841 } 842 843 void 844 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 845 { 846 int pcircular; 847 848 /* Verify calling context */ 849 ASSERT(MDI_PHCI(phci_dip)); 850 851 ndi_devi_enter(phci_dip, &pcircular); 852 853 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 854 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 855 } 856 857 /* 858 * mdi_devi_get_vdip(): 859 * given a pHCI dip return vHCI dip 860 */ 861 dev_info_t * 862 mdi_devi_get_vdip(dev_info_t *pdip) 863 { 864 mdi_phci_t *ph; 865 866 ph = i_devi_get_phci(pdip); 867 if (ph && ph->ph_vhci) 868 return (ph->ph_vhci->vh_dip); 869 return (NULL); 870 } 871 872 /* 873 * mdi_devi_pdip_entered(): 874 * Return 1 if we are vHCI and have done an ndi_devi_enter 875 * of a pHCI 876 */ 877 int 878 mdi_devi_pdip_entered(dev_info_t *vdip) 879 { 880 mdi_vhci_t *vh; 881 mdi_phci_t *ph; 882 883 vh = i_devi_get_vhci(vdip); 884 if (vh == NULL) 885 return (0); 886 887 MDI_VHCI_PHCI_LOCK(vh); 888 ph = vh->vh_phci_head; 889 while (ph) { 890 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 891 MDI_VHCI_PHCI_UNLOCK(vh); 892 return (1); 893 } 894 ph = ph->ph_next; 895 } 896 MDI_VHCI_PHCI_UNLOCK(vh); 897 return (0); 898 } 899 900 /* 901 * mdi_phci_path2devinfo(): 902 * Utility function to search for a valid phci device given 903 * the devfs pathname. 
904 */ 905 dev_info_t * 906 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 907 { 908 char *temp_pathname; 909 mdi_vhci_t *vh; 910 mdi_phci_t *ph; 911 dev_info_t *pdip = NULL; 912 913 vh = i_devi_get_vhci(vdip); 914 ASSERT(vh != NULL); 915 916 if (vh == NULL) { 917 /* 918 * Invalid vHCI component, return failure 919 */ 920 return (NULL); 921 } 922 923 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 924 MDI_VHCI_PHCI_LOCK(vh); 925 ph = vh->vh_phci_head; 926 while (ph != NULL) { 927 pdip = ph->ph_dip; 928 ASSERT(pdip != NULL); 929 *temp_pathname = '\0'; 930 (void) ddi_pathname(pdip, temp_pathname); 931 if (strcmp(temp_pathname, pathname) == 0) { 932 break; 933 } 934 ph = ph->ph_next; 935 } 936 if (ph == NULL) { 937 pdip = NULL; 938 } 939 MDI_VHCI_PHCI_UNLOCK(vh); 940 kmem_free(temp_pathname, MAXPATHLEN); 941 return (pdip); 942 } 943 944 /* 945 * mdi_phci_get_path_count(): 946 * get number of path information nodes associated with a given 947 * pHCI device. 948 */ 949 int 950 mdi_phci_get_path_count(dev_info_t *pdip) 951 { 952 mdi_phci_t *ph; 953 int count = 0; 954 955 ph = i_devi_get_phci(pdip); 956 if (ph != NULL) { 957 count = ph->ph_path_count; 958 } 959 return (count); 960 } 961 962 /* 963 * i_mdi_phci_lock(): 964 * Lock a pHCI device 965 * Return Values: 966 * None 967 * Note: 968 * The default locking order is: 969 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 970 * But there are number of situations where locks need to be 971 * grabbed in reverse order. This routine implements try and lock 972 * mechanism depending on the requested parameter option. 973 */ 974 static void 975 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 976 { 977 if (pip) { 978 /* Reverse locking is requested. */ 979 while (MDI_PHCI_TRYLOCK(ph) == 0) { 980 /* 981 * tryenter failed. 
Try to grab again 982 * after a small delay 983 */ 984 MDI_PI_HOLD(pip); 985 MDI_PI_UNLOCK(pip); 986 delay(1); 987 MDI_PI_LOCK(pip); 988 MDI_PI_RELE(pip); 989 } 990 } else { 991 MDI_PHCI_LOCK(ph); 992 } 993 } 994 995 /* 996 * i_mdi_phci_unlock(): 997 * Unlock the pHCI component 998 */ 999 static void 1000 i_mdi_phci_unlock(mdi_phci_t *ph) 1001 { 1002 MDI_PHCI_UNLOCK(ph); 1003 } 1004 1005 /* 1006 * i_mdi_devinfo_create(): 1007 * create client device's devinfo node 1008 * Return Values: 1009 * dev_info 1010 * NULL 1011 * Notes: 1012 */ 1013 static dev_info_t * 1014 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1015 char **compatible, int ncompatible) 1016 { 1017 dev_info_t *cdip = NULL; 1018 1019 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1020 1021 /* Verify for duplicate entry */ 1022 cdip = i_mdi_devinfo_find(vh, name, guid); 1023 ASSERT(cdip == NULL); 1024 if (cdip) { 1025 cmn_err(CE_WARN, 1026 "i_mdi_devinfo_create: client dip %p already exists", 1027 (void *)cdip); 1028 } 1029 1030 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1031 if (cdip == NULL) 1032 goto fail; 1033 1034 /* 1035 * Create component type and Global unique identifier 1036 * properties 1037 */ 1038 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1039 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1040 goto fail; 1041 } 1042 1043 /* Decorate the node with compatible property */ 1044 if (compatible && 1045 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1046 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1047 goto fail; 1048 } 1049 1050 return (cdip); 1051 1052 fail: 1053 if (cdip) { 1054 (void) ndi_prop_remove_all(cdip); 1055 (void) ndi_devi_free(cdip); 1056 } 1057 return (NULL); 1058 } 1059 1060 /* 1061 * i_mdi_devinfo_find(): 1062 * Find a matching devinfo node for given client node name 1063 * and its guid. 
1064 * Return Values: 1065 * Handle to a dev_info node or NULL 1066 */ 1067 static dev_info_t * 1068 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1069 { 1070 char *data; 1071 dev_info_t *cdip = NULL; 1072 dev_info_t *ndip = NULL; 1073 int circular; 1074 1075 ndi_devi_enter(vh->vh_dip, &circular); 1076 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1077 while ((cdip = ndip) != NULL) { 1078 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1079 1080 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1081 continue; 1082 } 1083 1084 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1085 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1086 &data) != DDI_PROP_SUCCESS) { 1087 continue; 1088 } 1089 1090 if (strcmp(data, guid) != 0) { 1091 ddi_prop_free(data); 1092 continue; 1093 } 1094 ddi_prop_free(data); 1095 break; 1096 } 1097 ndi_devi_exit(vh->vh_dip, circular); 1098 return (cdip); 1099 } 1100 1101 /* 1102 * i_mdi_devinfo_remove(): 1103 * Remove a client device node 1104 */ 1105 static int 1106 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1107 { 1108 int rv = MDI_SUCCESS; 1109 1110 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1111 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1112 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1113 if (rv != NDI_SUCCESS) { 1114 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1115 " failed. 
cdip = %p\n", (void *)cdip)); 1116 } 1117 /* 1118 * Convert to MDI error code 1119 */ 1120 switch (rv) { 1121 case NDI_SUCCESS: 1122 rv = MDI_SUCCESS; 1123 break; 1124 case NDI_BUSY: 1125 rv = MDI_BUSY; 1126 break; 1127 default: 1128 rv = MDI_FAILURE; 1129 break; 1130 } 1131 } 1132 return (rv); 1133 } 1134 1135 /* 1136 * i_devi_get_client() 1137 * Utility function to get mpxio component extensions 1138 */ 1139 static mdi_client_t * 1140 i_devi_get_client(dev_info_t *cdip) 1141 { 1142 mdi_client_t *ct = NULL; 1143 1144 if (MDI_CLIENT(cdip)) { 1145 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1146 } 1147 return (ct); 1148 } 1149 1150 /* 1151 * i_mdi_is_child_present(): 1152 * Search for the presence of client device dev_info node 1153 */ 1154 static int 1155 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1156 { 1157 int rv = MDI_FAILURE; 1158 struct dev_info *dip; 1159 int circular; 1160 1161 ndi_devi_enter(vdip, &circular); 1162 dip = DEVI(vdip)->devi_child; 1163 while (dip) { 1164 if (dip == DEVI(cdip)) { 1165 rv = MDI_SUCCESS; 1166 break; 1167 } 1168 dip = dip->devi_sibling; 1169 } 1170 ndi_devi_exit(vdip, circular); 1171 return (rv); 1172 } 1173 1174 1175 /* 1176 * i_mdi_client_lock(): 1177 * Grab client component lock 1178 * Return Values: 1179 * None 1180 * Note: 1181 * The default locking order is: 1182 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1183 * But there are number of situations where locks need to be 1184 * grabbed in reverse order. This routine implements try and lock 1185 * mechanism depending on the requested parameter option. 1186 */ 1187 static void 1188 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1189 { 1190 if (pip) { 1191 /* 1192 * Reverse locking is requested. 1193 */ 1194 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1195 /* 1196 * tryenter failed. 
Try to grab again 1197 * after a small delay 1198 */ 1199 MDI_PI_HOLD(pip); 1200 MDI_PI_UNLOCK(pip); 1201 delay(1); 1202 MDI_PI_LOCK(pip); 1203 MDI_PI_RELE(pip); 1204 } 1205 } else { 1206 MDI_CLIENT_LOCK(ct); 1207 } 1208 } 1209 1210 /* 1211 * i_mdi_client_unlock(): 1212 * Unlock a client component 1213 */ 1214 static void 1215 i_mdi_client_unlock(mdi_client_t *ct) 1216 { 1217 MDI_CLIENT_UNLOCK(ct); 1218 } 1219 1220 /* 1221 * i_mdi_client_alloc(): 1222 * Allocate and initialize a client structure. Caller should 1223 * hold the vhci client lock. 1224 * Return Values: 1225 * Handle to a client component 1226 */ 1227 /*ARGSUSED*/ 1228 static mdi_client_t * 1229 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1230 { 1231 mdi_client_t *ct; 1232 1233 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1234 1235 /* 1236 * Allocate and initialize a component structure. 1237 */ 1238 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1239 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1240 ct->ct_hnext = NULL; 1241 ct->ct_hprev = NULL; 1242 ct->ct_dip = NULL; 1243 ct->ct_vhci = vh; 1244 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1245 (void) strcpy(ct->ct_drvname, name); 1246 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1247 (void) strcpy(ct->ct_guid, lguid); 1248 ct->ct_cprivate = NULL; 1249 ct->ct_vprivate = NULL; 1250 ct->ct_flags = 0; 1251 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1252 MDI_CLIENT_LOCK(ct); 1253 MDI_CLIENT_SET_OFFLINE(ct); 1254 MDI_CLIENT_SET_DETACH(ct); 1255 MDI_CLIENT_SET_POWER_UP(ct); 1256 MDI_CLIENT_UNLOCK(ct); 1257 ct->ct_failover_flags = 0; 1258 ct->ct_failover_status = 0; 1259 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1260 ct->ct_unstable = 0; 1261 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1262 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1263 ct->ct_lb = vh->vh_lb; 1264 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1265 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1266 ct->ct_path_count = 0; 1267 ct->ct_path_head = NULL; 1268 ct->ct_path_tail = NULL; 1269 ct->ct_path_last = NULL; 1270 1271 /* 1272 * Add this client component to our client hash queue 1273 */ 1274 i_mdi_client_enlist_table(vh, ct); 1275 return (ct); 1276 } 1277 1278 /* 1279 * i_mdi_client_enlist_table(): 1280 * Attach the client device to the client hash table. Caller 1281 * should hold the vhci client lock. 1282 */ 1283 static void 1284 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1285 { 1286 int index; 1287 struct client_hash *head; 1288 1289 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1290 1291 index = i_mdi_get_hash_key(ct->ct_guid); 1292 head = &vh->vh_client_table[index]; 1293 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1294 head->ct_hash_head = ct; 1295 head->ct_hash_count++; 1296 vh->vh_client_count++; 1297 } 1298 1299 /* 1300 * i_mdi_client_delist_table(): 1301 * Attach the client device to the client hash table. 1302 * Caller should hold the vhci client lock. 
1303 */ 1304 static void 1305 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1306 { 1307 int index; 1308 char *guid; 1309 struct client_hash *head; 1310 mdi_client_t *next; 1311 mdi_client_t *last; 1312 1313 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1314 1315 guid = ct->ct_guid; 1316 index = i_mdi_get_hash_key(guid); 1317 head = &vh->vh_client_table[index]; 1318 1319 last = NULL; 1320 next = (mdi_client_t *)head->ct_hash_head; 1321 while (next != NULL) { 1322 if (next == ct) { 1323 break; 1324 } 1325 last = next; 1326 next = next->ct_hnext; 1327 } 1328 1329 if (next) { 1330 head->ct_hash_count--; 1331 if (last == NULL) { 1332 head->ct_hash_head = ct->ct_hnext; 1333 } else { 1334 last->ct_hnext = ct->ct_hnext; 1335 } 1336 ct->ct_hnext = NULL; 1337 vh->vh_client_count--; 1338 } 1339 } 1340 1341 1342 /* 1343 * i_mdi_client_free(): 1344 * Free a client component 1345 */ 1346 static int 1347 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1348 { 1349 int rv = MDI_SUCCESS; 1350 int flags = ct->ct_flags; 1351 dev_info_t *cdip; 1352 dev_info_t *vdip; 1353 1354 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1355 1356 vdip = vh->vh_dip; 1357 cdip = ct->ct_dip; 1358 1359 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1360 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1361 DEVI(cdip)->devi_mdi_client = NULL; 1362 1363 /* 1364 * Clear out back ref. 
to dev_info_t node 1365 */ 1366 ct->ct_dip = NULL; 1367 1368 /* 1369 * Remove this client from our hash queue 1370 */ 1371 i_mdi_client_delist_table(vh, ct); 1372 1373 /* 1374 * Uninitialize and free the component 1375 */ 1376 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1377 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1378 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1379 cv_destroy(&ct->ct_failover_cv); 1380 cv_destroy(&ct->ct_unstable_cv); 1381 cv_destroy(&ct->ct_powerchange_cv); 1382 mutex_destroy(&ct->ct_mutex); 1383 kmem_free(ct, sizeof (*ct)); 1384 1385 if (cdip != NULL) { 1386 MDI_VHCI_CLIENT_UNLOCK(vh); 1387 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1388 MDI_VHCI_CLIENT_LOCK(vh); 1389 } 1390 return (rv); 1391 } 1392 1393 /* 1394 * i_mdi_client_find(): 1395 * Find the client structure corresponding to a given guid 1396 * Caller should hold the vhci client lock. 1397 */ 1398 static mdi_client_t * 1399 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1400 { 1401 int index; 1402 struct client_hash *head; 1403 mdi_client_t *ct; 1404 1405 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1406 1407 index = i_mdi_get_hash_key(guid); 1408 head = &vh->vh_client_table[index]; 1409 1410 ct = head->ct_hash_head; 1411 while (ct != NULL) { 1412 if (strcmp(ct->ct_guid, guid) == 0 && 1413 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1414 break; 1415 } 1416 ct = ct->ct_hnext; 1417 } 1418 return (ct); 1419 } 1420 1421 /* 1422 * i_mdi_client_update_state(): 1423 * Compute and update client device state 1424 * Notes: 1425 * A client device can be in any of three possible states: 1426 * 1427 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1428 * one online/standby paths. Can tolerate failures. 1429 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1430 * no alternate paths available as standby. A failure on the online 1431 * would result in loss of access to device data. 
1432 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1433 * no paths available to access the device. 1434 */ 1435 static void 1436 i_mdi_client_update_state(mdi_client_t *ct) 1437 { 1438 int state; 1439 1440 ASSERT(MDI_CLIENT_LOCKED(ct)); 1441 state = i_mdi_client_compute_state(ct, NULL); 1442 MDI_CLIENT_SET_STATE(ct, state); 1443 } 1444 1445 /* 1446 * i_mdi_client_compute_state(): 1447 * Compute client device state 1448 * 1449 * mdi_phci_t * Pointer to pHCI structure which should 1450 * while computing the new value. Used by 1451 * i_mdi_phci_offline() to find the new 1452 * client state after DR of a pHCI. 1453 */ 1454 static int 1455 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1456 { 1457 int state; 1458 int online_count = 0; 1459 int standby_count = 0; 1460 mdi_pathinfo_t *pip, *next; 1461 1462 ASSERT(MDI_CLIENT_LOCKED(ct)); 1463 pip = ct->ct_path_head; 1464 while (pip != NULL) { 1465 MDI_PI_LOCK(pip); 1466 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1467 if (MDI_PI(pip)->pi_phci == ph) { 1468 MDI_PI_UNLOCK(pip); 1469 pip = next; 1470 continue; 1471 } 1472 1473 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1474 == MDI_PATHINFO_STATE_ONLINE) 1475 online_count++; 1476 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1477 == MDI_PATHINFO_STATE_STANDBY) 1478 standby_count++; 1479 MDI_PI_UNLOCK(pip); 1480 pip = next; 1481 } 1482 1483 if (online_count == 0) { 1484 if (standby_count == 0) { 1485 state = MDI_CLIENT_STATE_FAILED; 1486 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1487 " ct = %p\n", (void *)ct)); 1488 } else if (standby_count == 1) { 1489 state = MDI_CLIENT_STATE_DEGRADED; 1490 } else { 1491 state = MDI_CLIENT_STATE_OPTIMAL; 1492 } 1493 } else if (online_count == 1) { 1494 if (standby_count == 0) { 1495 state = MDI_CLIENT_STATE_DEGRADED; 1496 } else { 1497 state = MDI_CLIENT_STATE_OPTIMAL; 1498 } 1499 } else { 1500 state = MDI_CLIENT_STATE_OPTIMAL; 1501 } 1502 return (state); 1503 } 1504 
1505 /* 1506 * i_mdi_client2devinfo(): 1507 * Utility function 1508 */ 1509 dev_info_t * 1510 i_mdi_client2devinfo(mdi_client_t *ct) 1511 { 1512 return (ct->ct_dip); 1513 } 1514 1515 /* 1516 * mdi_client_path2_devinfo(): 1517 * Given the parent devinfo and child devfs pathname, search for 1518 * a valid devfs node handle. 1519 */ 1520 dev_info_t * 1521 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1522 { 1523 dev_info_t *cdip = NULL; 1524 dev_info_t *ndip = NULL; 1525 char *temp_pathname; 1526 int circular; 1527 1528 /* 1529 * Allocate temp buffer 1530 */ 1531 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1532 1533 /* 1534 * Lock parent against changes 1535 */ 1536 ndi_devi_enter(vdip, &circular); 1537 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1538 while ((cdip = ndip) != NULL) { 1539 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1540 1541 *temp_pathname = '\0'; 1542 (void) ddi_pathname(cdip, temp_pathname); 1543 if (strcmp(temp_pathname, pathname) == 0) { 1544 break; 1545 } 1546 } 1547 /* 1548 * Release devinfo lock 1549 */ 1550 ndi_devi_exit(vdip, circular); 1551 1552 /* 1553 * Free the temp buffer 1554 */ 1555 kmem_free(temp_pathname, MAXPATHLEN); 1556 return (cdip); 1557 } 1558 1559 /* 1560 * mdi_client_get_path_count(): 1561 * Utility function to get number of path information nodes 1562 * associated with a given client device. 
1563 */ 1564 int 1565 mdi_client_get_path_count(dev_info_t *cdip) 1566 { 1567 mdi_client_t *ct; 1568 int count = 0; 1569 1570 ct = i_devi_get_client(cdip); 1571 if (ct != NULL) { 1572 count = ct->ct_path_count; 1573 } 1574 return (count); 1575 } 1576 1577 1578 /* 1579 * i_mdi_get_hash_key(): 1580 * Create a hash using strings as keys 1581 * 1582 */ 1583 static int 1584 i_mdi_get_hash_key(char *str) 1585 { 1586 uint32_t g, hash = 0; 1587 char *p; 1588 1589 for (p = str; *p != '\0'; p++) { 1590 g = *p; 1591 hash += g; 1592 } 1593 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1594 } 1595 1596 /* 1597 * mdi_get_lb_policy(): 1598 * Get current load balancing policy for a given client device 1599 */ 1600 client_lb_t 1601 mdi_get_lb_policy(dev_info_t *cdip) 1602 { 1603 client_lb_t lb = LOAD_BALANCE_NONE; 1604 mdi_client_t *ct; 1605 1606 ct = i_devi_get_client(cdip); 1607 if (ct != NULL) { 1608 lb = ct->ct_lb; 1609 } 1610 return (lb); 1611 } 1612 1613 /* 1614 * mdi_set_lb_region_size(): 1615 * Set current region size for the load-balance 1616 */ 1617 int 1618 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1619 { 1620 mdi_client_t *ct; 1621 int rv = MDI_FAILURE; 1622 1623 ct = i_devi_get_client(cdip); 1624 if (ct != NULL && ct->ct_lb_args != NULL) { 1625 ct->ct_lb_args->region_size = region_size; 1626 rv = MDI_SUCCESS; 1627 } 1628 return (rv); 1629 } 1630 1631 /* 1632 * mdi_Set_lb_policy(): 1633 * Set current load balancing policy for a given client device 1634 */ 1635 int 1636 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1637 { 1638 mdi_client_t *ct; 1639 int rv = MDI_FAILURE; 1640 1641 ct = i_devi_get_client(cdip); 1642 if (ct != NULL) { 1643 ct->ct_lb = lb; 1644 rv = MDI_SUCCESS; 1645 } 1646 return (rv); 1647 } 1648 1649 /* 1650 * mdi_failover(): 1651 * failover function called by the vHCI drivers to initiate 1652 * a failover operation. This is typically due to non-availability 1653 * of online paths to route I/O requests. 
Failover can be 1654 * triggered through user application also. 1655 * 1656 * The vHCI driver calls mdi_failover() to initiate a failover 1657 * operation. mdi_failover() calls back into the vHCI driver's 1658 * vo_failover() entry point to perform the actual failover 1659 * operation. The reason for requiring the vHCI driver to 1660 * initiate failover by calling mdi_failover(), instead of directly 1661 * executing vo_failover() itself, is to ensure that the mdi 1662 * framework can keep track of the client state properly. 1663 * Additionally, mdi_failover() provides as a convenience the 1664 * option of performing the failover operation synchronously or 1665 * asynchronously 1666 * 1667 * Upon successful completion of the failover operation, the 1668 * paths that were previously ONLINE will be in the STANDBY state, 1669 * and the newly activated paths will be in the ONLINE state. 1670 * 1671 * The flags modifier determines whether the activation is done 1672 * synchronously: MDI_FAILOVER_SYNC 1673 * Return Values: 1674 * MDI_SUCCESS 1675 * MDI_FAILURE 1676 * MDI_BUSY 1677 */ 1678 /*ARGSUSED*/ 1679 int 1680 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1681 { 1682 int rv; 1683 mdi_client_t *ct; 1684 1685 ct = i_devi_get_client(cdip); 1686 ASSERT(ct != NULL); 1687 if (ct == NULL) { 1688 /* cdip is not a valid client device. Nothing more to do. */ 1689 return (MDI_FAILURE); 1690 } 1691 1692 MDI_CLIENT_LOCK(ct); 1693 1694 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1695 /* A path to the client is being freed */ 1696 MDI_CLIENT_UNLOCK(ct); 1697 return (MDI_BUSY); 1698 } 1699 1700 1701 if (MDI_CLIENT_IS_FAILED(ct)) { 1702 /* 1703 * Client is in failed state. Nothing more to do. 
1704 */ 1705 MDI_CLIENT_UNLOCK(ct); 1706 return (MDI_FAILURE); 1707 } 1708 1709 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1710 /* 1711 * Failover is already in progress; return BUSY 1712 */ 1713 MDI_CLIENT_UNLOCK(ct); 1714 return (MDI_BUSY); 1715 } 1716 /* 1717 * Make sure that mdi_pathinfo node state changes are processed. 1718 * We do not allow failovers to progress while client path state 1719 * changes are in progress 1720 */ 1721 if (ct->ct_unstable) { 1722 if (flags == MDI_FAILOVER_ASYNC) { 1723 MDI_CLIENT_UNLOCK(ct); 1724 return (MDI_BUSY); 1725 } else { 1726 while (ct->ct_unstable) 1727 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1728 } 1729 } 1730 1731 /* 1732 * Client device is in stable state. Before proceeding, perform sanity 1733 * checks again. 1734 */ 1735 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1736 (!i_ddi_devi_attached(ct->ct_dip))) { 1737 /* 1738 * Client is in failed state. Nothing more to do. 1739 */ 1740 MDI_CLIENT_UNLOCK(ct); 1741 return (MDI_FAILURE); 1742 } 1743 1744 /* 1745 * Set the client state as failover in progress. 1746 */ 1747 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1748 ct->ct_failover_flags = flags; 1749 MDI_CLIENT_UNLOCK(ct); 1750 1751 if (flags == MDI_FAILOVER_ASYNC) { 1752 /* 1753 * Submit the initiate failover request via CPR safe 1754 * taskq threads. 1755 */ 1756 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1757 ct, KM_SLEEP); 1758 return (MDI_ACCEPT); 1759 } else { 1760 /* 1761 * Synchronous failover mode. Typically invoked from the user 1762 * land. 1763 */ 1764 rv = i_mdi_failover(ct); 1765 } 1766 return (rv); 1767 } 1768 1769 /* 1770 * i_mdi_failover(): 1771 * internal failover function. Invokes vHCI drivers failover 1772 * callback function and process the failover status 1773 * Return Values: 1774 * None 1775 * 1776 * Note: A client device in failover state can not be detached or freed. 
1777 */ 1778 static int 1779 i_mdi_failover(void *arg) 1780 { 1781 int rv = MDI_SUCCESS; 1782 mdi_client_t *ct = (mdi_client_t *)arg; 1783 mdi_vhci_t *vh = ct->ct_vhci; 1784 1785 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1786 1787 if (vh->vh_ops->vo_failover != NULL) { 1788 /* 1789 * Call vHCI drivers callback routine 1790 */ 1791 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1792 ct->ct_failover_flags); 1793 } 1794 1795 MDI_CLIENT_LOCK(ct); 1796 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1797 1798 /* 1799 * Save the failover return status 1800 */ 1801 ct->ct_failover_status = rv; 1802 1803 /* 1804 * As a result of failover, client status would have been changed. 1805 * Update the client state and wake up anyone waiting on this client 1806 * device. 1807 */ 1808 i_mdi_client_update_state(ct); 1809 1810 cv_broadcast(&ct->ct_failover_cv); 1811 MDI_CLIENT_UNLOCK(ct); 1812 return (rv); 1813 } 1814 1815 /* 1816 * Load balancing is logical block. 1817 * IOs within the range described by region_size 1818 * would go on the same path. This would improve the 1819 * performance by cache-hit on some of the RAID devices. 1820 * Search only for online paths(At some point we 1821 * may want to balance across target ports). 1822 * If no paths are found then default to round-robin. 
1823 */ 1824 static int 1825 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1826 { 1827 int path_index = -1; 1828 int online_path_count = 0; 1829 int online_nonpref_path_count = 0; 1830 int region_size = ct->ct_lb_args->region_size; 1831 mdi_pathinfo_t *pip; 1832 mdi_pathinfo_t *next; 1833 int preferred, path_cnt; 1834 1835 pip = ct->ct_path_head; 1836 while (pip) { 1837 MDI_PI_LOCK(pip); 1838 if (MDI_PI(pip)->pi_state == 1839 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1840 online_path_count++; 1841 } else if (MDI_PI(pip)->pi_state == 1842 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1843 online_nonpref_path_count++; 1844 } 1845 next = (mdi_pathinfo_t *) 1846 MDI_PI(pip)->pi_client_link; 1847 MDI_PI_UNLOCK(pip); 1848 pip = next; 1849 } 1850 /* if found any online/preferred then use this type */ 1851 if (online_path_count > 0) { 1852 path_cnt = online_path_count; 1853 preferred = 1; 1854 } else if (online_nonpref_path_count > 0) { 1855 path_cnt = online_nonpref_path_count; 1856 preferred = 0; 1857 } else { 1858 path_cnt = 0; 1859 } 1860 if (path_cnt) { 1861 path_index = (bp->b_blkno >> region_size) % path_cnt; 1862 pip = ct->ct_path_head; 1863 while (pip && path_index != -1) { 1864 MDI_PI_LOCK(pip); 1865 if (path_index == 0 && 1866 (MDI_PI(pip)->pi_state == 1867 MDI_PATHINFO_STATE_ONLINE) && 1868 MDI_PI(pip)->pi_preferred == preferred) { 1869 MDI_PI_HOLD(pip); 1870 MDI_PI_UNLOCK(pip); 1871 *ret_pip = pip; 1872 return (MDI_SUCCESS); 1873 } 1874 path_index --; 1875 next = (mdi_pathinfo_t *) 1876 MDI_PI(pip)->pi_client_link; 1877 MDI_PI_UNLOCK(pip); 1878 pip = next; 1879 } 1880 if (pip == NULL) { 1881 MDI_DEBUG(4, (CE_NOTE, NULL, 1882 "!lba %llx, no pip !!\n", 1883 bp->b_lblkno)); 1884 } else { 1885 MDI_DEBUG(4, (CE_NOTE, NULL, 1886 "!lba %llx, no pip for path_index, " 1887 "pip %p\n", bp->b_lblkno, (void *)pip)); 1888 } 1889 } 1890 return (MDI_FAILURE); 1891 } 1892 1893 /* 1894 * mdi_select_path(): 1895 * select a 
path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is set through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  Any non-zero flags value disables the
 *		standard behavior: the client state checks are skipped and a
 *		round robin style walk is used regardless of the configured
 *		policy.  If start_pip is specified (non NULL) it is
 *		used as start point to walk and find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select a STANDBY path).
 *		The code additionally accepts the combination of both with
 *		MDI_SELECT_USER_DISABLE_PATH; any other flags value matches
 *		no path.
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt). Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path was selected but at least one path
 *				  visited was in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	/* The client lock is held for the whole path list walk below. */
	MDI_CLIENT_LOCK(ct);

	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * Circular walk beginning at 'start': first lap looks for
		 * preferred paths, second lap for non-preferred ones.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE) &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.  Remember that a transient path was
			 * seen so that MDI_BUSY rather than MDI_NOPATH is
			 * returned if nothing is found.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
				MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				/* first lap done; retry with non-preferred */
				preferred = 0;
			} else if (start == pip && !preferred) {
				/* second lap done; give up */
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Standard behavior resumes after the last selected path;
		 * non-standard behavior resumes after the caller's start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set to 1 when this path satisfies the
			 * request: state selected by sb/flags, and preference
			 * equal to the current pass.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					/* unrecognized flags match nothing */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				/*
				 * Only the standard behavior updates the
				 * round robin cursor.
				 */
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.  Remember that a transient path was
			 * seen so that MDI_BUSY rather than MDI_NOPATH is
			 * returned if nothing is found.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
				MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * do_again is entered with pip locked, both on the
			 * normal fall-through and from the goto below.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report MDI_BUSY if a transient path was
	 * seen during the walk, MDI_NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
2332 */ 2333 mdi_pathinfo_t * 2334 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2335 { 2336 mdi_client_t *ct; 2337 2338 if (!MDI_CLIENT(ct_dip)) 2339 return (NULL); 2340 2341 /* 2342 * Walk through client link 2343 */ 2344 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2345 ASSERT(ct != NULL); 2346 2347 if (pip == NULL) 2348 return ((mdi_pathinfo_t *)ct->ct_path_head); 2349 2350 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2351 } 2352 2353 /* 2354 * For a phci, return the next available path to any client 2355 * Note: ditto mdi_get_next_phci_path() 2356 */ 2357 mdi_pathinfo_t * 2358 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2359 { 2360 mdi_phci_t *ph; 2361 2362 if (!MDI_PHCI(ph_dip)) 2363 return (NULL); 2364 2365 /* 2366 * Walk through pHCI link 2367 */ 2368 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2369 ASSERT(ph != NULL); 2370 2371 if (pip == NULL) 2372 return ((mdi_pathinfo_t *)ph->ph_path_head); 2373 2374 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2375 } 2376 2377 /* 2378 * mdi_hold_path(): 2379 * Hold the mdi_pathinfo node against unwanted unexpected free. 2380 * Return Values: 2381 * None 2382 */ 2383 void 2384 mdi_hold_path(mdi_pathinfo_t *pip) 2385 { 2386 if (pip) { 2387 MDI_PI_LOCK(pip); 2388 MDI_PI_HOLD(pip); 2389 MDI_PI_UNLOCK(pip); 2390 } 2391 } 2392 2393 2394 /* 2395 * mdi_rele_path(): 2396 * Release the mdi_pathinfo node which was selected 2397 * through mdi_select_path() mechanism or manually held by 2398 * calling mdi_hold_path(). 2399 * Return Values: 2400 * None 2401 */ 2402 void 2403 mdi_rele_path(mdi_pathinfo_t *pip) 2404 { 2405 if (pip) { 2406 MDI_PI_LOCK(pip); 2407 MDI_PI_RELE(pip); 2408 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2409 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2410 } 2411 MDI_PI_UNLOCK(pip); 2412 } 2413 } 2414 2415 /* 2416 * mdi_pi_lock(): 2417 * Lock the mdi_pathinfo node. 
2418 * Note: 2419 * The caller should release the lock by calling mdi_pi_unlock() 2420 */ 2421 void 2422 mdi_pi_lock(mdi_pathinfo_t *pip) 2423 { 2424 ASSERT(pip != NULL); 2425 if (pip) { 2426 MDI_PI_LOCK(pip); 2427 } 2428 } 2429 2430 2431 /* 2432 * mdi_pi_unlock(): 2433 * Unlock the mdi_pathinfo node. 2434 * Note: 2435 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2436 */ 2437 void 2438 mdi_pi_unlock(mdi_pathinfo_t *pip) 2439 { 2440 ASSERT(pip != NULL); 2441 if (pip) { 2442 MDI_PI_UNLOCK(pip); 2443 } 2444 } 2445 2446 /* 2447 * mdi_pi_find(): 2448 * Search the list of mdi_pathinfo nodes attached to the 2449 * pHCI/Client device node whose path address matches "paddr". 2450 * Returns a pointer to the mdi_pathinfo node if a matching node is 2451 * found. 2452 * Return Values: 2453 * mdi_pathinfo node handle 2454 * NULL 2455 * Notes: 2456 * Caller need not hold any locks to call this function. 2457 */ 2458 mdi_pathinfo_t * 2459 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2460 { 2461 mdi_phci_t *ph; 2462 mdi_vhci_t *vh; 2463 mdi_client_t *ct; 2464 mdi_pathinfo_t *pip = NULL; 2465 2466 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s", 2467 caddr ? caddr : "NULL", paddr ? paddr : "NULL")); 2468 if ((pdip == NULL) || (paddr == NULL)) { 2469 return (NULL); 2470 } 2471 ph = i_devi_get_phci(pdip); 2472 if (ph == NULL) { 2473 /* 2474 * Invalid pHCI device, Nothing more to do. 2475 */ 2476 MDI_DEBUG(2, (CE_WARN, pdip, 2477 "!mdi_pi_find: invalid phci")); 2478 return (NULL); 2479 } 2480 2481 vh = ph->ph_vhci; 2482 if (vh == NULL) { 2483 /* 2484 * Invalid vHCI device, Nothing more to do. 2485 */ 2486 MDI_DEBUG(2, (CE_WARN, pdip, 2487 "!mdi_pi_find: invalid vhci")); 2488 return (NULL); 2489 } 2490 2491 /* 2492 * Look for pathinfo node identified by paddr. 2493 */ 2494 if (caddr == NULL) { 2495 /* 2496 * Find a mdi_pathinfo node under pHCI list for a matching 2497 * unit address. 
2498 */ 2499 MDI_PHCI_LOCK(ph); 2500 if (MDI_PHCI_IS_OFFLINE(ph)) { 2501 MDI_DEBUG(2, (CE_WARN, pdip, 2502 "!mdi_pi_find: offline phci %p", (void *)ph)); 2503 MDI_PHCI_UNLOCK(ph); 2504 return (NULL); 2505 } 2506 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2507 2508 while (pip != NULL) { 2509 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2510 break; 2511 } 2512 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2513 } 2514 MDI_PHCI_UNLOCK(ph); 2515 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p", 2516 (void *)pip)); 2517 return (pip); 2518 } 2519 2520 /* 2521 * XXX - Is the rest of the code in this function really necessary? 2522 * The consumers of mdi_pi_find() can search for the desired pathinfo 2523 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2524 * whether the search is based on the pathinfo nodes attached to 2525 * the pHCI or the client node, the result will be the same. 2526 */ 2527 2528 /* 2529 * Find the client device corresponding to 'caddr' 2530 */ 2531 MDI_VHCI_CLIENT_LOCK(vh); 2532 2533 /* 2534 * XXX - Passing NULL to the following function works as long as the 2535 * the client addresses (caddr) are unique per vhci basis. 2536 */ 2537 ct = i_mdi_client_find(vh, NULL, caddr); 2538 if (ct == NULL) { 2539 /* 2540 * Client not found, Obviously mdi_pathinfo node has not been 2541 * created yet. 2542 */ 2543 MDI_VHCI_CLIENT_UNLOCK(vh); 2544 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not " 2545 "found for caddr %s", caddr ? caddr : "NULL")); 2546 return (NULL); 2547 } 2548 2549 /* 2550 * Hold the client lock and look for a mdi_pathinfo node with matching 2551 * pHCI and paddr 2552 */ 2553 MDI_CLIENT_LOCK(ct); 2554 2555 /* 2556 * Release the global mutex as it is no more needed. Note: We always 2557 * respect the locking order while acquiring. 
2558 */ 2559 MDI_VHCI_CLIENT_UNLOCK(vh); 2560 2561 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2562 while (pip != NULL) { 2563 /* 2564 * Compare the unit address 2565 */ 2566 if ((MDI_PI(pip)->pi_phci == ph) && 2567 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2568 break; 2569 } 2570 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2571 } 2572 MDI_CLIENT_UNLOCK(ct); 2573 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip)); 2574 return (pip); 2575 } 2576 2577 /* 2578 * mdi_pi_alloc(): 2579 * Allocate and initialize a new instance of a mdi_pathinfo node. 2580 * The mdi_pathinfo node returned by this function identifies a 2581 * unique device path is capable of having properties attached 2582 * and passed to mdi_pi_online() to fully attach and online the 2583 * path and client device node. 2584 * The mdi_pathinfo node returned by this function must be 2585 * destroyed using mdi_pi_free() if the path is no longer 2586 * operational or if the caller fails to attach a client device 2587 * node when calling mdi_pi_online(). The framework will not free 2588 * the resources allocated. 2589 * This function can be called from both interrupt and kernel 2590 * contexts. DDI_NOSLEEP flag should be used while calling 2591 * from interrupt contexts. 2592 * Return Values: 2593 * MDI_SUCCESS 2594 * MDI_FAILURE 2595 * MDI_NOMEM 2596 */ 2597 /*ARGSUSED*/ 2598 int 2599 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2600 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2601 { 2602 mdi_vhci_t *vh; 2603 mdi_phci_t *ph; 2604 mdi_client_t *ct; 2605 mdi_pathinfo_t *pip = NULL; 2606 dev_info_t *cdip; 2607 int rv = MDI_NOMEM; 2608 int path_allocated = 0; 2609 2610 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s", 2611 cname ? cname : "NULL", caddr ? caddr : "NULL", 2612 paddr ? 
paddr : "NULL")); 2613 2614 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2615 ret_pip == NULL) { 2616 /* Nothing more to do */ 2617 return (MDI_FAILURE); 2618 } 2619 2620 *ret_pip = NULL; 2621 2622 /* No allocations on detaching pHCI */ 2623 if (DEVI_IS_DETACHING(pdip)) { 2624 /* Invalid pHCI device, return failure */ 2625 MDI_DEBUG(1, (CE_WARN, pdip, 2626 "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip)); 2627 return (MDI_FAILURE); 2628 } 2629 2630 ph = i_devi_get_phci(pdip); 2631 ASSERT(ph != NULL); 2632 if (ph == NULL) { 2633 /* Invalid pHCI device, return failure */ 2634 MDI_DEBUG(1, (CE_WARN, pdip, 2635 "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip)); 2636 return (MDI_FAILURE); 2637 } 2638 2639 MDI_PHCI_LOCK(ph); 2640 vh = ph->ph_vhci; 2641 if (vh == NULL) { 2642 /* Invalid vHCI device, return failure */ 2643 MDI_DEBUG(1, (CE_WARN, pdip, 2644 "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip)); 2645 MDI_PHCI_UNLOCK(ph); 2646 return (MDI_FAILURE); 2647 } 2648 2649 if (MDI_PHCI_IS_READY(ph) == 0) { 2650 /* 2651 * Do not allow new node creation when pHCI is in 2652 * offline/suspended states 2653 */ 2654 MDI_DEBUG(1, (CE_WARN, pdip, 2655 "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph)); 2656 MDI_PHCI_UNLOCK(ph); 2657 return (MDI_BUSY); 2658 } 2659 MDI_PHCI_UNSTABLE(ph); 2660 MDI_PHCI_UNLOCK(ph); 2661 2662 /* look for a matching client, create one if not found */ 2663 MDI_VHCI_CLIENT_LOCK(vh); 2664 ct = i_mdi_client_find(vh, cname, caddr); 2665 if (ct == NULL) { 2666 ct = i_mdi_client_alloc(vh, cname, caddr); 2667 ASSERT(ct != NULL); 2668 } 2669 2670 if (ct->ct_dip == NULL) { 2671 /* 2672 * Allocate a devinfo node 2673 */ 2674 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2675 compatible, ncompatible); 2676 if (ct->ct_dip == NULL) { 2677 (void) i_mdi_client_free(vh, ct); 2678 goto fail; 2679 } 2680 } 2681 cdip = ct->ct_dip; 2682 2683 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2684 DEVI(cdip)->devi_mdi_client = 
(caddr_t)ct; 2685 2686 MDI_CLIENT_LOCK(ct); 2687 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2688 while (pip != NULL) { 2689 /* 2690 * Compare the unit address 2691 */ 2692 if ((MDI_PI(pip)->pi_phci == ph) && 2693 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2694 break; 2695 } 2696 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2697 } 2698 MDI_CLIENT_UNLOCK(ct); 2699 2700 if (pip == NULL) { 2701 /* 2702 * This is a new path for this client device. Allocate and 2703 * initialize a new pathinfo node 2704 */ 2705 pip = i_mdi_pi_alloc(ph, paddr, ct); 2706 ASSERT(pip != NULL); 2707 path_allocated = 1; 2708 } 2709 rv = MDI_SUCCESS; 2710 2711 fail: 2712 /* 2713 * Release the global mutex. 2714 */ 2715 MDI_VHCI_CLIENT_UNLOCK(vh); 2716 2717 /* 2718 * Mark the pHCI as stable 2719 */ 2720 MDI_PHCI_LOCK(ph); 2721 MDI_PHCI_STABLE(ph); 2722 MDI_PHCI_UNLOCK(ph); 2723 *ret_pip = pip; 2724 2725 MDI_DEBUG(2, (CE_NOTE, pdip, 2726 "!mdi_pi_alloc_compatible: alloc %p", (void *)pip)); 2727 2728 if (path_allocated) 2729 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2730 2731 return (rv); 2732 } 2733 2734 /*ARGSUSED*/ 2735 int 2736 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2737 int flags, mdi_pathinfo_t **ret_pip) 2738 { 2739 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2740 flags, ret_pip)); 2741 } 2742 2743 /* 2744 * i_mdi_pi_alloc(): 2745 * Allocate a mdi_pathinfo node and add to the pHCI path list 2746 * Return Values: 2747 * mdi_pathinfo 2748 */ 2749 /*ARGSUSED*/ 2750 static mdi_pathinfo_t * 2751 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2752 { 2753 mdi_pathinfo_t *pip; 2754 int ct_circular; 2755 int ph_circular; 2756 int se_flag; 2757 int kmem_flag; 2758 2759 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2760 2761 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2762 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2763 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2764 
MDI_PATHINFO_STATE_TRANSIENT; 2765 2766 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2767 MDI_PI_SET_USER_DISABLE(pip); 2768 2769 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2770 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2771 2772 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2773 MDI_PI_SET_DRV_DISABLE(pip); 2774 2775 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2776 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2777 MDI_PI(pip)->pi_client = ct; 2778 MDI_PI(pip)->pi_phci = ph; 2779 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2780 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2781 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2782 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2783 MDI_PI(pip)->pi_pprivate = NULL; 2784 MDI_PI(pip)->pi_cprivate = NULL; 2785 MDI_PI(pip)->pi_vprivate = NULL; 2786 MDI_PI(pip)->pi_client_link = NULL; 2787 MDI_PI(pip)->pi_phci_link = NULL; 2788 MDI_PI(pip)->pi_ref_cnt = 0; 2789 MDI_PI(pip)->pi_kstats = NULL; 2790 MDI_PI(pip)->pi_preferred = 1; 2791 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2792 2793 /* 2794 * Lock both dev_info nodes against changes in parallel. 2795 * 2796 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 2797 * This atypical operation is done to synchronize pathinfo nodes 2798 * during devinfo snapshot (see di_register_pip) by 'pretending' that 2799 * the pathinfo nodes are children of the Client. 2800 */ 2801 ndi_devi_enter(ct->ct_dip, &ct_circular); 2802 ndi_devi_enter(ph->ph_dip, &ph_circular); 2803 2804 i_mdi_phci_add_path(ph, pip); 2805 i_mdi_client_add_path(ct, pip); 2806 2807 ndi_devi_exit(ph->ph_dip, ph_circular); 2808 ndi_devi_exit(ct->ct_dip, ct_circular); 2809 2810 /* determine interrupt context */ 2811 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2812 kmem_flag = (se_flag == SE_SLEEP) ? 
KM_SLEEP : KM_NOSLEEP; 2813 2814 i_ddi_di_cache_invalidate(kmem_flag); 2815 2816 return (pip); 2817 } 2818 2819 /* 2820 * i_mdi_phci_add_path(): 2821 * Add a mdi_pathinfo node to pHCI list. 2822 * Notes: 2823 * Caller should per-pHCI mutex 2824 */ 2825 static void 2826 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2827 { 2828 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2829 2830 MDI_PHCI_LOCK(ph); 2831 if (ph->ph_path_head == NULL) { 2832 ph->ph_path_head = pip; 2833 } else { 2834 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 2835 } 2836 ph->ph_path_tail = pip; 2837 ph->ph_path_count++; 2838 MDI_PHCI_UNLOCK(ph); 2839 } 2840 2841 /* 2842 * i_mdi_client_add_path(): 2843 * Add mdi_pathinfo node to client list 2844 */ 2845 static void 2846 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2847 { 2848 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2849 2850 MDI_CLIENT_LOCK(ct); 2851 if (ct->ct_path_head == NULL) { 2852 ct->ct_path_head = pip; 2853 } else { 2854 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 2855 } 2856 ct->ct_path_tail = pip; 2857 ct->ct_path_count++; 2858 MDI_CLIENT_UNLOCK(ct); 2859 } 2860 2861 /* 2862 * mdi_pi_free(): 2863 * Free the mdi_pathinfo node and also client device node if this 2864 * is the last path to the device 2865 * Return Values: 2866 * MDI_SUCCESS 2867 * MDI_FAILURE 2868 * MDI_BUSY 2869 */ 2870 /*ARGSUSED*/ 2871 int 2872 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 2873 { 2874 int rv = MDI_SUCCESS; 2875 mdi_vhci_t *vh; 2876 mdi_phci_t *ph; 2877 mdi_client_t *ct; 2878 int (*f)(); 2879 int client_held = 0; 2880 2881 MDI_PI_LOCK(pip); 2882 ph = MDI_PI(pip)->pi_phci; 2883 ASSERT(ph != NULL); 2884 if (ph == NULL) { 2885 /* 2886 * Invalid pHCI device, return failure 2887 */ 2888 MDI_DEBUG(1, (CE_WARN, NULL, 2889 "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip)); 2890 MDI_PI_UNLOCK(pip); 2891 return (MDI_FAILURE); 2892 } 2893 2894 vh = ph->ph_vhci; 2895 ASSERT(vh != NULL); 2896 if (vh == NULL) { 2897 /* Invalid pHCI 
device, return failure */ 2898 MDI_DEBUG(1, (CE_WARN, NULL, 2899 "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip)); 2900 MDI_PI_UNLOCK(pip); 2901 return (MDI_FAILURE); 2902 } 2903 2904 ct = MDI_PI(pip)->pi_client; 2905 ASSERT(ct != NULL); 2906 if (ct == NULL) { 2907 /* 2908 * Invalid Client device, return failure 2909 */ 2910 MDI_DEBUG(1, (CE_WARN, NULL, 2911 "!mdi_pi_free: invalid client pip=%p", (void *)pip)); 2912 MDI_PI_UNLOCK(pip); 2913 return (MDI_FAILURE); 2914 } 2915 2916 /* 2917 * Check to see for busy condition. A mdi_pathinfo can only be freed 2918 * if the node state is either offline or init and the reference count 2919 * is zero. 2920 */ 2921 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 2922 MDI_PI_IS_INITING(pip))) { 2923 /* 2924 * Node is busy 2925 */ 2926 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 2927 "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip)); 2928 MDI_PI_UNLOCK(pip); 2929 return (MDI_BUSY); 2930 } 2931 2932 while (MDI_PI(pip)->pi_ref_cnt != 0) { 2933 /* 2934 * Give a chance for pending I/Os to complete. 2935 */ 2936 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: " 2937 "%d cmds still pending on path: %p\n", 2938 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 2939 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 2940 &MDI_PI(pip)->pi_mutex, 2941 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 2942 /* 2943 * The timeout time reached without ref_cnt being zero 2944 * being signaled. 
2945 */ 2946 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 2947 "!mdi_pi_free: " 2948 "Timeout reached on path %p without the cond\n", 2949 (void *)pip)); 2950 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 2951 "!mdi_pi_free: " 2952 "%d cmds still pending on path: %p\n", 2953 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 2954 MDI_PI_UNLOCK(pip); 2955 return (MDI_BUSY); 2956 } 2957 } 2958 if (MDI_PI(pip)->pi_pm_held) { 2959 client_held = 1; 2960 } 2961 MDI_PI_UNLOCK(pip); 2962 2963 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 2964 2965 MDI_CLIENT_LOCK(ct); 2966 2967 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 2968 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 2969 2970 /* 2971 * Wait till failover is complete before removing this node. 2972 */ 2973 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 2974 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 2975 2976 MDI_CLIENT_UNLOCK(ct); 2977 MDI_VHCI_CLIENT_LOCK(vh); 2978 MDI_CLIENT_LOCK(ct); 2979 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 2980 2981 if (!MDI_PI_IS_INITING(pip)) { 2982 f = vh->vh_ops->vo_pi_uninit; 2983 if (f != NULL) { 2984 rv = (*f)(vh->vh_dip, pip, 0); 2985 } 2986 } 2987 /* 2988 * If vo_pi_uninit() completed successfully. 2989 */ 2990 if (rv == MDI_SUCCESS) { 2991 if (client_held) { 2992 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 2993 "i_mdi_pm_rele_client\n")); 2994 i_mdi_pm_rele_client(ct, 1); 2995 } 2996 i_mdi_pi_free(ph, pip, ct); 2997 if (ct->ct_path_count == 0) { 2998 /* 2999 * Client lost its last path. 
3000 * Clean up the client device 3001 */ 3002 MDI_CLIENT_UNLOCK(ct); 3003 (void) i_mdi_client_free(ct->ct_vhci, ct); 3004 MDI_VHCI_CLIENT_UNLOCK(vh); 3005 return (rv); 3006 } 3007 } 3008 MDI_CLIENT_UNLOCK(ct); 3009 MDI_VHCI_CLIENT_UNLOCK(vh); 3010 3011 if (rv == MDI_FAILURE) 3012 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3013 3014 return (rv); 3015 } 3016 3017 /* 3018 * i_mdi_pi_free(): 3019 * Free the mdi_pathinfo node 3020 */ 3021 static void 3022 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3023 { 3024 int ct_circular; 3025 int ph_circular; 3026 int se_flag; 3027 int kmem_flag; 3028 3029 ASSERT(MDI_CLIENT_LOCKED(ct)); 3030 3031 /* 3032 * remove any per-path kstats 3033 */ 3034 i_mdi_pi_kstat_destroy(pip); 3035 3036 /* See comments in i_mdi_pi_alloc() */ 3037 ndi_devi_enter(ct->ct_dip, &ct_circular); 3038 ndi_devi_enter(ph->ph_dip, &ph_circular); 3039 3040 i_mdi_client_remove_path(ct, pip); 3041 i_mdi_phci_remove_path(ph, pip); 3042 3043 ndi_devi_exit(ph->ph_dip, ph_circular); 3044 ndi_devi_exit(ct->ct_dip, ct_circular); 3045 3046 /* determine interrupt context */ 3047 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3048 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3049 3050 i_ddi_di_cache_invalidate(kmem_flag); 3051 3052 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3053 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3054 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3055 if (MDI_PI(pip)->pi_addr) { 3056 kmem_free(MDI_PI(pip)->pi_addr, 3057 strlen(MDI_PI(pip)->pi_addr) + 1); 3058 MDI_PI(pip)->pi_addr = NULL; 3059 } 3060 3061 if (MDI_PI(pip)->pi_prop) { 3062 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3063 MDI_PI(pip)->pi_prop = NULL; 3064 } 3065 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3066 } 3067 3068 3069 /* 3070 * i_mdi_phci_remove_path(): 3071 * Remove a mdi_pathinfo node from pHCI list. 
3072 * Notes: 3073 * Caller should hold per-pHCI mutex 3074 */ 3075 static void 3076 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3077 { 3078 mdi_pathinfo_t *prev = NULL; 3079 mdi_pathinfo_t *path = NULL; 3080 3081 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3082 3083 MDI_PHCI_LOCK(ph); 3084 path = ph->ph_path_head; 3085 while (path != NULL) { 3086 if (path == pip) { 3087 break; 3088 } 3089 prev = path; 3090 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3091 } 3092 3093 if (path) { 3094 ph->ph_path_count--; 3095 if (prev) { 3096 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3097 } else { 3098 ph->ph_path_head = 3099 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3100 } 3101 if (ph->ph_path_tail == path) { 3102 ph->ph_path_tail = prev; 3103 } 3104 } 3105 3106 /* 3107 * Clear the pHCI link 3108 */ 3109 MDI_PI(pip)->pi_phci_link = NULL; 3110 MDI_PI(pip)->pi_phci = NULL; 3111 MDI_PHCI_UNLOCK(ph); 3112 } 3113 3114 /* 3115 * i_mdi_client_remove_path(): 3116 * Remove a mdi_pathinfo node from client path list. 
3117 */ 3118 static void 3119 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3120 { 3121 mdi_pathinfo_t *prev = NULL; 3122 mdi_pathinfo_t *path; 3123 3124 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3125 3126 ASSERT(MDI_CLIENT_LOCKED(ct)); 3127 path = ct->ct_path_head; 3128 while (path != NULL) { 3129 if (path == pip) { 3130 break; 3131 } 3132 prev = path; 3133 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3134 } 3135 3136 if (path) { 3137 ct->ct_path_count--; 3138 if (prev) { 3139 MDI_PI(prev)->pi_client_link = 3140 MDI_PI(path)->pi_client_link; 3141 } else { 3142 ct->ct_path_head = 3143 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3144 } 3145 if (ct->ct_path_tail == path) { 3146 ct->ct_path_tail = prev; 3147 } 3148 if (ct->ct_path_last == path) { 3149 ct->ct_path_last = ct->ct_path_head; 3150 } 3151 } 3152 MDI_PI(pip)->pi_client_link = NULL; 3153 MDI_PI(pip)->pi_client = NULL; 3154 } 3155 3156 /* 3157 * i_mdi_pi_state_change(): 3158 * online a mdi_pathinfo node 3159 * 3160 * Return Values: 3161 * MDI_SUCCESS 3162 * MDI_FAILURE 3163 */ 3164 /*ARGSUSED*/ 3165 static int 3166 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3167 { 3168 int rv = MDI_SUCCESS; 3169 mdi_vhci_t *vh; 3170 mdi_phci_t *ph; 3171 mdi_client_t *ct; 3172 int (*f)(); 3173 dev_info_t *cdip; 3174 3175 MDI_PI_LOCK(pip); 3176 3177 ph = MDI_PI(pip)->pi_phci; 3178 ASSERT(ph); 3179 if (ph == NULL) { 3180 /* 3181 * Invalid pHCI device, fail the request 3182 */ 3183 MDI_PI_UNLOCK(pip); 3184 MDI_DEBUG(1, (CE_WARN, NULL, 3185 "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip)); 3186 return (MDI_FAILURE); 3187 } 3188 3189 vh = ph->ph_vhci; 3190 ASSERT(vh); 3191 if (vh == NULL) { 3192 /* 3193 * Invalid vHCI device, fail the request 3194 */ 3195 MDI_PI_UNLOCK(pip); 3196 MDI_DEBUG(1, (CE_WARN, NULL, 3197 "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip)); 3198 return (MDI_FAILURE); 3199 } 3200 3201 ct = MDI_PI(pip)->pi_client; 3202 ASSERT(ct 
!= NULL); 3203 if (ct == NULL) { 3204 /* 3205 * Invalid client device, fail the request 3206 */ 3207 MDI_PI_UNLOCK(pip); 3208 MDI_DEBUG(1, (CE_WARN, NULL, 3209 "!mdi_pi_state_change: invalid client pip=%p", 3210 (void *)pip)); 3211 return (MDI_FAILURE); 3212 } 3213 3214 /* 3215 * If this path has not been initialized yet, Callback vHCI driver's 3216 * pathinfo node initialize entry point 3217 */ 3218 3219 if (MDI_PI_IS_INITING(pip)) { 3220 MDI_PI_UNLOCK(pip); 3221 f = vh->vh_ops->vo_pi_init; 3222 if (f != NULL) { 3223 rv = (*f)(vh->vh_dip, pip, 0); 3224 if (rv != MDI_SUCCESS) { 3225 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3226 "!vo_pi_init: failed vHCI=0x%p, pip=0x%p", 3227 (void *)vh, (void *)pip)); 3228 return (MDI_FAILURE); 3229 } 3230 } 3231 MDI_PI_LOCK(pip); 3232 MDI_PI_CLEAR_TRANSIENT(pip); 3233 } 3234 3235 /* 3236 * Do not allow state transition when pHCI is in offline/suspended 3237 * states 3238 */ 3239 i_mdi_phci_lock(ph, pip); 3240 if (MDI_PHCI_IS_READY(ph) == 0) { 3241 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3242 "!mdi_pi_state_change: pHCI not ready, pHCI=%p", 3243 (void *)ph)); 3244 MDI_PI_UNLOCK(pip); 3245 i_mdi_phci_unlock(ph); 3246 return (MDI_BUSY); 3247 } 3248 MDI_PHCI_UNSTABLE(ph); 3249 i_mdi_phci_unlock(ph); 3250 3251 /* 3252 * Check if mdi_pathinfo state is in transient state. 3253 * If yes, offlining is in progress and wait till transient state is 3254 * cleared. 3255 */ 3256 if (MDI_PI_IS_TRANSIENT(pip)) { 3257 while (MDI_PI_IS_TRANSIENT(pip)) { 3258 cv_wait(&MDI_PI(pip)->pi_state_cv, 3259 &MDI_PI(pip)->pi_mutex); 3260 } 3261 } 3262 3263 /* 3264 * Grab the client lock in reverse order sequence and release the 3265 * mdi_pathinfo mutex. 
3266 */ 3267 i_mdi_client_lock(ct, pip); 3268 MDI_PI_UNLOCK(pip); 3269 3270 /* 3271 * Wait till failover state is cleared 3272 */ 3273 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3274 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3275 3276 /* 3277 * Mark the mdi_pathinfo node state as transient 3278 */ 3279 MDI_PI_LOCK(pip); 3280 switch (state) { 3281 case MDI_PATHINFO_STATE_ONLINE: 3282 MDI_PI_SET_ONLINING(pip); 3283 break; 3284 3285 case MDI_PATHINFO_STATE_STANDBY: 3286 MDI_PI_SET_STANDBYING(pip); 3287 break; 3288 3289 case MDI_PATHINFO_STATE_FAULT: 3290 /* 3291 * Mark the pathinfo state as FAULTED 3292 */ 3293 MDI_PI_SET_FAULTING(pip); 3294 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3295 break; 3296 3297 case MDI_PATHINFO_STATE_OFFLINE: 3298 /* 3299 * ndi_devi_offline() cannot hold pip or ct locks. 3300 */ 3301 MDI_PI_UNLOCK(pip); 3302 /* 3303 * Do not offline if path will become last path and path 3304 * is busy for user initiated events. 3305 */ 3306 cdip = ct->ct_dip; 3307 if ((flag & NDI_DEVI_REMOVE) && 3308 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3309 i_mdi_client_unlock(ct); 3310 rv = ndi_devi_offline(cdip, 0); 3311 if (rv != NDI_SUCCESS) { 3312 /* 3313 * Convert to MDI error code 3314 */ 3315 switch (rv) { 3316 case NDI_BUSY: 3317 rv = MDI_BUSY; 3318 break; 3319 default: 3320 rv = MDI_FAILURE; 3321 break; 3322 } 3323 goto state_change_exit; 3324 } else { 3325 i_mdi_client_lock(ct, NULL); 3326 } 3327 } 3328 /* 3329 * Mark the mdi_pathinfo node state as transient 3330 */ 3331 MDI_PI_LOCK(pip); 3332 MDI_PI_SET_OFFLINING(pip); 3333 break; 3334 } 3335 MDI_PI_UNLOCK(pip); 3336 MDI_CLIENT_UNSTABLE(ct); 3337 i_mdi_client_unlock(ct); 3338 3339 f = vh->vh_ops->vo_pi_state_change; 3340 if (f != NULL) 3341 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3342 3343 MDI_CLIENT_LOCK(ct); 3344 MDI_PI_LOCK(pip); 3345 if (rv == MDI_NOT_SUPPORTED) { 3346 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3347 } 3348 if (rv != MDI_SUCCESS) { 3349 MDI_DEBUG(2, (CE_WARN, ct->ct_dip, 
3350 "!vo_pi_state_change: failed rv = %x", rv)); 3351 } 3352 if (MDI_PI_IS_TRANSIENT(pip)) { 3353 if (rv == MDI_SUCCESS) { 3354 MDI_PI_CLEAR_TRANSIENT(pip); 3355 } else { 3356 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3357 } 3358 } 3359 3360 /* 3361 * Wake anyone waiting for this mdi_pathinfo node 3362 */ 3363 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3364 MDI_PI_UNLOCK(pip); 3365 3366 /* 3367 * Mark the client device as stable 3368 */ 3369 MDI_CLIENT_STABLE(ct); 3370 if (rv == MDI_SUCCESS) { 3371 if (ct->ct_unstable == 0) { 3372 cdip = ct->ct_dip; 3373 3374 /* 3375 * Onlining the mdi_pathinfo node will impact the 3376 * client state Update the client and dev_info node 3377 * state accordingly 3378 */ 3379 rv = NDI_SUCCESS; 3380 i_mdi_client_update_state(ct); 3381 switch (MDI_CLIENT_STATE(ct)) { 3382 case MDI_CLIENT_STATE_OPTIMAL: 3383 case MDI_CLIENT_STATE_DEGRADED: 3384 if (cdip && !i_ddi_devi_attached(cdip) && 3385 ((state == MDI_PATHINFO_STATE_ONLINE) || 3386 (state == MDI_PATHINFO_STATE_STANDBY))) { 3387 3388 /* 3389 * Must do ndi_devi_online() through 3390 * hotplug thread for deferred 3391 * attach mechanism to work 3392 */ 3393 MDI_CLIENT_UNLOCK(ct); 3394 rv = ndi_devi_online(cdip, 0); 3395 MDI_CLIENT_LOCK(ct); 3396 if ((rv != NDI_SUCCESS) && 3397 (MDI_CLIENT_STATE(ct) == 3398 MDI_CLIENT_STATE_DEGRADED)) { 3399 /* 3400 * ndi_devi_online failed. 3401 * Reset client flags to 3402 * offline. 3403 */ 3404 MDI_DEBUG(1, (CE_WARN, cdip, 3405 "!ndi_devi_online: failed " 3406 " Error: %x", rv)); 3407 MDI_CLIENT_SET_OFFLINE(ct); 3408 } 3409 if (rv != NDI_SUCCESS) { 3410 /* Reset the path state */ 3411 MDI_PI_LOCK(pip); 3412 MDI_PI(pip)->pi_state = 3413 MDI_PI_OLD_STATE(pip); 3414 MDI_PI_UNLOCK(pip); 3415 } 3416 } 3417 break; 3418 3419 case MDI_CLIENT_STATE_FAILED: 3420 /* 3421 * This is the last path case for 3422 * non-user initiated events. 
3423 */ 3424 if (((flag & NDI_DEVI_REMOVE) == 0) && 3425 cdip && (i_ddi_node_state(cdip) >= 3426 DS_INITIALIZED)) { 3427 MDI_CLIENT_UNLOCK(ct); 3428 rv = ndi_devi_offline(cdip, 0); 3429 MDI_CLIENT_LOCK(ct); 3430 3431 if (rv != NDI_SUCCESS) { 3432 /* 3433 * ndi_devi_offline failed. 3434 * Reset client flags to 3435 * online as the path could not 3436 * be offlined. 3437 */ 3438 MDI_DEBUG(1, (CE_WARN, cdip, 3439 "!ndi_devi_offline: failed " 3440 " Error: %x", rv)); 3441 MDI_CLIENT_SET_ONLINE(ct); 3442 } 3443 } 3444 break; 3445 } 3446 /* 3447 * Convert to MDI error code 3448 */ 3449 switch (rv) { 3450 case NDI_SUCCESS: 3451 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3452 i_mdi_report_path_state(ct, pip); 3453 rv = MDI_SUCCESS; 3454 break; 3455 case NDI_BUSY: 3456 rv = MDI_BUSY; 3457 break; 3458 default: 3459 rv = MDI_FAILURE; 3460 break; 3461 } 3462 } 3463 } 3464 MDI_CLIENT_UNLOCK(ct); 3465 3466 state_change_exit: 3467 /* 3468 * Mark the pHCI as stable again. 3469 */ 3470 MDI_PHCI_LOCK(ph); 3471 MDI_PHCI_STABLE(ph); 3472 MDI_PHCI_UNLOCK(ph); 3473 return (rv); 3474 } 3475 3476 /* 3477 * mdi_pi_online(): 3478 * Place the path_info node in the online state. The path is 3479 * now available to be selected by mdi_select_path() for 3480 * transporting I/O requests to client devices. 
3481 * Return Values: 3482 * MDI_SUCCESS 3483 * MDI_FAILURE 3484 */ 3485 int 3486 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3487 { 3488 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3489 dev_info_t *cdip; 3490 int client_held = 0; 3491 int rv; 3492 3493 ASSERT(ct != NULL); 3494 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3495 if (rv != MDI_SUCCESS) 3496 return (rv); 3497 3498 MDI_PI_LOCK(pip); 3499 if (MDI_PI(pip)->pi_pm_held == 0) { 3500 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3501 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3502 i_mdi_pm_hold_pip(pip); 3503 client_held = 1; 3504 } 3505 MDI_PI_UNLOCK(pip); 3506 3507 if (client_held) { 3508 MDI_CLIENT_LOCK(ct); 3509 if (ct->ct_power_cnt == 0) { 3510 rv = i_mdi_power_all_phci(ct); 3511 } 3512 3513 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3514 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3515 i_mdi_pm_hold_client(ct, 1); 3516 MDI_CLIENT_UNLOCK(ct); 3517 } 3518 3519 /* 3520 * Create the per-path (pathinfo) IO and error kstats which 3521 * are reported via iostat(1m). 3522 * 3523 * Defer creating the per-path kstats if device is not yet 3524 * attached; the names of the kstats are constructed in part 3525 * using the devices instance number which is assigned during 3526 * process of attaching the client device. 3527 * 3528 * The framework post_attach handler, mdi_post_attach(), is 3529 * is responsible for initializing the client's pathinfo list 3530 * once successfully attached. 
3531 */ 3532 cdip = ct->ct_dip; 3533 ASSERT(cdip); 3534 if (cdip == NULL || !i_ddi_devi_attached(cdip)) 3535 return (rv); 3536 3537 MDI_CLIENT_LOCK(ct); 3538 rv = i_mdi_pi_kstat_create(pip); 3539 MDI_CLIENT_UNLOCK(ct); 3540 return (rv); 3541 } 3542 3543 /* 3544 * mdi_pi_standby(): 3545 * Place the mdi_pathinfo node in standby state 3546 * 3547 * Return Values: 3548 * MDI_SUCCESS 3549 * MDI_FAILURE 3550 */ 3551 int 3552 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3553 { 3554 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3555 } 3556 3557 /* 3558 * mdi_pi_fault(): 3559 * Place the mdi_pathinfo node in fault'ed state 3560 * Return Values: 3561 * MDI_SUCCESS 3562 * MDI_FAILURE 3563 */ 3564 int 3565 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3566 { 3567 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3568 } 3569 3570 /* 3571 * mdi_pi_offline(): 3572 * Offline a mdi_pathinfo node. 3573 * Return Values: 3574 * MDI_SUCCESS 3575 * MDI_FAILURE 3576 */ 3577 int 3578 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3579 { 3580 int ret, client_held = 0; 3581 mdi_client_t *ct; 3582 3583 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3584 3585 if (ret == MDI_SUCCESS) { 3586 MDI_PI_LOCK(pip); 3587 if (MDI_PI(pip)->pi_pm_held) { 3588 client_held = 1; 3589 } 3590 MDI_PI_UNLOCK(pip); 3591 3592 if (client_held) { 3593 ct = MDI_PI(pip)->pi_client; 3594 MDI_CLIENT_LOCK(ct); 3595 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3596 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3597 i_mdi_pm_rele_client(ct, 1); 3598 MDI_CLIENT_UNLOCK(ct); 3599 } 3600 } 3601 3602 return (ret); 3603 } 3604 3605 /* 3606 * i_mdi_pi_offline(): 3607 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3608 */ 3609 static int 3610 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3611 { 3612 dev_info_t *vdip = NULL; 3613 mdi_vhci_t *vh = NULL; 3614 mdi_client_t *ct = NULL; 3615 int (*f)(); 3616 int rv; 3617 3618 MDI_PI_LOCK(pip); 3619 ct = 
MDI_PI(pip)->pi_client; 3620 ASSERT(ct != NULL); 3621 3622 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3623 /* 3624 * Give a chance for pending I/Os to complete. 3625 */ 3626 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3627 "%d cmds still pending on path: %p\n", 3628 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3629 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3630 &MDI_PI(pip)->pi_mutex, 3631 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3632 /* 3633 * The timeout time reached without ref_cnt being zero 3634 * being signaled. 3635 */ 3636 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3637 "Timeout reached on path %p without the cond\n", 3638 (void *)pip)); 3639 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3640 "%d cmds still pending on path: %p\n", 3641 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3642 } 3643 } 3644 vh = ct->ct_vhci; 3645 vdip = vh->vh_dip; 3646 3647 /* 3648 * Notify vHCI that has registered this event 3649 */ 3650 ASSERT(vh->vh_ops); 3651 f = vh->vh_ops->vo_pi_state_change; 3652 3653 if (f != NULL) { 3654 MDI_PI_UNLOCK(pip); 3655 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3656 flags)) != MDI_SUCCESS) { 3657 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3658 "!vo_path_offline failed " 3659 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3660 } 3661 MDI_PI_LOCK(pip); 3662 } 3663 3664 /* 3665 * Set the mdi_pathinfo node state and clear the transient condition 3666 */ 3667 MDI_PI_SET_OFFLINE(pip); 3668 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3669 MDI_PI_UNLOCK(pip); 3670 3671 MDI_CLIENT_LOCK(ct); 3672 if (rv == MDI_SUCCESS) { 3673 if (ct->ct_unstable == 0) { 3674 dev_info_t *cdip = ct->ct_dip; 3675 3676 /* 3677 * Onlining the mdi_pathinfo node will impact the 3678 * client state Update the client and dev_info node 3679 * state accordingly 3680 */ 3681 i_mdi_client_update_state(ct); 3682 rv = NDI_SUCCESS; 3683 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3684 if (cdip && 3685 (i_ddi_node_state(cdip) >= 3686 
DS_INITIALIZED)) { 3687 MDI_CLIENT_UNLOCK(ct); 3688 rv = ndi_devi_offline(cdip, 0); 3689 MDI_CLIENT_LOCK(ct); 3690 if (rv != NDI_SUCCESS) { 3691 /* 3692 * ndi_devi_offline failed. 3693 * Reset client flags to 3694 * online. 3695 */ 3696 MDI_DEBUG(4, (CE_WARN, cdip, 3697 "!ndi_devi_offline: failed " 3698 " Error: %x", rv)); 3699 MDI_CLIENT_SET_ONLINE(ct); 3700 } 3701 } 3702 } 3703 /* 3704 * Convert to MDI error code 3705 */ 3706 switch (rv) { 3707 case NDI_SUCCESS: 3708 rv = MDI_SUCCESS; 3709 break; 3710 case NDI_BUSY: 3711 rv = MDI_BUSY; 3712 break; 3713 default: 3714 rv = MDI_FAILURE; 3715 break; 3716 } 3717 } 3718 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3719 i_mdi_report_path_state(ct, pip); 3720 } 3721 3722 MDI_CLIENT_UNLOCK(ct); 3723 3724 /* 3725 * Change in the mdi_pathinfo node state will impact the client state 3726 */ 3727 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3728 (void *)ct, (void *)pip)); 3729 return (rv); 3730 } 3731 3732 3733 /* 3734 * mdi_pi_get_addr(): 3735 * Get the unit address associated with a mdi_pathinfo node 3736 * 3737 * Return Values: 3738 * char * 3739 */ 3740 char * 3741 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3742 { 3743 if (pip == NULL) 3744 return (NULL); 3745 3746 return (MDI_PI(pip)->pi_addr); 3747 } 3748 3749 /* 3750 * mdi_pi_get_client(): 3751 * Get the client devinfo associated with a mdi_pathinfo node 3752 * 3753 * Return Values: 3754 * Handle to client device dev_info node 3755 */ 3756 dev_info_t * 3757 mdi_pi_get_client(mdi_pathinfo_t *pip) 3758 { 3759 dev_info_t *dip = NULL; 3760 if (pip) { 3761 dip = MDI_PI(pip)->pi_client->ct_dip; 3762 } 3763 return (dip); 3764 } 3765 3766 /* 3767 * mdi_pi_get_phci(): 3768 * Get the pHCI devinfo associated with the mdi_pathinfo node 3769 * Return Values: 3770 * Handle to dev_info node 3771 */ 3772 dev_info_t * 3773 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3774 { 3775 dev_info_t *dip = NULL; 3776 if (pip) { 3777 dip = MDI_PI(pip)->pi_phci->ph_dip; 3778 } 3779 return (dip); 
3780 } 3781 3782 /* 3783 * mdi_pi_get_client_private(): 3784 * Get the client private information associated with the 3785 * mdi_pathinfo node 3786 */ 3787 void * 3788 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3789 { 3790 void *cprivate = NULL; 3791 if (pip) { 3792 cprivate = MDI_PI(pip)->pi_cprivate; 3793 } 3794 return (cprivate); 3795 } 3796 3797 /* 3798 * mdi_pi_set_client_private(): 3799 * Set the client private information in the mdi_pathinfo node 3800 */ 3801 void 3802 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3803 { 3804 if (pip) { 3805 MDI_PI(pip)->pi_cprivate = priv; 3806 } 3807 } 3808 3809 /* 3810 * mdi_pi_get_phci_private(): 3811 * Get the pHCI private information associated with the 3812 * mdi_pathinfo node 3813 */ 3814 caddr_t 3815 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3816 { 3817 caddr_t pprivate = NULL; 3818 if (pip) { 3819 pprivate = MDI_PI(pip)->pi_pprivate; 3820 } 3821 return (pprivate); 3822 } 3823 3824 /* 3825 * mdi_pi_set_phci_private(): 3826 * Set the pHCI private information in the mdi_pathinfo node 3827 */ 3828 void 3829 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3830 { 3831 if (pip) { 3832 MDI_PI(pip)->pi_pprivate = priv; 3833 } 3834 } 3835 3836 /* 3837 * mdi_pi_get_state(): 3838 * Get the mdi_pathinfo node state. Transient states are internal 3839 * and not provided to the users 3840 */ 3841 mdi_pathinfo_state_t 3842 mdi_pi_get_state(mdi_pathinfo_t *pip) 3843 { 3844 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3845 3846 if (pip) { 3847 if (MDI_PI_IS_TRANSIENT(pip)) { 3848 /* 3849 * mdi_pathinfo is in state transition. Return the 3850 * last good state. 3851 */ 3852 state = MDI_PI_OLD_STATE(pip); 3853 } else { 3854 state = MDI_PI_STATE(pip); 3855 } 3856 } 3857 return (state); 3858 } 3859 3860 /* 3861 * Note that the following function needs to be the new interface for 3862 * mdi_pi_get_state when mpxio gets integrated to ON. 
3863 */ 3864 int 3865 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3866 uint32_t *ext_state) 3867 { 3868 *state = MDI_PATHINFO_STATE_INIT; 3869 3870 if (pip) { 3871 if (MDI_PI_IS_TRANSIENT(pip)) { 3872 /* 3873 * mdi_pathinfo is in state transition. Return the 3874 * last good state. 3875 */ 3876 *state = MDI_PI_OLD_STATE(pip); 3877 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3878 } else { 3879 *state = MDI_PI_STATE(pip); 3880 *ext_state = MDI_PI_EXT_STATE(pip); 3881 } 3882 } 3883 return (MDI_SUCCESS); 3884 } 3885 3886 /* 3887 * mdi_pi_get_preferred: 3888 * Get the preferred path flag 3889 */ 3890 int 3891 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3892 { 3893 if (pip) { 3894 return (MDI_PI(pip)->pi_preferred); 3895 } 3896 return (0); 3897 } 3898 3899 /* 3900 * mdi_pi_set_preferred: 3901 * Set the preferred path flag 3902 */ 3903 void 3904 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3905 { 3906 if (pip) { 3907 MDI_PI(pip)->pi_preferred = preferred; 3908 } 3909 } 3910 3911 /* 3912 * mdi_pi_set_state(): 3913 * Set the mdi_pathinfo node state 3914 */ 3915 void 3916 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3917 { 3918 uint32_t ext_state; 3919 3920 if (pip) { 3921 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3922 MDI_PI(pip)->pi_state = state; 3923 MDI_PI(pip)->pi_state |= ext_state; 3924 } 3925 } 3926 3927 /* 3928 * Property functions: 3929 */ 3930 int 3931 i_map_nvlist_error_to_mdi(int val) 3932 { 3933 int rv; 3934 3935 switch (val) { 3936 case 0: 3937 rv = DDI_PROP_SUCCESS; 3938 break; 3939 case EINVAL: 3940 case ENOTSUP: 3941 rv = DDI_PROP_INVAL_ARG; 3942 break; 3943 case ENOMEM: 3944 rv = DDI_PROP_NO_MEMORY; 3945 break; 3946 default: 3947 rv = DDI_PROP_NOT_FOUND; 3948 break; 3949 } 3950 return (rv); 3951 } 3952 3953 /* 3954 * mdi_pi_get_next_prop(): 3955 * Property walk function. 
The caller should hold mdi_pi_lock() 3956 * and release by calling mdi_pi_unlock() at the end of walk to 3957 * get a consistent value. 3958 */ 3959 nvpair_t * 3960 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3961 { 3962 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3963 return (NULL); 3964 } 3965 ASSERT(MDI_PI_LOCKED(pip)); 3966 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3967 } 3968 3969 /* 3970 * mdi_prop_remove(): 3971 * Remove the named property from the named list. 3972 */ 3973 int 3974 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3975 { 3976 if (pip == NULL) { 3977 return (DDI_PROP_NOT_FOUND); 3978 } 3979 ASSERT(!MDI_PI_LOCKED(pip)); 3980 MDI_PI_LOCK(pip); 3981 if (MDI_PI(pip)->pi_prop == NULL) { 3982 MDI_PI_UNLOCK(pip); 3983 return (DDI_PROP_NOT_FOUND); 3984 } 3985 if (name) { 3986 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3987 } else { 3988 char nvp_name[MAXNAMELEN]; 3989 nvpair_t *nvp; 3990 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3991 while (nvp) { 3992 nvpair_t *next; 3993 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3994 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3995 nvpair_name(nvp)); 3996 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3997 nvp_name); 3998 nvp = next; 3999 } 4000 } 4001 MDI_PI_UNLOCK(pip); 4002 return (DDI_PROP_SUCCESS); 4003 } 4004 4005 /* 4006 * mdi_prop_size(): 4007 * Get buffer size needed to pack the property data. 4008 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4009 * buffer size. 
4010 */ 4011 int 4012 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4013 { 4014 int rv; 4015 size_t bufsize; 4016 4017 *buflenp = 0; 4018 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4019 return (DDI_PROP_NOT_FOUND); 4020 } 4021 ASSERT(MDI_PI_LOCKED(pip)); 4022 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4023 &bufsize, NV_ENCODE_NATIVE); 4024 *buflenp = bufsize; 4025 return (i_map_nvlist_error_to_mdi(rv)); 4026 } 4027 4028 /* 4029 * mdi_prop_pack(): 4030 * pack the property list. The caller should hold the 4031 * mdi_pathinfo_t node to get a consistent data 4032 */ 4033 int 4034 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4035 { 4036 int rv; 4037 size_t bufsize; 4038 4039 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4040 return (DDI_PROP_NOT_FOUND); 4041 } 4042 4043 ASSERT(MDI_PI_LOCKED(pip)); 4044 4045 bufsize = buflen; 4046 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4047 NV_ENCODE_NATIVE, KM_SLEEP); 4048 4049 return (i_map_nvlist_error_to_mdi(rv)); 4050 } 4051 4052 /* 4053 * mdi_prop_update_byte(): 4054 * Create/Update a byte property 4055 */ 4056 int 4057 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4058 { 4059 int rv; 4060 4061 if (pip == NULL) { 4062 return (DDI_PROP_INVAL_ARG); 4063 } 4064 ASSERT(!MDI_PI_LOCKED(pip)); 4065 MDI_PI_LOCK(pip); 4066 if (MDI_PI(pip)->pi_prop == NULL) { 4067 MDI_PI_UNLOCK(pip); 4068 return (DDI_PROP_NOT_FOUND); 4069 } 4070 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4071 MDI_PI_UNLOCK(pip); 4072 return (i_map_nvlist_error_to_mdi(rv)); 4073 } 4074 4075 /* 4076 * mdi_prop_update_byte_array(): 4077 * Create/Update a byte array property 4078 */ 4079 int 4080 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4081 uint_t nelements) 4082 { 4083 int rv; 4084 4085 if (pip == NULL) { 4086 return (DDI_PROP_INVAL_ARG); 4087 } 4088 ASSERT(!MDI_PI_LOCKED(pip)); 4089 MDI_PI_LOCK(pip); 4090 if (MDI_PI(pip)->pi_prop == NULL) { 
4091 MDI_PI_UNLOCK(pip); 4092 return (DDI_PROP_NOT_FOUND); 4093 } 4094 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4095 MDI_PI_UNLOCK(pip); 4096 return (i_map_nvlist_error_to_mdi(rv)); 4097 } 4098 4099 /* 4100 * mdi_prop_update_int(): 4101 * Create/Update a 32 bit integer property 4102 */ 4103 int 4104 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4105 { 4106 int rv; 4107 4108 if (pip == NULL) { 4109 return (DDI_PROP_INVAL_ARG); 4110 } 4111 ASSERT(!MDI_PI_LOCKED(pip)); 4112 MDI_PI_LOCK(pip); 4113 if (MDI_PI(pip)->pi_prop == NULL) { 4114 MDI_PI_UNLOCK(pip); 4115 return (DDI_PROP_NOT_FOUND); 4116 } 4117 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4118 MDI_PI_UNLOCK(pip); 4119 return (i_map_nvlist_error_to_mdi(rv)); 4120 } 4121 4122 /* 4123 * mdi_prop_update_int64(): 4124 * Create/Update a 64 bit integer property 4125 */ 4126 int 4127 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4128 { 4129 int rv; 4130 4131 if (pip == NULL) { 4132 return (DDI_PROP_INVAL_ARG); 4133 } 4134 ASSERT(!MDI_PI_LOCKED(pip)); 4135 MDI_PI_LOCK(pip); 4136 if (MDI_PI(pip)->pi_prop == NULL) { 4137 MDI_PI_UNLOCK(pip); 4138 return (DDI_PROP_NOT_FOUND); 4139 } 4140 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4141 MDI_PI_UNLOCK(pip); 4142 return (i_map_nvlist_error_to_mdi(rv)); 4143 } 4144 4145 /* 4146 * mdi_prop_update_int_array(): 4147 * Create/Update a int array property 4148 */ 4149 int 4150 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4151 uint_t nelements) 4152 { 4153 int rv; 4154 4155 if (pip == NULL) { 4156 return (DDI_PROP_INVAL_ARG); 4157 } 4158 ASSERT(!MDI_PI_LOCKED(pip)); 4159 MDI_PI_LOCK(pip); 4160 if (MDI_PI(pip)->pi_prop == NULL) { 4161 MDI_PI_UNLOCK(pip); 4162 return (DDI_PROP_NOT_FOUND); 4163 } 4164 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4165 nelements); 4166 MDI_PI_UNLOCK(pip); 4167 return (i_map_nvlist_error_to_mdi(rv)); 
4168 } 4169 4170 /* 4171 * mdi_prop_update_string(): 4172 * Create/Update a string property 4173 */ 4174 int 4175 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4176 { 4177 int rv; 4178 4179 if (pip == NULL) { 4180 return (DDI_PROP_INVAL_ARG); 4181 } 4182 ASSERT(!MDI_PI_LOCKED(pip)); 4183 MDI_PI_LOCK(pip); 4184 if (MDI_PI(pip)->pi_prop == NULL) { 4185 MDI_PI_UNLOCK(pip); 4186 return (DDI_PROP_NOT_FOUND); 4187 } 4188 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4189 MDI_PI_UNLOCK(pip); 4190 return (i_map_nvlist_error_to_mdi(rv)); 4191 } 4192 4193 /* 4194 * mdi_prop_update_string_array(): 4195 * Create/Update a string array property 4196 */ 4197 int 4198 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4199 uint_t nelements) 4200 { 4201 int rv; 4202 4203 if (pip == NULL) { 4204 return (DDI_PROP_INVAL_ARG); 4205 } 4206 ASSERT(!MDI_PI_LOCKED(pip)); 4207 MDI_PI_LOCK(pip); 4208 if (MDI_PI(pip)->pi_prop == NULL) { 4209 MDI_PI_UNLOCK(pip); 4210 return (DDI_PROP_NOT_FOUND); 4211 } 4212 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4213 nelements); 4214 MDI_PI_UNLOCK(pip); 4215 return (i_map_nvlist_error_to_mdi(rv)); 4216 } 4217 4218 /* 4219 * mdi_prop_lookup_byte(): 4220 * Look for byte property identified by name. The data returned 4221 * is the actual property and valid as long as mdi_pathinfo_t node 4222 * is alive. 4223 */ 4224 int 4225 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4226 { 4227 int rv; 4228 4229 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4230 return (DDI_PROP_NOT_FOUND); 4231 } 4232 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4233 return (i_map_nvlist_error_to_mdi(rv)); 4234 } 4235 4236 4237 /* 4238 * mdi_prop_lookup_byte_array(): 4239 * Look for byte array property identified by name. The data 4240 * returned is the actual property and valid as long as 4241 * mdi_pathinfo_t node is alive. 
4242 */ 4243 int 4244 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4245 uint_t *nelements) 4246 { 4247 int rv; 4248 4249 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4250 return (DDI_PROP_NOT_FOUND); 4251 } 4252 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4253 nelements); 4254 return (i_map_nvlist_error_to_mdi(rv)); 4255 } 4256 4257 /* 4258 * mdi_prop_lookup_int(): 4259 * Look for int property identified by name. The data returned 4260 * is the actual property and valid as long as mdi_pathinfo_t 4261 * node is alive. 4262 */ 4263 int 4264 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4265 { 4266 int rv; 4267 4268 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4269 return (DDI_PROP_NOT_FOUND); 4270 } 4271 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4272 return (i_map_nvlist_error_to_mdi(rv)); 4273 } 4274 4275 /* 4276 * mdi_prop_lookup_int64(): 4277 * Look for int64 property identified by name. The data returned 4278 * is the actual property and valid as long as mdi_pathinfo_t node 4279 * is alive. 4280 */ 4281 int 4282 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4283 { 4284 int rv; 4285 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4286 return (DDI_PROP_NOT_FOUND); 4287 } 4288 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4289 return (i_map_nvlist_error_to_mdi(rv)); 4290 } 4291 4292 /* 4293 * mdi_prop_lookup_int_array(): 4294 * Look for int array property identified by name. The data 4295 * returned is the actual property and valid as long as 4296 * mdi_pathinfo_t node is alive. 
4297 */ 4298 int 4299 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4300 uint_t *nelements) 4301 { 4302 int rv; 4303 4304 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4305 return (DDI_PROP_NOT_FOUND); 4306 } 4307 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4308 (int32_t **)data, nelements); 4309 return (i_map_nvlist_error_to_mdi(rv)); 4310 } 4311 4312 /* 4313 * mdi_prop_lookup_string(): 4314 * Look for string property identified by name. The data 4315 * returned is the actual property and valid as long as 4316 * mdi_pathinfo_t node is alive. 4317 */ 4318 int 4319 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4320 { 4321 int rv; 4322 4323 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4324 return (DDI_PROP_NOT_FOUND); 4325 } 4326 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4327 return (i_map_nvlist_error_to_mdi(rv)); 4328 } 4329 4330 /* 4331 * mdi_prop_lookup_string_array(): 4332 * Look for string array property identified by name. The data 4333 * returned is the actual property and valid as long as 4334 * mdi_pathinfo_t node is alive. 4335 */ 4336 int 4337 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4338 uint_t *nelements) 4339 { 4340 int rv; 4341 4342 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4343 return (DDI_PROP_NOT_FOUND); 4344 } 4345 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4346 nelements); 4347 return (i_map_nvlist_error_to_mdi(rv)); 4348 } 4349 4350 /* 4351 * mdi_prop_free(): 4352 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4353 * functions return the pointer to actual property data and not a 4354 * copy of it. So the data returned is valid as long as 4355 * mdi_pathinfo_t node is valid. 
4356 */ 4357 /*ARGSUSED*/ 4358 int 4359 mdi_prop_free(void *data) 4360 { 4361 return (DDI_PROP_SUCCESS); 4362 } 4363 4364 /*ARGSUSED*/ 4365 static void 4366 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4367 { 4368 char *phci_path, *ct_path; 4369 char *ct_status; 4370 char *status; 4371 dev_info_t *dip = ct->ct_dip; 4372 char lb_buf[64]; 4373 4374 ASSERT(MDI_CLIENT_LOCKED(ct)); 4375 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4376 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4377 return; 4378 } 4379 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4380 ct_status = "optimal"; 4381 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4382 ct_status = "degraded"; 4383 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4384 ct_status = "failed"; 4385 } else { 4386 ct_status = "unknown"; 4387 } 4388 4389 if (MDI_PI_IS_OFFLINE(pip)) { 4390 status = "offline"; 4391 } else if (MDI_PI_IS_ONLINE(pip)) { 4392 status = "online"; 4393 } else if (MDI_PI_IS_STANDBY(pip)) { 4394 status = "standby"; 4395 } else if (MDI_PI_IS_FAULT(pip)) { 4396 status = "faulted"; 4397 } else { 4398 status = "unknown"; 4399 } 4400 4401 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4402 (void) snprintf(lb_buf, sizeof (lb_buf), 4403 "%s, region-size: %d", mdi_load_balance_lba, 4404 ct->ct_lb_args->region_size); 4405 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4406 (void) snprintf(lb_buf, sizeof (lb_buf), 4407 "%s", mdi_load_balance_none); 4408 } else { 4409 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4410 mdi_load_balance_rr); 4411 } 4412 4413 if (dip) { 4414 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4415 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4416 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4417 "path %s (%s%d) to target address: %s is %s" 4418 " Load balancing: %s\n", 4419 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4420 ddi_get_instance(dip), ct_status, 4421 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4422 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4423 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4424 MDI_PI(pip)->pi_addr, status, lb_buf); 4425 kmem_free(phci_path, MAXPATHLEN); 4426 kmem_free(ct_path, MAXPATHLEN); 4427 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4428 } 4429 } 4430 4431 #ifdef DEBUG 4432 /* 4433 * i_mdi_log(): 4434 * Utility function for error message management 4435 * 4436 */ 4437 /*PRINTFLIKE3*/ 4438 static void 4439 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4440 { 4441 char name[MAXNAMELEN]; 4442 char buf[MAXNAMELEN]; 4443 char *bp; 4444 va_list ap; 4445 int log_only = 0; 4446 int boot_only = 0; 4447 int console_only = 0; 4448 4449 if (dip) { 4450 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4451 ddi_node_name(dip), ddi_get_instance(dip)); 4452 } else { 4453 name[0] = 0; 4454 } 4455 4456 va_start(ap, fmt); 4457 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4458 va_end(ap); 4459 4460 switch (buf[0]) { 4461 case '!': 4462 bp = &buf[1]; 4463 log_only = 1; 4464 break; 4465 case '?': 4466 bp = &buf[1]; 4467 boot_only = 1; 4468 break; 4469 case '^': 4470 bp = &buf[1]; 4471 console_only = 1; 4472 break; 4473 default: 4474 bp = buf; 4475 break; 4476 } 4477 if (mdi_debug_logonly) { 4478 log_only = 1; 4479 boot_only = 0; 4480 console_only = 0; 4481 } 4482 4483 switch (level) { 4484 case CE_NOTE: 4485 level = CE_CONT; 4486 /* FALLTHROUGH */ 4487 case CE_CONT: 4488 case CE_WARN: 4489 case CE_PANIC: 4490 if (boot_only) { 4491 cmn_err(level, "?mdi: %s%s", name, bp); 4492 } else if (console_only) { 4493 cmn_err(level, "^mdi: %s%s", name, bp); 4494 } else if (log_only) { 4495 cmn_err(level, "!mdi: %s%s", name, bp); 4496 } else { 4497 cmn_err(level, "mdi: %s%s", name, bp); 4498 } 4499 break; 4500 default: 4501 cmn_err(level, "mdi: %s%s", name, bp); 4502 break; 4503 } 4504 } 4505 #endif /* DEBUG */ 4506 4507 void 4508 i_mdi_client_online(dev_info_t *ct_dip) 4509 { 4510 mdi_client_t *ct; 4511 4512 /* 4513 * Client online notification. 
Mark client state as online 4514 * restore our binding with dev_info node 4515 */ 4516 ct = i_devi_get_client(ct_dip); 4517 ASSERT(ct != NULL); 4518 MDI_CLIENT_LOCK(ct); 4519 MDI_CLIENT_SET_ONLINE(ct); 4520 /* catch for any memory leaks */ 4521 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4522 ct->ct_dip = ct_dip; 4523 4524 if (ct->ct_power_cnt == 0) 4525 (void) i_mdi_power_all_phci(ct); 4526 4527 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4528 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4529 i_mdi_pm_hold_client(ct, 1); 4530 4531 MDI_CLIENT_UNLOCK(ct); 4532 } 4533 4534 void 4535 i_mdi_phci_online(dev_info_t *ph_dip) 4536 { 4537 mdi_phci_t *ph; 4538 4539 /* pHCI online notification. Mark state accordingly */ 4540 ph = i_devi_get_phci(ph_dip); 4541 ASSERT(ph != NULL); 4542 MDI_PHCI_LOCK(ph); 4543 MDI_PHCI_SET_ONLINE(ph); 4544 MDI_PHCI_UNLOCK(ph); 4545 } 4546 4547 /* 4548 * mdi_devi_online(): 4549 * Online notification from NDI framework on pHCI/client 4550 * device online. 4551 * Return Values: 4552 * NDI_SUCCESS 4553 * MDI_FAILURE 4554 */ 4555 /*ARGSUSED*/ 4556 int 4557 mdi_devi_online(dev_info_t *dip, uint_t flags) 4558 { 4559 if (MDI_PHCI(dip)) { 4560 i_mdi_phci_online(dip); 4561 } 4562 4563 if (MDI_CLIENT(dip)) { 4564 i_mdi_client_online(dip); 4565 } 4566 return (NDI_SUCCESS); 4567 } 4568 4569 /* 4570 * mdi_devi_offline(): 4571 * Offline notification from NDI framework on pHCI/Client device 4572 * offline. 
4573 * 4574 * Return Values: 4575 * NDI_SUCCESS 4576 * NDI_FAILURE 4577 */ 4578 /*ARGSUSED*/ 4579 int 4580 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4581 { 4582 int rv = NDI_SUCCESS; 4583 4584 if (MDI_CLIENT(dip)) { 4585 rv = i_mdi_client_offline(dip, flags); 4586 if (rv != NDI_SUCCESS) 4587 return (rv); 4588 } 4589 4590 if (MDI_PHCI(dip)) { 4591 rv = i_mdi_phci_offline(dip, flags); 4592 4593 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4594 /* set client back online */ 4595 i_mdi_client_online(dip); 4596 } 4597 } 4598 4599 return (rv); 4600 } 4601 4602 /*ARGSUSED*/ 4603 static int 4604 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4605 { 4606 int rv = NDI_SUCCESS; 4607 mdi_phci_t *ph; 4608 mdi_client_t *ct; 4609 mdi_pathinfo_t *pip; 4610 mdi_pathinfo_t *next; 4611 mdi_pathinfo_t *failed_pip = NULL; 4612 dev_info_t *cdip; 4613 4614 /* 4615 * pHCI component offline notification 4616 * Make sure that this pHCI instance is free to be offlined. 4617 * If it is OK to proceed, Offline and remove all the child 4618 * mdi_pathinfo nodes. This process automatically offlines 4619 * corresponding client devices, for which this pHCI provides 4620 * critical services. 4621 */ 4622 ph = i_devi_get_phci(dip); 4623 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n", 4624 (void *)dip, (void *)ph)); 4625 if (ph == NULL) { 4626 return (rv); 4627 } 4628 4629 MDI_PHCI_LOCK(ph); 4630 4631 if (MDI_PHCI_IS_OFFLINE(ph)) { 4632 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", 4633 (void *)ph)); 4634 MDI_PHCI_UNLOCK(ph); 4635 return (NDI_SUCCESS); 4636 } 4637 4638 /* 4639 * Check to see if the pHCI can be offlined 4640 */ 4641 if (ph->ph_unstable) { 4642 MDI_DEBUG(1, (CE_WARN, dip, 4643 "!One or more target devices are in transient " 4644 "state. This device can not be removed at " 4645 "this moment. 
Please try again later.")); 4646 MDI_PHCI_UNLOCK(ph); 4647 return (NDI_BUSY); 4648 } 4649 4650 pip = ph->ph_path_head; 4651 while (pip != NULL) { 4652 MDI_PI_LOCK(pip); 4653 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4654 4655 /* 4656 * The mdi_pathinfo state is OK. Check the client state. 4657 * If failover in progress fail the pHCI from offlining 4658 */ 4659 ct = MDI_PI(pip)->pi_client; 4660 i_mdi_client_lock(ct, pip); 4661 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4662 (ct->ct_unstable)) { 4663 /* 4664 * Failover is in progress, Fail the DR 4665 */ 4666 MDI_DEBUG(1, (CE_WARN, dip, 4667 "!pHCI device (%s%d) is Busy. %s", 4668 ddi_driver_name(dip), ddi_get_instance(dip), 4669 "This device can not be removed at " 4670 "this moment. Please try again later.")); 4671 MDI_PI_UNLOCK(pip); 4672 i_mdi_client_unlock(ct); 4673 MDI_PHCI_UNLOCK(ph); 4674 return (NDI_BUSY); 4675 } 4676 MDI_PI_UNLOCK(pip); 4677 4678 /* 4679 * Check to see of we are removing the last path of this 4680 * client device... 4681 */ 4682 cdip = ct->ct_dip; 4683 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4684 (i_mdi_client_compute_state(ct, ph) == 4685 MDI_CLIENT_STATE_FAILED)) { 4686 i_mdi_client_unlock(ct); 4687 MDI_PHCI_UNLOCK(ph); 4688 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4689 /* 4690 * ndi_devi_offline() failed. 4691 * This pHCI provides the critical path 4692 * to one or more client devices. 4693 * Return busy. 4694 */ 4695 MDI_PHCI_LOCK(ph); 4696 MDI_DEBUG(1, (CE_WARN, dip, 4697 "!pHCI device (%s%d) is Busy. %s", 4698 ddi_driver_name(dip), ddi_get_instance(dip), 4699 "This device can not be removed at " 4700 "this moment. 
Please try again later.")); 4701 failed_pip = pip; 4702 break; 4703 } else { 4704 MDI_PHCI_LOCK(ph); 4705 pip = next; 4706 } 4707 } else { 4708 i_mdi_client_unlock(ct); 4709 pip = next; 4710 } 4711 } 4712 4713 if (failed_pip) { 4714 pip = ph->ph_path_head; 4715 while (pip != failed_pip) { 4716 MDI_PI_LOCK(pip); 4717 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4718 ct = MDI_PI(pip)->pi_client; 4719 i_mdi_client_lock(ct, pip); 4720 cdip = ct->ct_dip; 4721 switch (MDI_CLIENT_STATE(ct)) { 4722 case MDI_CLIENT_STATE_OPTIMAL: 4723 case MDI_CLIENT_STATE_DEGRADED: 4724 if (cdip) { 4725 MDI_PI_UNLOCK(pip); 4726 i_mdi_client_unlock(ct); 4727 MDI_PHCI_UNLOCK(ph); 4728 (void) ndi_devi_online(cdip, 0); 4729 MDI_PHCI_LOCK(ph); 4730 pip = next; 4731 continue; 4732 } 4733 break; 4734 4735 case MDI_CLIENT_STATE_FAILED: 4736 if (cdip) { 4737 MDI_PI_UNLOCK(pip); 4738 i_mdi_client_unlock(ct); 4739 MDI_PHCI_UNLOCK(ph); 4740 (void) ndi_devi_offline(cdip, 0); 4741 MDI_PHCI_LOCK(ph); 4742 pip = next; 4743 continue; 4744 } 4745 break; 4746 } 4747 MDI_PI_UNLOCK(pip); 4748 i_mdi_client_unlock(ct); 4749 pip = next; 4750 } 4751 MDI_PHCI_UNLOCK(ph); 4752 return (NDI_BUSY); 4753 } 4754 4755 /* 4756 * Mark the pHCI as offline 4757 */ 4758 MDI_PHCI_SET_OFFLINE(ph); 4759 4760 /* 4761 * Mark the child mdi_pathinfo nodes as transient 4762 */ 4763 pip = ph->ph_path_head; 4764 while (pip != NULL) { 4765 MDI_PI_LOCK(pip); 4766 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4767 MDI_PI_SET_OFFLINING(pip); 4768 MDI_PI_UNLOCK(pip); 4769 pip = next; 4770 } 4771 MDI_PHCI_UNLOCK(ph); 4772 /* 4773 * Give a chance for any pending commands to execute 4774 */ 4775 delay(1); 4776 MDI_PHCI_LOCK(ph); 4777 pip = ph->ph_path_head; 4778 while (pip != NULL) { 4779 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4780 (void) i_mdi_pi_offline(pip, flags); 4781 MDI_PI_LOCK(pip); 4782 ct = MDI_PI(pip)->pi_client; 4783 if (!MDI_PI_IS_OFFLINE(pip)) { 4784 MDI_DEBUG(1, (CE_WARN, dip, 4785 "!pHCI device (%s%d) 
is Busy. %s", 4786 ddi_driver_name(dip), ddi_get_instance(dip), 4787 "This device can not be removed at " 4788 "this moment. Please try again later.")); 4789 MDI_PI_UNLOCK(pip); 4790 MDI_PHCI_SET_ONLINE(ph); 4791 MDI_PHCI_UNLOCK(ph); 4792 return (NDI_BUSY); 4793 } 4794 MDI_PI_UNLOCK(pip); 4795 pip = next; 4796 } 4797 MDI_PHCI_UNLOCK(ph); 4798 4799 return (rv); 4800 } 4801 4802 /*ARGSUSED*/ 4803 static int 4804 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4805 { 4806 int rv = NDI_SUCCESS; 4807 mdi_client_t *ct; 4808 4809 /* 4810 * Client component to go offline. Make sure that we are 4811 * not in failing over state and update client state 4812 * accordingly 4813 */ 4814 ct = i_devi_get_client(dip); 4815 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 4816 (void *)dip, (void *)ct)); 4817 if (ct != NULL) { 4818 MDI_CLIENT_LOCK(ct); 4819 if (ct->ct_unstable) { 4820 /* 4821 * One or more paths are in transient state, 4822 * Dont allow offline of a client device 4823 */ 4824 MDI_DEBUG(1, (CE_WARN, dip, 4825 "!One or more paths to this device is " 4826 "in transient state. This device can not " 4827 "be removed at this moment. " 4828 "Please try again later.")); 4829 MDI_CLIENT_UNLOCK(ct); 4830 return (NDI_BUSY); 4831 } 4832 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4833 /* 4834 * Failover is in progress, Dont allow DR of 4835 * a client device 4836 */ 4837 MDI_DEBUG(1, (CE_WARN, dip, 4838 "!Client device (%s%d) is Busy. %s", 4839 ddi_driver_name(dip), ddi_get_instance(dip), 4840 "This device can not be removed at " 4841 "this moment. 
Please try again later.")); 4842 MDI_CLIENT_UNLOCK(ct); 4843 return (NDI_BUSY); 4844 } 4845 MDI_CLIENT_SET_OFFLINE(ct); 4846 4847 /* 4848 * Unbind our relationship with the dev_info node 4849 */ 4850 if (flags & NDI_DEVI_REMOVE) { 4851 ct->ct_dip = NULL; 4852 } 4853 MDI_CLIENT_UNLOCK(ct); 4854 } 4855 return (rv); 4856 } 4857 4858 /* 4859 * mdi_pre_attach(): 4860 * Pre attach() notification handler 4861 */ 4862 /*ARGSUSED*/ 4863 int 4864 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4865 { 4866 /* don't support old DDI_PM_RESUME */ 4867 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4868 (cmd == DDI_PM_RESUME)) 4869 return (DDI_FAILURE); 4870 4871 return (DDI_SUCCESS); 4872 } 4873 4874 /* 4875 * mdi_post_attach(): 4876 * Post attach() notification handler 4877 */ 4878 /*ARGSUSED*/ 4879 void 4880 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4881 { 4882 mdi_phci_t *ph; 4883 mdi_client_t *ct; 4884 mdi_pathinfo_t *pip; 4885 4886 if (MDI_PHCI(dip)) { 4887 ph = i_devi_get_phci(dip); 4888 ASSERT(ph != NULL); 4889 4890 MDI_PHCI_LOCK(ph); 4891 switch (cmd) { 4892 case DDI_ATTACH: 4893 MDI_DEBUG(2, (CE_NOTE, dip, 4894 "!pHCI post_attach: called %p\n", (void *)ph)); 4895 if (error == DDI_SUCCESS) { 4896 MDI_PHCI_SET_ATTACH(ph); 4897 } else { 4898 MDI_DEBUG(1, (CE_NOTE, dip, 4899 "!pHCI post_attach: failed error=%d\n", 4900 error)); 4901 MDI_PHCI_SET_DETACH(ph); 4902 } 4903 break; 4904 4905 case DDI_RESUME: 4906 MDI_DEBUG(2, (CE_NOTE, dip, 4907 "!pHCI post_resume: called %p\n", (void *)ph)); 4908 if (error == DDI_SUCCESS) { 4909 MDI_PHCI_SET_RESUME(ph); 4910 } else { 4911 MDI_DEBUG(1, (CE_NOTE, dip, 4912 "!pHCI post_resume: failed error=%d\n", 4913 error)); 4914 MDI_PHCI_SET_SUSPEND(ph); 4915 } 4916 break; 4917 } 4918 MDI_PHCI_UNLOCK(ph); 4919 } 4920 4921 if (MDI_CLIENT(dip)) { 4922 ct = i_devi_get_client(dip); 4923 ASSERT(ct != NULL); 4924 4925 MDI_CLIENT_LOCK(ct); 4926 switch (cmd) { 4927 case DDI_ATTACH: 4928 MDI_DEBUG(2, (CE_NOTE, 
dip, 4929 "!Client post_attach: called %p\n", (void *)ct)); 4930 if (error != DDI_SUCCESS) { 4931 MDI_DEBUG(1, (CE_NOTE, dip, 4932 "!Client post_attach: failed error=%d\n", 4933 error)); 4934 MDI_CLIENT_SET_DETACH(ct); 4935 MDI_DEBUG(4, (CE_WARN, dip, 4936 "mdi_post_attach i_mdi_pm_reset_client\n")); 4937 i_mdi_pm_reset_client(ct); 4938 break; 4939 } 4940 4941 /* 4942 * Client device has successfully attached. 4943 * Create kstats for any pathinfo structures 4944 * initially associated with this client. 4945 */ 4946 for (pip = ct->ct_path_head; pip != NULL; 4947 pip = (mdi_pathinfo_t *) 4948 MDI_PI(pip)->pi_client_link) { 4949 if (!MDI_PI_IS_OFFLINE(pip)) { 4950 (void) i_mdi_pi_kstat_create(pip); 4951 i_mdi_report_path_state(ct, pip); 4952 } 4953 } 4954 MDI_CLIENT_SET_ATTACH(ct); 4955 break; 4956 4957 case DDI_RESUME: 4958 MDI_DEBUG(2, (CE_NOTE, dip, 4959 "!Client post_attach: called %p\n", (void *)ct)); 4960 if (error == DDI_SUCCESS) { 4961 MDI_CLIENT_SET_RESUME(ct); 4962 } else { 4963 MDI_DEBUG(1, (CE_NOTE, dip, 4964 "!Client post_resume: failed error=%d\n", 4965 error)); 4966 MDI_CLIENT_SET_SUSPEND(ct); 4967 } 4968 break; 4969 } 4970 MDI_CLIENT_UNLOCK(ct); 4971 } 4972 } 4973 4974 /* 4975 * mdi_pre_detach(): 4976 * Pre detach notification handler 4977 */ 4978 /*ARGSUSED*/ 4979 int 4980 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4981 { 4982 int rv = DDI_SUCCESS; 4983 4984 if (MDI_CLIENT(dip)) { 4985 (void) i_mdi_client_pre_detach(dip, cmd); 4986 } 4987 4988 if (MDI_PHCI(dip)) { 4989 rv = i_mdi_phci_pre_detach(dip, cmd); 4990 } 4991 4992 return (rv); 4993 } 4994 4995 /*ARGSUSED*/ 4996 static int 4997 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4998 { 4999 int rv = DDI_SUCCESS; 5000 mdi_phci_t *ph; 5001 mdi_client_t *ct; 5002 mdi_pathinfo_t *pip; 5003 mdi_pathinfo_t *failed_pip = NULL; 5004 mdi_pathinfo_t *next; 5005 5006 ph = i_devi_get_phci(dip); 5007 if (ph == NULL) { 5008 return (rv); 5009 } 5010 5011 MDI_PHCI_LOCK(ph); 5012 switch 
(cmd) { 5013 case DDI_DETACH: 5014 MDI_DEBUG(2, (CE_NOTE, dip, 5015 "!pHCI pre_detach: called %p\n", (void *)ph)); 5016 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5017 /* 5018 * mdi_pathinfo nodes are still attached to 5019 * this pHCI. Fail the detach for this pHCI. 5020 */ 5021 MDI_DEBUG(2, (CE_WARN, dip, 5022 "!pHCI pre_detach: " 5023 "mdi_pathinfo nodes are still attached " 5024 "%p\n", (void *)ph)); 5025 rv = DDI_FAILURE; 5026 break; 5027 } 5028 MDI_PHCI_SET_DETACH(ph); 5029 break; 5030 5031 case DDI_SUSPEND: 5032 /* 5033 * pHCI is getting suspended. Since mpxio client 5034 * devices may not be suspended at this point, to avoid 5035 * a potential stack overflow, it is important to suspend 5036 * client devices before pHCI can be suspended. 5037 */ 5038 5039 MDI_DEBUG(2, (CE_NOTE, dip, 5040 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5041 /* 5042 * Suspend all the client devices accessible through this pHCI 5043 */ 5044 pip = ph->ph_path_head; 5045 while (pip != NULL && rv == DDI_SUCCESS) { 5046 dev_info_t *cdip; 5047 MDI_PI_LOCK(pip); 5048 next = 5049 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5050 ct = MDI_PI(pip)->pi_client; 5051 i_mdi_client_lock(ct, pip); 5052 cdip = ct->ct_dip; 5053 MDI_PI_UNLOCK(pip); 5054 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5055 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5056 i_mdi_client_unlock(ct); 5057 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5058 DDI_SUCCESS) { 5059 /* 5060 * Suspend of one of the client 5061 * device has failed. 5062 */ 5063 MDI_DEBUG(1, (CE_WARN, dip, 5064 "!Suspend of device (%s%d) failed.", 5065 ddi_driver_name(cdip), 5066 ddi_get_instance(cdip))); 5067 failed_pip = pip; 5068 break; 5069 } 5070 } else { 5071 i_mdi_client_unlock(ct); 5072 } 5073 pip = next; 5074 } 5075 5076 if (rv == DDI_SUCCESS) { 5077 /* 5078 * Suspend of client devices is complete. Proceed 5079 * with pHCI suspend. 
5080 */ 5081 MDI_PHCI_SET_SUSPEND(ph); 5082 } else { 5083 /* 5084 * Revert back all the suspended client device states 5085 * to converse. 5086 */ 5087 pip = ph->ph_path_head; 5088 while (pip != failed_pip) { 5089 dev_info_t *cdip; 5090 MDI_PI_LOCK(pip); 5091 next = 5092 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5093 ct = MDI_PI(pip)->pi_client; 5094 i_mdi_client_lock(ct, pip); 5095 cdip = ct->ct_dip; 5096 MDI_PI_UNLOCK(pip); 5097 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5098 i_mdi_client_unlock(ct); 5099 (void) devi_attach(cdip, DDI_RESUME); 5100 } else { 5101 i_mdi_client_unlock(ct); 5102 } 5103 pip = next; 5104 } 5105 } 5106 break; 5107 5108 default: 5109 rv = DDI_FAILURE; 5110 break; 5111 } 5112 MDI_PHCI_UNLOCK(ph); 5113 return (rv); 5114 } 5115 5116 /*ARGSUSED*/ 5117 static int 5118 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5119 { 5120 int rv = DDI_SUCCESS; 5121 mdi_client_t *ct; 5122 5123 ct = i_devi_get_client(dip); 5124 if (ct == NULL) { 5125 return (rv); 5126 } 5127 5128 MDI_CLIENT_LOCK(ct); 5129 switch (cmd) { 5130 case DDI_DETACH: 5131 MDI_DEBUG(2, (CE_NOTE, dip, 5132 "!Client pre_detach: called %p\n", (void *)ct)); 5133 MDI_CLIENT_SET_DETACH(ct); 5134 break; 5135 5136 case DDI_SUSPEND: 5137 MDI_DEBUG(2, (CE_NOTE, dip, 5138 "!Client pre_suspend: called %p\n", (void *)ct)); 5139 MDI_CLIENT_SET_SUSPEND(ct); 5140 break; 5141 5142 default: 5143 rv = DDI_FAILURE; 5144 break; 5145 } 5146 MDI_CLIENT_UNLOCK(ct); 5147 return (rv); 5148 } 5149 5150 /* 5151 * mdi_post_detach(): 5152 * Post detach notification handler 5153 */ 5154 /*ARGSUSED*/ 5155 void 5156 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5157 { 5158 /* 5159 * Detach/Suspend of mpxio component failed. 
Update our state 5160 * too 5161 */ 5162 if (MDI_PHCI(dip)) 5163 i_mdi_phci_post_detach(dip, cmd, error); 5164 5165 if (MDI_CLIENT(dip)) 5166 i_mdi_client_post_detach(dip, cmd, error); 5167 } 5168 5169 /*ARGSUSED*/ 5170 static void 5171 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5172 { 5173 mdi_phci_t *ph; 5174 5175 /* 5176 * Detach/Suspend of phci component failed. Update our state 5177 * too 5178 */ 5179 ph = i_devi_get_phci(dip); 5180 if (ph == NULL) { 5181 return; 5182 } 5183 5184 MDI_PHCI_LOCK(ph); 5185 /* 5186 * Detach of pHCI failed. Restore back converse 5187 * state 5188 */ 5189 switch (cmd) { 5190 case DDI_DETACH: 5191 MDI_DEBUG(2, (CE_NOTE, dip, 5192 "!pHCI post_detach: called %p\n", (void *)ph)); 5193 if (error != DDI_SUCCESS) 5194 MDI_PHCI_SET_ATTACH(ph); 5195 break; 5196 5197 case DDI_SUSPEND: 5198 MDI_DEBUG(2, (CE_NOTE, dip, 5199 "!pHCI post_suspend: called %p\n", (void *)ph)); 5200 if (error != DDI_SUCCESS) 5201 MDI_PHCI_SET_RESUME(ph); 5202 break; 5203 } 5204 MDI_PHCI_UNLOCK(ph); 5205 } 5206 5207 /*ARGSUSED*/ 5208 static void 5209 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5210 { 5211 mdi_client_t *ct; 5212 5213 ct = i_devi_get_client(dip); 5214 if (ct == NULL) { 5215 return; 5216 } 5217 MDI_CLIENT_LOCK(ct); 5218 /* 5219 * Detach of Client failed. 
Restore back converse 5220 * state 5221 */ 5222 switch (cmd) { 5223 case DDI_DETACH: 5224 MDI_DEBUG(2, (CE_NOTE, dip, 5225 "!Client post_detach: called %p\n", (void *)ct)); 5226 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5227 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5228 "i_mdi_pm_rele_client\n")); 5229 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5230 } else { 5231 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5232 "i_mdi_pm_reset_client\n")); 5233 i_mdi_pm_reset_client(ct); 5234 } 5235 if (error != DDI_SUCCESS) 5236 MDI_CLIENT_SET_ATTACH(ct); 5237 break; 5238 5239 case DDI_SUSPEND: 5240 MDI_DEBUG(2, (CE_NOTE, dip, 5241 "!Client post_suspend: called %p\n", (void *)ct)); 5242 if (error != DDI_SUCCESS) 5243 MDI_CLIENT_SET_RESUME(ct); 5244 break; 5245 } 5246 MDI_CLIENT_UNLOCK(ct); 5247 } 5248 5249 /* 5250 * create and install per-path (client - pHCI) statistics 5251 * I/O stats supported: nread, nwritten, reads, and writes 5252 * Error stats - hard errors, soft errors, & transport errors 5253 */ 5254 static int 5255 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip) 5256 { 5257 5258 dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip; 5259 dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip; 5260 char ksname[KSTAT_STRLEN]; 5261 mdi_pathinfo_t *cpip; 5262 const char *err_postfix = ",err"; 5263 kstat_t *kiosp, *kerrsp; 5264 struct pi_errs *nsp; 5265 struct mdi_pi_kstats *mdi_statp; 5266 5267 ASSERT(client != NULL && ppath != NULL); 5268 5269 ASSERT(MDI_CLIENT_LOCKED(MDI_PI(pip)->pi_client)); 5270 5271 if (MDI_PI(pip)->pi_kstats != NULL) 5272 return (MDI_SUCCESS); 5273 5274 for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL; 5275 cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) { 5276 if ((cpip == pip) || MDI_PI_IS_OFFLINE(pip)) 5277 continue; 5278 /* 5279 * We have found a different path with same parent 5280 * kstats for a given client-pHCI are common 5281 */ 5282 if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) && 5283 (MDI_PI(cpip)->pi_kstats 
!= NULL)) { 5284 MDI_PI(cpip)->pi_kstats->pi_kstat_ref++; 5285 MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats; 5286 return (MDI_SUCCESS); 5287 } 5288 } 5289 5290 /* 5291 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0" 5292 * clamp length of name against max length of error kstat name 5293 */ 5294 if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d", 5295 ddi_driver_name(client), ddi_get_instance(client), 5296 ddi_driver_name(ppath), ddi_get_instance(ppath)) > 5297 (KSTAT_STRLEN - strlen(err_postfix))) { 5298 return (MDI_FAILURE); 5299 } 5300 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5301 KSTAT_TYPE_IO, 1, 0)) == NULL) { 5302 return (MDI_FAILURE); 5303 } 5304 5305 (void) strcat(ksname, err_postfix); 5306 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5307 KSTAT_TYPE_NAMED, 5308 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5309 5310 if (kerrsp == NULL) { 5311 kstat_delete(kiosp); 5312 return (MDI_FAILURE); 5313 } 5314 5315 nsp = (struct pi_errs *)kerrsp->ks_data; 5316 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5317 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5318 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5319 KSTAT_DATA_UINT32); 5320 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5321 KSTAT_DATA_UINT32); 5322 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5323 KSTAT_DATA_UINT32); 5324 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5325 KSTAT_DATA_UINT32); 5326 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5327 KSTAT_DATA_UINT32); 5328 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5329 KSTAT_DATA_UINT32); 5330 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5331 KSTAT_DATA_UINT32); 5332 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5333 5334 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5335 mdi_statp->pi_kstat_ref = 1; 5336 mdi_statp->pi_kstat_iostats = 
kiosp; 5337 mdi_statp->pi_kstat_errstats = kerrsp; 5338 kstat_install(kiosp); 5339 kstat_install(kerrsp); 5340 MDI_PI(pip)->pi_kstats = mdi_statp; 5341 return (MDI_SUCCESS); 5342 } 5343 5344 /* 5345 * destroy per-path properties 5346 */ 5347 static void 5348 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5349 { 5350 5351 struct mdi_pi_kstats *mdi_statp; 5352 5353 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5354 return; 5355 5356 MDI_PI(pip)->pi_kstats = NULL; 5357 5358 /* 5359 * the kstat may be shared between multiple pathinfo nodes 5360 * decrement this pathinfo's usage, removing the kstats 5361 * themselves when the last pathinfo reference is removed. 5362 */ 5363 ASSERT(mdi_statp->pi_kstat_ref > 0); 5364 if (--mdi_statp->pi_kstat_ref != 0) 5365 return; 5366 5367 kstat_delete(mdi_statp->pi_kstat_iostats); 5368 kstat_delete(mdi_statp->pi_kstat_errstats); 5369 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5370 } 5371 5372 /* 5373 * update I/O paths KSTATS 5374 */ 5375 void 5376 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5377 { 5378 kstat_t *iostatp; 5379 size_t xfer_cnt; 5380 5381 ASSERT(pip != NULL); 5382 5383 /* 5384 * I/O can be driven across a path prior to having path 5385 * statistics available, i.e. probe(9e). 5386 */ 5387 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5388 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5389 xfer_cnt = bp->b_bcount - bp->b_resid; 5390 if (bp->b_flags & B_READ) { 5391 KSTAT_IO_PTR(iostatp)->reads++; 5392 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5393 } else { 5394 KSTAT_IO_PTR(iostatp)->writes++; 5395 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5396 } 5397 } 5398 } 5399 5400 /* 5401 * Enable the path(specific client/target/initiator) 5402 * Enabling a path means that MPxIO may select the enabled path for routing 5403 * future I/O requests, subject to other path state constraints. 
5404 */ 5405 int 5406 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5407 { 5408 mdi_phci_t *ph; 5409 5410 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5411 if (ph == NULL) { 5412 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5413 " failed. pip: %p ph = NULL\n", (void *)pip)); 5414 return (MDI_FAILURE); 5415 } 5416 5417 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5418 MDI_ENABLE_OP); 5419 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5420 " Returning success pip = %p. ph = %p\n", 5421 (void *)pip, (void *)ph)); 5422 return (MDI_SUCCESS); 5423 5424 } 5425 5426 /* 5427 * Disable the path (specific client/target/initiator) 5428 * Disabling a path means that MPxIO will not select the disabled path for 5429 * routing any new I/O requests. 5430 */ 5431 int 5432 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5433 { 5434 mdi_phci_t *ph; 5435 5436 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5437 if (ph == NULL) { 5438 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5439 " failed. pip: %p ph = NULL\n", (void *)pip)); 5440 return (MDI_FAILURE); 5441 } 5442 5443 (void) i_mdi_enable_disable_path(pip, 5444 ph->ph_vhci, flags, MDI_DISABLE_OP); 5445 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5446 "Returning success pip = %p. ph = %p", 5447 (void *)pip, (void *)ph)); 5448 return (MDI_SUCCESS); 5449 } 5450 5451 /* 5452 * disable the path to a particular pHCI (pHCI specified in the phci_path 5453 * argument) for a particular client (specified in the client_path argument). 5454 * Disabling a path means that MPxIO will not select the disabled path for 5455 * routing any new I/O requests. 
5456 * NOTE: this will be removed once the NWS files are changed to use the new 5457 * mdi_{enable,disable}_path interfaces 5458 */ 5459 int 5460 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5461 { 5462 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5463 } 5464 5465 /* 5466 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5467 * argument) for a particular client (specified in the client_path argument). 5468 * Enabling a path means that MPxIO may select the enabled path for routing 5469 * future I/O requests, subject to other path state constraints. 5470 * NOTE: this will be removed once the NWS files are changed to use the new 5471 * mdi_{enable,disable}_path interfaces 5472 */ 5473 5474 int 5475 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5476 { 5477 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5478 } 5479 5480 /* 5481 * Common routine for doing enable/disable. 5482 */ 5483 static mdi_pathinfo_t * 5484 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5485 int op) 5486 { 5487 int sync_flag = 0; 5488 int rv; 5489 mdi_pathinfo_t *next; 5490 int (*f)() = NULL; 5491 5492 f = vh->vh_ops->vo_pi_state_change; 5493 5494 sync_flag = (flags << 8) & 0xf00; 5495 5496 /* 5497 * Do a callback into the mdi consumer to let it 5498 * know that path is about to get enabled/disabled. 
5499 */ 5500 if (f != NULL) { 5501 rv = (*f)(vh->vh_dip, pip, 0, 5502 MDI_PI_EXT_STATE(pip), 5503 MDI_EXT_STATE_CHANGE | sync_flag | 5504 op | MDI_BEFORE_STATE_CHANGE); 5505 if (rv != MDI_SUCCESS) { 5506 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5507 "!vo_pi_state_change: failed rv = %x", rv)); 5508 } 5509 } 5510 MDI_PI_LOCK(pip); 5511 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5512 5513 switch (flags) { 5514 case USER_DISABLE: 5515 if (op == MDI_DISABLE_OP) { 5516 MDI_PI_SET_USER_DISABLE(pip); 5517 } else { 5518 MDI_PI_SET_USER_ENABLE(pip); 5519 } 5520 break; 5521 case DRIVER_DISABLE: 5522 if (op == MDI_DISABLE_OP) { 5523 MDI_PI_SET_DRV_DISABLE(pip); 5524 } else { 5525 MDI_PI_SET_DRV_ENABLE(pip); 5526 } 5527 break; 5528 case DRIVER_DISABLE_TRANSIENT: 5529 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5530 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5531 } else { 5532 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5533 } 5534 break; 5535 } 5536 MDI_PI_UNLOCK(pip); 5537 /* 5538 * Do a callback into the mdi consumer to let it 5539 * know that path is now enabled/disabled. 5540 */ 5541 if (f != NULL) { 5542 rv = (*f)(vh->vh_dip, pip, 0, 5543 MDI_PI_EXT_STATE(pip), 5544 MDI_EXT_STATE_CHANGE | sync_flag | 5545 op | MDI_AFTER_STATE_CHANGE); 5546 if (rv != MDI_SUCCESS) { 5547 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5548 "!vo_pi_state_change: failed rv = %x", rv)); 5549 } 5550 } 5551 return (next); 5552 } 5553 5554 /* 5555 * Common routine for doing enable/disable. 
5556 * NOTE: this will be removed once the NWS files are changed to use the new 5557 * mdi_{enable,disable}_path has been putback 5558 */ 5559 int 5560 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5561 { 5562 5563 mdi_phci_t *ph; 5564 mdi_vhci_t *vh = NULL; 5565 mdi_client_t *ct; 5566 mdi_pathinfo_t *next, *pip; 5567 int found_it; 5568 5569 ph = i_devi_get_phci(pdip); 5570 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5571 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5572 (void *)cdip)); 5573 if (ph == NULL) { 5574 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5575 "Op %d failed. ph = NULL\n", op)); 5576 return (MDI_FAILURE); 5577 } 5578 5579 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5580 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5581 "Op Invalid operation = %d\n", op)); 5582 return (MDI_FAILURE); 5583 } 5584 5585 vh = ph->ph_vhci; 5586 5587 if (cdip == NULL) { 5588 /* 5589 * Need to mark the Phci as enabled/disabled. 5590 */ 5591 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5592 "Op %d for the phci\n", op)); 5593 MDI_PHCI_LOCK(ph); 5594 switch (flags) { 5595 case USER_DISABLE: 5596 if (op == MDI_DISABLE_OP) { 5597 MDI_PHCI_SET_USER_DISABLE(ph); 5598 } else { 5599 MDI_PHCI_SET_USER_ENABLE(ph); 5600 } 5601 break; 5602 case DRIVER_DISABLE: 5603 if (op == MDI_DISABLE_OP) { 5604 MDI_PHCI_SET_DRV_DISABLE(ph); 5605 } else { 5606 MDI_PHCI_SET_DRV_ENABLE(ph); 5607 } 5608 break; 5609 case DRIVER_DISABLE_TRANSIENT: 5610 if (op == MDI_DISABLE_OP) { 5611 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5612 } else { 5613 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5614 } 5615 break; 5616 default: 5617 MDI_PHCI_UNLOCK(ph); 5618 MDI_DEBUG(1, (CE_NOTE, NULL, 5619 "!i_mdi_pi_enable_disable:" 5620 " Invalid flag argument= %d\n", flags)); 5621 } 5622 5623 /* 5624 * Phci has been disabled. Now try to enable/disable 5625 * path info's to each client. 
5626 */ 5627 pip = ph->ph_path_head; 5628 while (pip != NULL) { 5629 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5630 } 5631 MDI_PHCI_UNLOCK(ph); 5632 } else { 5633 5634 /* 5635 * Disable a specific client. 5636 */ 5637 ct = i_devi_get_client(cdip); 5638 if (ct == NULL) { 5639 MDI_DEBUG(1, (CE_NOTE, NULL, 5640 "!i_mdi_pi_enable_disable:" 5641 " failed. ct = NULL operation = %d\n", op)); 5642 return (MDI_FAILURE); 5643 } 5644 5645 MDI_CLIENT_LOCK(ct); 5646 pip = ct->ct_path_head; 5647 found_it = 0; 5648 while (pip != NULL) { 5649 MDI_PI_LOCK(pip); 5650 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5651 if (MDI_PI(pip)->pi_phci == ph) { 5652 MDI_PI_UNLOCK(pip); 5653 found_it = 1; 5654 break; 5655 } 5656 MDI_PI_UNLOCK(pip); 5657 pip = next; 5658 } 5659 5660 5661 MDI_CLIENT_UNLOCK(ct); 5662 if (found_it == 0) { 5663 MDI_DEBUG(1, (CE_NOTE, NULL, 5664 "!i_mdi_pi_enable_disable:" 5665 " failed. Could not find corresponding pip\n")); 5666 return (MDI_FAILURE); 5667 } 5668 5669 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5670 } 5671 5672 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5673 "Op %d Returning success pdip = %p cdip = %p\n", 5674 op, (void *)pdip, (void *)cdip)); 5675 return (MDI_SUCCESS); 5676 } 5677 5678 /* 5679 * Ensure phci powered up 5680 */ 5681 static void 5682 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5683 { 5684 dev_info_t *ph_dip; 5685 5686 ASSERT(pip != NULL); 5687 ASSERT(MDI_PI_LOCKED(pip)); 5688 5689 if (MDI_PI(pip)->pi_pm_held) { 5690 return; 5691 } 5692 5693 ph_dip = mdi_pi_get_phci(pip); 5694 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 5695 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5696 if (ph_dip == NULL) { 5697 return; 5698 } 5699 5700 MDI_PI_UNLOCK(pip); 5701 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5702 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5703 5704 pm_hold_power(ph_dip); 5705 5706 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5707 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 5708 MDI_PI_LOCK(pip); 5709 5710 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 5711 if (DEVI(ph_dip)->devi_pm_info) 5712 MDI_PI(pip)->pi_pm_held = 1; 5713 } 5714 5715 /* 5716 * Allow phci powered down 5717 */ 5718 static void 5719 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5720 { 5721 dev_info_t *ph_dip = NULL; 5722 5723 ASSERT(pip != NULL); 5724 ASSERT(MDI_PI_LOCKED(pip)); 5725 5726 if (MDI_PI(pip)->pi_pm_held == 0) { 5727 return; 5728 } 5729 5730 ph_dip = mdi_pi_get_phci(pip); 5731 ASSERT(ph_dip != NULL); 5732 5733 MDI_PI_UNLOCK(pip); 5734 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 5735 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5736 5737 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5738 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5739 pm_rele_power(ph_dip); 5740 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5741 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5742 5743 MDI_PI_LOCK(pip); 5744 MDI_PI(pip)->pi_pm_held = 0; 5745 } 5746 5747 static void 5748 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5749 { 5750 ASSERT(MDI_CLIENT_LOCKED(ct)); 5751 5752 ct->ct_power_cnt += incr; 5753 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 5754 "ct_power_cnt = %d incr = %d\n", (void *)ct, 5755 ct->ct_power_cnt, incr)); 5756 ASSERT(ct->ct_power_cnt >= 0); 5757 } 5758 5759 static void 5760 i_mdi_rele_all_phci(mdi_client_t *ct) 5761 { 5762 mdi_pathinfo_t *pip; 5763 5764 ASSERT(MDI_CLIENT_LOCKED(ct)); 5765 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5766 while (pip != NULL) { 5767 mdi_hold_path(pip); 5768 MDI_PI_LOCK(pip); 5769 i_mdi_pm_rele_pip(pip); 5770 MDI_PI_UNLOCK(pip); 5771 mdi_rele_path(pip); 5772 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5773 } 5774 } 5775 5776 static void 5777 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5778 { 5779 ASSERT(MDI_CLIENT_LOCKED(ct)); 5780 5781 if (i_ddi_devi_attached(ct->ct_dip)) { 5782 ct->ct_power_cnt -= decr; 5783 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 5784 "ct_power_cnt = %d decr = %d\n", 5785 (void *)ct, ct->ct_power_cnt, decr)); 5786 } 5787 5788 ASSERT(ct->ct_power_cnt >= 0); 5789 if (ct->ct_power_cnt == 0) { 5790 i_mdi_rele_all_phci(ct); 5791 return; 5792 } 5793 } 5794 5795 static void 5796 i_mdi_pm_reset_client(mdi_client_t *ct) 5797 { 5798 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 5799 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 5800 ASSERT(MDI_CLIENT_LOCKED(ct)); 5801 ct->ct_power_cnt = 0; 5802 i_mdi_rele_all_phci(ct); 5803 ct->ct_powercnt_config = 0; 5804 ct->ct_powercnt_unconfig = 0; 5805 ct->ct_powercnt_reset = 1; 5806 } 5807 5808 static int 5809 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5810 { 5811 int ret; 5812 dev_info_t *ph_dip; 5813 5814 MDI_PI_LOCK(pip); 5815 i_mdi_pm_hold_pip(pip); 5816 5817 ph_dip = mdi_pi_get_phci(pip); 5818 MDI_PI_UNLOCK(pip); 5819 5820 /* bring all components of phci to full power */ 5821 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5822 "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip), 5823 ddi_get_instance(ph_dip), (void *)pip)); 5824 5825 ret = pm_powerup(ph_dip); 5826 5827 if (ret == DDI_FAILURE) { 5828 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5829 "pm_powerup FAILED for %s%d %p\n", 5830 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), 5831 (void *)pip)); 5832 5833 MDI_PI_LOCK(pip); 5834 i_mdi_pm_rele_pip(pip); 5835 MDI_PI_UNLOCK(pip); 5836 return (MDI_FAILURE); 5837 } 5838 5839 return (MDI_SUCCESS); 5840 } 5841 5842 static int 5843 i_mdi_power_all_phci(mdi_client_t *ct) 5844 { 5845 mdi_pathinfo_t *pip; 5846 int succeeded = 0; 5847 5848 ASSERT(MDI_CLIENT_LOCKED(ct)); 5849 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5850 while (pip != NULL) { 5851 /* 5852 * Don't power if MDI_PATHINFO_STATE_FAULT 5853 * or MDI_PATHINFO_STATE_OFFLINE. 
5854 */ 5855 if (MDI_PI_IS_INIT(pip) || 5856 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 5857 mdi_hold_path(pip); 5858 MDI_CLIENT_UNLOCK(ct); 5859 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5860 succeeded = 1; 5861 5862 ASSERT(ct == MDI_PI(pip)->pi_client); 5863 MDI_CLIENT_LOCK(ct); 5864 mdi_rele_path(pip); 5865 } 5866 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5867 } 5868 5869 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5870 } 5871 5872 /* 5873 * mdi_bus_power(): 5874 * 1. Place the phci(s) into powered up state so that 5875 * client can do power management 5876 * 2. Ensure phci powered up as client power managing 5877 * Return Values: 5878 * MDI_SUCCESS 5879 * MDI_FAILURE 5880 */ 5881 int 5882 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5883 void *arg, void *result) 5884 { 5885 int ret = MDI_SUCCESS; 5886 pm_bp_child_pwrchg_t *bpc; 5887 mdi_client_t *ct; 5888 dev_info_t *cdip; 5889 pm_bp_has_changed_t *bphc; 5890 5891 /* 5892 * BUS_POWER_NOINVOL not supported 5893 */ 5894 if (op == BUS_POWER_NOINVOL) 5895 return (MDI_FAILURE); 5896 5897 /* 5898 * ignore other OPs. 
5899 * return quickly to save cou cycles on the ct processing 5900 */ 5901 switch (op) { 5902 case BUS_POWER_PRE_NOTIFICATION: 5903 case BUS_POWER_POST_NOTIFICATION: 5904 bpc = (pm_bp_child_pwrchg_t *)arg; 5905 cdip = bpc->bpc_dip; 5906 break; 5907 case BUS_POWER_HAS_CHANGED: 5908 bphc = (pm_bp_has_changed_t *)arg; 5909 cdip = bphc->bphc_dip; 5910 break; 5911 default: 5912 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5913 } 5914 5915 ASSERT(MDI_CLIENT(cdip)); 5916 5917 ct = i_devi_get_client(cdip); 5918 if (ct == NULL) 5919 return (MDI_FAILURE); 5920 5921 /* 5922 * wait till the mdi_pathinfo node state change are processed 5923 */ 5924 MDI_CLIENT_LOCK(ct); 5925 switch (op) { 5926 case BUS_POWER_PRE_NOTIFICATION: 5927 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5928 "BUS_POWER_PRE_NOTIFICATION:" 5929 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5930 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5931 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5932 5933 /* serialize power level change per client */ 5934 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5935 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5936 5937 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5938 5939 if (ct->ct_power_cnt == 0) { 5940 ret = i_mdi_power_all_phci(ct); 5941 } 5942 5943 /* 5944 * if new_level > 0: 5945 * - hold phci(s) 5946 * - power up phci(s) if not already 5947 * ignore power down 5948 */ 5949 if (bpc->bpc_nlevel > 0) { 5950 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5951 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5952 "mdi_bus_power i_mdi_pm_hold_client\n")); 5953 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5954 } 5955 } 5956 break; 5957 case BUS_POWER_POST_NOTIFICATION: 5958 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5959 "BUS_POWER_POST_NOTIFICATION:" 5960 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5961 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5962 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5963 *(int *)result)); 5964 5965 if (*(int *)result 
== DDI_SUCCESS) { 5966 if (bpc->bpc_nlevel > 0) { 5967 MDI_CLIENT_SET_POWER_UP(ct); 5968 } else { 5969 MDI_CLIENT_SET_POWER_DOWN(ct); 5970 } 5971 } 5972 5973 /* release the hold we did in pre-notification */ 5974 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5975 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5976 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5977 "mdi_bus_power i_mdi_pm_rele_client\n")); 5978 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5979 } 5980 5981 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5982 /* another thread might started attaching */ 5983 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5984 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5985 "mdi_bus_power i_mdi_pm_rele_client\n")); 5986 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5987 /* detaching has been taken care in pm_post_unconfig */ 5988 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5989 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5990 "mdi_bus_power i_mdi_pm_reset_client\n")); 5991 i_mdi_pm_reset_client(ct); 5992 } 5993 } 5994 5995 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5996 cv_broadcast(&ct->ct_powerchange_cv); 5997 5998 break; 5999 6000 /* need to do more */ 6001 case BUS_POWER_HAS_CHANGED: 6002 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 6003 "BUS_POWER_HAS_CHANGED:" 6004 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6005 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6006 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6007 6008 if (bphc->bphc_nlevel > 0 && 6009 bphc->bphc_nlevel > bphc->bphc_olevel) { 6010 if (ct->ct_power_cnt == 0) { 6011 ret = i_mdi_power_all_phci(ct); 6012 } 6013 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6014 "mdi_bus_power i_mdi_pm_hold_client\n")); 6015 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6016 } 6017 6018 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6019 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6020 "mdi_bus_power i_mdi_pm_rele_client\n")); 6021 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6022 } 6023 break; 6024 } 6025 6026 
MDI_CLIENT_UNLOCK(ct); 6027 return (ret); 6028 } 6029 6030 static int 6031 i_mdi_pm_pre_config_one(dev_info_t *child) 6032 { 6033 int ret = MDI_SUCCESS; 6034 mdi_client_t *ct; 6035 6036 ct = i_devi_get_client(child); 6037 if (ct == NULL) 6038 return (MDI_FAILURE); 6039 6040 MDI_CLIENT_LOCK(ct); 6041 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6042 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6043 6044 if (!MDI_CLIENT_IS_FAILED(ct)) { 6045 MDI_CLIENT_UNLOCK(ct); 6046 MDI_DEBUG(4, (CE_NOTE, child, 6047 "i_mdi_pm_pre_config_one already configured\n")); 6048 return (MDI_SUCCESS); 6049 } 6050 6051 if (ct->ct_powercnt_config) { 6052 MDI_CLIENT_UNLOCK(ct); 6053 MDI_DEBUG(4, (CE_NOTE, child, 6054 "i_mdi_pm_pre_config_one ALREADY held\n")); 6055 return (MDI_SUCCESS); 6056 } 6057 6058 if (ct->ct_power_cnt == 0) { 6059 ret = i_mdi_power_all_phci(ct); 6060 } 6061 MDI_DEBUG(4, (CE_NOTE, child, 6062 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 6063 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6064 ct->ct_powercnt_config = 1; 6065 ct->ct_powercnt_reset = 0; 6066 MDI_CLIENT_UNLOCK(ct); 6067 return (ret); 6068 } 6069 6070 static int 6071 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6072 { 6073 int ret = MDI_SUCCESS; 6074 dev_info_t *cdip; 6075 int circ; 6076 6077 ASSERT(MDI_VHCI(vdip)); 6078 6079 /* ndi_devi_config_one */ 6080 if (child) { 6081 ASSERT(DEVI_BUSY_OWNED(vdip)); 6082 return (i_mdi_pm_pre_config_one(child)); 6083 } 6084 6085 /* devi_config_common */ 6086 ndi_devi_enter(vdip, &circ); 6087 cdip = ddi_get_child(vdip); 6088 while (cdip) { 6089 dev_info_t *next = ddi_get_next_sibling(cdip); 6090 6091 ret = i_mdi_pm_pre_config_one(cdip); 6092 if (ret != MDI_SUCCESS) 6093 break; 6094 cdip = next; 6095 } 6096 ndi_devi_exit(vdip, circ); 6097 return (ret); 6098 } 6099 6100 static int 6101 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6102 { 6103 int ret = MDI_SUCCESS; 6104 mdi_client_t *ct; 6105 6106 ct = i_devi_get_client(child); 6107 if 
(ct == NULL) 6108 return (MDI_FAILURE); 6109 6110 MDI_CLIENT_LOCK(ct); 6111 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6112 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6113 6114 if (!i_ddi_devi_attached(ct->ct_dip)) { 6115 MDI_DEBUG(4, (CE_NOTE, child, 6116 "i_mdi_pm_pre_unconfig node detached already\n")); 6117 MDI_CLIENT_UNLOCK(ct); 6118 return (MDI_SUCCESS); 6119 } 6120 6121 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6122 (flags & NDI_AUTODETACH)) { 6123 MDI_DEBUG(4, (CE_NOTE, child, 6124 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6125 MDI_CLIENT_UNLOCK(ct); 6126 return (MDI_FAILURE); 6127 } 6128 6129 if (ct->ct_powercnt_unconfig) { 6130 MDI_DEBUG(4, (CE_NOTE, child, 6131 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6132 MDI_CLIENT_UNLOCK(ct); 6133 *held = 1; 6134 return (MDI_SUCCESS); 6135 } 6136 6137 if (ct->ct_power_cnt == 0) { 6138 ret = i_mdi_power_all_phci(ct); 6139 } 6140 MDI_DEBUG(4, (CE_NOTE, child, 6141 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6142 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6143 ct->ct_powercnt_unconfig = 1; 6144 ct->ct_powercnt_reset = 0; 6145 MDI_CLIENT_UNLOCK(ct); 6146 if (ret == MDI_SUCCESS) 6147 *held = 1; 6148 return (ret); 6149 } 6150 6151 static int 6152 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6153 int flags) 6154 { 6155 int ret = MDI_SUCCESS; 6156 dev_info_t *cdip; 6157 int circ; 6158 6159 ASSERT(MDI_VHCI(vdip)); 6160 *held = 0; 6161 6162 /* ndi_devi_unconfig_one */ 6163 if (child) { 6164 ASSERT(DEVI_BUSY_OWNED(vdip)); 6165 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6166 } 6167 6168 /* devi_unconfig_common */ 6169 ndi_devi_enter(vdip, &circ); 6170 cdip = ddi_get_child(vdip); 6171 while (cdip) { 6172 dev_info_t *next = ddi_get_next_sibling(cdip); 6173 6174 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6175 cdip = next; 6176 } 6177 ndi_devi_exit(vdip, circ); 6178 6179 if (*held) 6180 ret = MDI_SUCCESS; 6181 6182 return (ret); 6183 } 6184 6185 static void 6186 
i_mdi_pm_post_config_one(dev_info_t *child) 6187 { 6188 mdi_client_t *ct; 6189 6190 ct = i_devi_get_client(child); 6191 if (ct == NULL) 6192 return; 6193 6194 MDI_CLIENT_LOCK(ct); 6195 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6196 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6197 6198 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6199 MDI_DEBUG(4, (CE_NOTE, child, 6200 "i_mdi_pm_post_config_one NOT configured\n")); 6201 MDI_CLIENT_UNLOCK(ct); 6202 return; 6203 } 6204 6205 /* client has not been updated */ 6206 if (MDI_CLIENT_IS_FAILED(ct)) { 6207 MDI_DEBUG(4, (CE_NOTE, child, 6208 "i_mdi_pm_post_config_one NOT configured\n")); 6209 MDI_CLIENT_UNLOCK(ct); 6210 return; 6211 } 6212 6213 /* another thread might have powered it down or detached it */ 6214 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6215 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6216 (!i_ddi_devi_attached(ct->ct_dip) && 6217 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6218 MDI_DEBUG(4, (CE_NOTE, child, 6219 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6220 i_mdi_pm_reset_client(ct); 6221 } else { 6222 mdi_pathinfo_t *pip, *next; 6223 int valid_path_count = 0; 6224 6225 MDI_DEBUG(4, (CE_NOTE, child, 6226 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6227 pip = ct->ct_path_head; 6228 while (pip != NULL) { 6229 MDI_PI_LOCK(pip); 6230 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6231 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6232 valid_path_count ++; 6233 MDI_PI_UNLOCK(pip); 6234 pip = next; 6235 } 6236 i_mdi_pm_rele_client(ct, valid_path_count); 6237 } 6238 ct->ct_powercnt_config = 0; 6239 MDI_CLIENT_UNLOCK(ct); 6240 } 6241 6242 static void 6243 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6244 { 6245 int circ; 6246 dev_info_t *cdip; 6247 6248 ASSERT(MDI_VHCI(vdip)); 6249 6250 /* ndi_devi_config_one */ 6251 if (child) { 6252 ASSERT(DEVI_BUSY_OWNED(vdip)); 6253 i_mdi_pm_post_config_one(child); 6254 return; 6255 } 6256 6257 /* devi_config_common */ 6258 ndi_devi_enter(vdip, 
&circ); 6259 cdip = ddi_get_child(vdip); 6260 while (cdip) { 6261 dev_info_t *next = ddi_get_next_sibling(cdip); 6262 6263 i_mdi_pm_post_config_one(cdip); 6264 cdip = next; 6265 } 6266 ndi_devi_exit(vdip, circ); 6267 } 6268 6269 static void 6270 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6271 { 6272 mdi_client_t *ct; 6273 6274 ct = i_devi_get_client(child); 6275 if (ct == NULL) 6276 return; 6277 6278 MDI_CLIENT_LOCK(ct); 6279 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6280 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6281 6282 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6283 MDI_DEBUG(4, (CE_NOTE, child, 6284 "i_mdi_pm_post_unconfig NOT held\n")); 6285 MDI_CLIENT_UNLOCK(ct); 6286 return; 6287 } 6288 6289 /* failure detaching or another thread just attached it */ 6290 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6291 i_ddi_devi_attached(ct->ct_dip)) || 6292 (!i_ddi_devi_attached(ct->ct_dip) && 6293 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6294 MDI_DEBUG(4, (CE_NOTE, child, 6295 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6296 i_mdi_pm_reset_client(ct); 6297 } else { 6298 mdi_pathinfo_t *pip, *next; 6299 int valid_path_count = 0; 6300 6301 MDI_DEBUG(4, (CE_NOTE, child, 6302 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6303 pip = ct->ct_path_head; 6304 while (pip != NULL) { 6305 MDI_PI_LOCK(pip); 6306 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6307 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6308 valid_path_count ++; 6309 MDI_PI_UNLOCK(pip); 6310 pip = next; 6311 } 6312 i_mdi_pm_rele_client(ct, valid_path_count); 6313 ct->ct_powercnt_unconfig = 0; 6314 } 6315 6316 MDI_CLIENT_UNLOCK(ct); 6317 } 6318 6319 static void 6320 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6321 { 6322 int circ; 6323 dev_info_t *cdip; 6324 6325 ASSERT(MDI_VHCI(vdip)); 6326 6327 if (!held) { 6328 MDI_DEBUG(4, (CE_NOTE, vdip, 6329 "i_mdi_pm_post_unconfig held = %d\n", held)); 6330 return; 6331 } 6332 6333 if (child) { 6334 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6335 i_mdi_pm_post_unconfig_one(child); 6336 return; 6337 } 6338 6339 ndi_devi_enter(vdip, &circ); 6340 cdip = ddi_get_child(vdip); 6341 while (cdip) { 6342 dev_info_t *next = ddi_get_next_sibling(cdip); 6343 6344 i_mdi_pm_post_unconfig_one(cdip); 6345 cdip = next; 6346 } 6347 ndi_devi_exit(vdip, circ); 6348 } 6349 6350 int 6351 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6352 { 6353 int circ, ret = MDI_SUCCESS; 6354 dev_info_t *client_dip = NULL; 6355 mdi_client_t *ct; 6356 6357 /* 6358 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6359 * Power up pHCI for the named client device. 6360 * Note: Before the client is enumerated under vhci by phci, 6361 * client_dip can be NULL. Then proceed to power up all the 6362 * pHCIs. 6363 */ 6364 if (devnm != NULL) { 6365 ndi_devi_enter(vdip, &circ); 6366 client_dip = ndi_devi_findchild(vdip, devnm); 6367 } 6368 6369 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6370 op, devnm ? 
devnm : "NULL", (void *)client_dip)); 6371 6372 switch (op) { 6373 case MDI_PM_PRE_CONFIG: 6374 ret = i_mdi_pm_pre_config(vdip, client_dip); 6375 break; 6376 6377 case MDI_PM_PRE_UNCONFIG: 6378 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6379 flags); 6380 break; 6381 6382 case MDI_PM_POST_CONFIG: 6383 i_mdi_pm_post_config(vdip, client_dip); 6384 break; 6385 6386 case MDI_PM_POST_UNCONFIG: 6387 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6388 break; 6389 6390 case MDI_PM_HOLD_POWER: 6391 case MDI_PM_RELE_POWER: 6392 ASSERT(args); 6393 6394 client_dip = (dev_info_t *)args; 6395 ASSERT(MDI_CLIENT(client_dip)); 6396 6397 ct = i_devi_get_client(client_dip); 6398 MDI_CLIENT_LOCK(ct); 6399 6400 if (op == MDI_PM_HOLD_POWER) { 6401 if (ct->ct_power_cnt == 0) { 6402 (void) i_mdi_power_all_phci(ct); 6403 MDI_DEBUG(4, (CE_NOTE, client_dip, 6404 "mdi_power i_mdi_pm_hold_client\n")); 6405 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6406 } 6407 } else { 6408 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6409 MDI_DEBUG(4, (CE_NOTE, client_dip, 6410 "mdi_power i_mdi_pm_rele_client\n")); 6411 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6412 } else { 6413 MDI_DEBUG(4, (CE_NOTE, client_dip, 6414 "mdi_power i_mdi_pm_reset_client\n")); 6415 i_mdi_pm_reset_client(ct); 6416 } 6417 } 6418 6419 MDI_CLIENT_UNLOCK(ct); 6420 break; 6421 6422 default: 6423 break; 6424 } 6425 6426 if (devnm) 6427 ndi_devi_exit(vdip, circ); 6428 6429 return (ret); 6430 } 6431 6432 int 6433 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6434 { 6435 mdi_vhci_t *vhci; 6436 6437 if (!MDI_VHCI(dip)) 6438 return (MDI_FAILURE); 6439 6440 if (mdi_class) { 6441 vhci = DEVI(dip)->devi_mdi_xhci; 6442 ASSERT(vhci); 6443 *mdi_class = vhci->vh_class; 6444 } 6445 6446 return (MDI_SUCCESS); 6447 } 6448 6449 int 6450 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6451 { 6452 mdi_phci_t *phci; 6453 6454 if (!MDI_PHCI(dip)) 6455 return (MDI_FAILURE); 6456 6457 if (mdi_class) { 
6458 phci = DEVI(dip)->devi_mdi_xhci; 6459 ASSERT(phci); 6460 *mdi_class = phci->ph_vhci->vh_class; 6461 } 6462 6463 return (MDI_SUCCESS); 6464 } 6465 6466 int 6467 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6468 { 6469 mdi_client_t *client; 6470 6471 if (!MDI_CLIENT(dip)) 6472 return (MDI_FAILURE); 6473 6474 if (mdi_class) { 6475 client = DEVI(dip)->devi_mdi_client; 6476 ASSERT(client); 6477 *mdi_class = client->ct_vhci->vh_class; 6478 } 6479 6480 return (MDI_SUCCESS); 6481 } 6482 6483 void * 6484 mdi_client_get_vhci_private(dev_info_t *dip) 6485 { 6486 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6487 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6488 mdi_client_t *ct; 6489 ct = i_devi_get_client(dip); 6490 return (ct->ct_vprivate); 6491 } 6492 return (NULL); 6493 } 6494 6495 void 6496 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6497 { 6498 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6499 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6500 mdi_client_t *ct; 6501 ct = i_devi_get_client(dip); 6502 ct->ct_vprivate = data; 6503 } 6504 } 6505 /* 6506 * mdi_pi_get_vhci_private(): 6507 * Get the vhci private information associated with the 6508 * mdi_pathinfo node 6509 */ 6510 void * 6511 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6512 { 6513 caddr_t vprivate = NULL; 6514 if (pip) { 6515 vprivate = MDI_PI(pip)->pi_vprivate; 6516 } 6517 return (vprivate); 6518 } 6519 6520 /* 6521 * mdi_pi_set_vhci_private(): 6522 * Set the vhci private information in the mdi_pathinfo node 6523 */ 6524 void 6525 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6526 { 6527 if (pip) { 6528 MDI_PI(pip)->pi_vprivate = priv; 6529 } 6530 } 6531 6532 /* 6533 * mdi_phci_get_vhci_private(): 6534 * Get the vhci private information associated with the 6535 * mdi_phci node 6536 */ 6537 void * 6538 mdi_phci_get_vhci_private(dev_info_t *dip) 6539 { 6540 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 
6541 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6542 mdi_phci_t *ph; 6543 ph = i_devi_get_phci(dip); 6544 return (ph->ph_vprivate); 6545 } 6546 return (NULL); 6547 } 6548 6549 /* 6550 * mdi_phci_set_vhci_private(): 6551 * Set the vhci private information in the mdi_phci node 6552 */ 6553 void 6554 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6555 { 6556 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6557 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6558 mdi_phci_t *ph; 6559 ph = i_devi_get_phci(dip); 6560 ph->ph_vprivate = priv; 6561 } 6562 } 6563 6564 /* 6565 * List of vhci class names: 6566 * A vhci class name must be in this list only if the corresponding vhci 6567 * driver intends to use the mdi provided bus config implementation 6568 * (i.e., mdi_vhci_bus_config()). 6569 */ 6570 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6571 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6572 6573 /* 6574 * During boot time, the on-disk vhci cache for every vhci class is read 6575 * in the form of an nvlist and stored here. 6576 */ 6577 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6578 6579 /* nvpair names in vhci cache nvlist */ 6580 #define MDI_VHCI_CACHE_VERSION 1 6581 #define MDI_NVPNAME_VERSION "version" 6582 #define MDI_NVPNAME_PHCIS "phcis" 6583 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6584 6585 /* 6586 * Given vhci class name, return its on-disk vhci cache filename. 6587 * Memory for the returned filename which includes the full path is allocated 6588 * by this function. 6589 */ 6590 static char * 6591 vhclass2vhcache_filename(char *vhclass) 6592 { 6593 char *filename; 6594 int len; 6595 static char *fmt = "/etc/devices/mdi_%s_cache"; 6596 6597 /* 6598 * fmt contains the on-disk vhci cache file name format; 6599 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
6600 */ 6601 6602 /* the -1 below is to account for "%s" in the format string */ 6603 len = strlen(fmt) + strlen(vhclass) - 1; 6604 filename = kmem_alloc(len, KM_SLEEP); 6605 (void) snprintf(filename, len, fmt, vhclass); 6606 ASSERT(len == (strlen(filename) + 1)); 6607 return (filename); 6608 } 6609 6610 /* 6611 * initialize the vhci cache related data structures and read the on-disk 6612 * vhci cached data into memory. 6613 */ 6614 static void 6615 setup_vhci_cache(mdi_vhci_t *vh) 6616 { 6617 mdi_vhci_config_t *vhc; 6618 mdi_vhci_cache_t *vhcache; 6619 int i; 6620 nvlist_t *nvl = NULL; 6621 6622 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6623 vh->vh_config = vhc; 6624 vhcache = &vhc->vhc_vhcache; 6625 6626 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6627 6628 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6629 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6630 6631 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6632 6633 /* 6634 * Create string hash; same as mod_hash_create_strhash() except that 6635 * we use NULL key destructor. 6636 */ 6637 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6638 mdi_bus_config_cache_hash_size, 6639 mod_hash_null_keydtor, mod_hash_null_valdtor, 6640 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6641 6642 /* 6643 * The on-disk vhci cache is read during booting prior to the 6644 * lights-out period by mdi_read_devices_files(). 6645 */ 6646 for (i = 0; i < N_VHCI_CLASSES; i++) { 6647 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6648 nvl = vhcache_nvl[i]; 6649 vhcache_nvl[i] = NULL; 6650 break; 6651 } 6652 } 6653 6654 /* 6655 * this is to cover the case of some one manually causing unloading 6656 * (or detaching) and reloading (or attaching) of a vhci driver. 
6657 */ 6658 if (nvl == NULL && modrootloaded) 6659 nvl = read_on_disk_vhci_cache(vh->vh_class); 6660 6661 if (nvl != NULL) { 6662 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6663 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6664 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6665 else { 6666 cmn_err(CE_WARN, 6667 "%s: data file corrupted, will recreate\n", 6668 vhc->vhc_vhcache_filename); 6669 } 6670 rw_exit(&vhcache->vhcache_lock); 6671 nvlist_free(nvl); 6672 } 6673 6674 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6675 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6676 6677 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 6678 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 6679 } 6680 6681 /* 6682 * free all vhci cache related resources 6683 */ 6684 static int 6685 destroy_vhci_cache(mdi_vhci_t *vh) 6686 { 6687 mdi_vhci_config_t *vhc = vh->vh_config; 6688 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6689 mdi_vhcache_phci_t *cphci, *cphci_next; 6690 mdi_vhcache_client_t *cct, *cct_next; 6691 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6692 6693 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6694 return (MDI_FAILURE); 6695 6696 kmem_free(vhc->vhc_vhcache_filename, 6697 strlen(vhc->vhc_vhcache_filename) + 1); 6698 6699 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6700 6701 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6702 cphci = cphci_next) { 6703 cphci_next = cphci->cphci_next; 6704 free_vhcache_phci(cphci); 6705 } 6706 6707 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 6708 cct_next = cct->cct_next; 6709 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6710 cpi_next = cpi->cpi_next; 6711 free_vhcache_pathinfo(cpi); 6712 } 6713 free_vhcache_client(cct); 6714 } 6715 6716 rw_destroy(&vhcache->vhcache_lock); 6717 6718 mutex_destroy(&vhc->vhc_lock); 6719 cv_destroy(&vhc->vhc_cv); 6720 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6721 return (MDI_SUCCESS); 
6722 } 6723 6724 /* 6725 * Stop all vhci cache related async threads and free their resources. 6726 */ 6727 static int 6728 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6729 { 6730 mdi_async_client_config_t *acc, *acc_next; 6731 6732 mutex_enter(&vhc->vhc_lock); 6733 vhc->vhc_flags |= MDI_VHC_EXIT; 6734 ASSERT(vhc->vhc_acc_thrcount >= 0); 6735 cv_broadcast(&vhc->vhc_cv); 6736 6737 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6738 vhc->vhc_acc_thrcount != 0) { 6739 mutex_exit(&vhc->vhc_lock); 6740 delay(1); 6741 mutex_enter(&vhc->vhc_lock); 6742 } 6743 6744 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6745 6746 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6747 acc_next = acc->acc_next; 6748 free_async_client_config(acc); 6749 } 6750 vhc->vhc_acc_list_head = NULL; 6751 vhc->vhc_acc_list_tail = NULL; 6752 vhc->vhc_acc_count = 0; 6753 6754 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6755 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6756 mutex_exit(&vhc->vhc_lock); 6757 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6758 vhcache_dirty(vhc); 6759 return (MDI_FAILURE); 6760 } 6761 } else 6762 mutex_exit(&vhc->vhc_lock); 6763 6764 if (callb_delete(vhc->vhc_cbid) != 0) 6765 return (MDI_FAILURE); 6766 6767 return (MDI_SUCCESS); 6768 } 6769 6770 /* 6771 * Stop vhci cache flush thread 6772 */ 6773 /* ARGSUSED */ 6774 static boolean_t 6775 stop_vhcache_flush_thread(void *arg, int code) 6776 { 6777 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 6778 6779 mutex_enter(&vhc->vhc_lock); 6780 vhc->vhc_flags |= MDI_VHC_EXIT; 6781 cv_broadcast(&vhc->vhc_cv); 6782 6783 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6784 mutex_exit(&vhc->vhc_lock); 6785 delay(1); 6786 mutex_enter(&vhc->vhc_lock); 6787 } 6788 6789 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6790 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6791 mutex_exit(&vhc->vhc_lock); 6792 (void) flush_vhcache(vhc, 1); 6793 } else 6794 mutex_exit(&vhc->vhc_lock); 6795 6796 return (B_TRUE); 6797 } 
6798 6799 /* 6800 * Enqueue the vhcache phci (cphci) at the tail of the list 6801 */ 6802 static void 6803 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6804 { 6805 cphci->cphci_next = NULL; 6806 if (vhcache->vhcache_phci_head == NULL) 6807 vhcache->vhcache_phci_head = cphci; 6808 else 6809 vhcache->vhcache_phci_tail->cphci_next = cphci; 6810 vhcache->vhcache_phci_tail = cphci; 6811 } 6812 6813 /* 6814 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6815 */ 6816 static void 6817 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6818 mdi_vhcache_pathinfo_t *cpi) 6819 { 6820 cpi->cpi_next = NULL; 6821 if (cct->cct_cpi_head == NULL) 6822 cct->cct_cpi_head = cpi; 6823 else 6824 cct->cct_cpi_tail->cpi_next = cpi; 6825 cct->cct_cpi_tail = cpi; 6826 } 6827 6828 /* 6829 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6830 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6831 * flag set come at the beginning of the list. All cpis which have this 6832 * flag set come at the end of the list. 
6833 */ 6834 static void 6835 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6836 mdi_vhcache_pathinfo_t *newcpi) 6837 { 6838 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6839 6840 if (cct->cct_cpi_head == NULL || 6841 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6842 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6843 else { 6844 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6845 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6846 prev_cpi = cpi, cpi = cpi->cpi_next) 6847 ; 6848 6849 if (prev_cpi == NULL) 6850 cct->cct_cpi_head = newcpi; 6851 else 6852 prev_cpi->cpi_next = newcpi; 6853 6854 newcpi->cpi_next = cpi; 6855 6856 if (cpi == NULL) 6857 cct->cct_cpi_tail = newcpi; 6858 } 6859 } 6860 6861 /* 6862 * Enqueue the vhcache client (cct) at the tail of the list 6863 */ 6864 static void 6865 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6866 mdi_vhcache_client_t *cct) 6867 { 6868 cct->cct_next = NULL; 6869 if (vhcache->vhcache_client_head == NULL) 6870 vhcache->vhcache_client_head = cct; 6871 else 6872 vhcache->vhcache_client_tail->cct_next = cct; 6873 vhcache->vhcache_client_tail = cct; 6874 } 6875 6876 static void 6877 free_string_array(char **str, int nelem) 6878 { 6879 int i; 6880 6881 if (str) { 6882 for (i = 0; i < nelem; i++) { 6883 if (str[i]) 6884 kmem_free(str[i], strlen(str[i]) + 1); 6885 } 6886 kmem_free(str, sizeof (char *) * nelem); 6887 } 6888 } 6889 6890 static void 6891 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6892 { 6893 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6894 kmem_free(cphci, sizeof (*cphci)); 6895 } 6896 6897 static void 6898 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6899 { 6900 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6901 kmem_free(cpi, sizeof (*cpi)); 6902 } 6903 6904 static void 6905 free_vhcache_client(mdi_vhcache_client_t *cct) 6906 { 6907 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6908 kmem_free(cct, sizeof (*cct)); 6909 } 6910 6911 
static char * 6912 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6913 { 6914 char *name_addr; 6915 int len; 6916 6917 len = strlen(ct_name) + strlen(ct_addr) + 2; 6918 name_addr = kmem_alloc(len, KM_SLEEP); 6919 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6920 6921 if (ret_len) 6922 *ret_len = len; 6923 return (name_addr); 6924 } 6925 6926 /* 6927 * Copy the contents of paddrnvl to vhci cache. 6928 * paddrnvl nvlist contains path information for a vhci client. 6929 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 6930 */ 6931 static void 6932 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6933 mdi_vhcache_client_t *cct) 6934 { 6935 nvpair_t *nvp = NULL; 6936 mdi_vhcache_pathinfo_t *cpi; 6937 uint_t nelem; 6938 uint32_t *val; 6939 6940 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6941 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6942 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6943 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6944 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6945 ASSERT(nelem == 2); 6946 cpi->cpi_cphci = cphci_list[val[0]]; 6947 cpi->cpi_flags = val[1]; 6948 enqueue_tail_vhcache_pathinfo(cct, cpi); 6949 } 6950 } 6951 6952 /* 6953 * Copy the contents of caddrmapnvl to vhci cache. 6954 * caddrmapnvl nvlist contains vhci client address to phci client address 6955 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6956 * this nvlist. 
6957 */ 6958 static void 6959 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6960 mdi_vhcache_phci_t *cphci_list[]) 6961 { 6962 nvpair_t *nvp = NULL; 6963 nvlist_t *paddrnvl; 6964 mdi_vhcache_client_t *cct; 6965 6966 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6967 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6968 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6969 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6970 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6971 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6972 /* the client must contain at least one path */ 6973 ASSERT(cct->cct_cpi_head != NULL); 6974 6975 enqueue_vhcache_client(vhcache, cct); 6976 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6977 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6978 } 6979 } 6980 6981 /* 6982 * Copy the contents of the main nvlist to vhci cache. 6983 * 6984 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 6985 * The nvlist contains the mappings between the vhci client addresses and 6986 * their corresponding phci client addresses. 6987 * 6988 * The structure of the nvlist is as follows: 6989 * 6990 * Main nvlist: 6991 * NAME TYPE DATA 6992 * version int32 version number 6993 * phcis string array array of phci paths 6994 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 6995 * 6996 * structure of c2paddrs_nvl: 6997 * NAME TYPE DATA 6998 * caddr1 nvlist_t paddrs_nvl1 6999 * caddr2 nvlist_t paddrs_nvl2 7000 * ... 7001 * where caddr1, caddr2, ... are vhci client name and addresses in the 7002 * form of "<clientname>@<clientaddress>". 7003 * (for example: "ssd@2000002037cd9f72"); 7004 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7005 * 7006 * structure of paddrs_nvl: 7007 * NAME TYPE DATA 7008 * pi_addr1 uint32_array (phci-id, cpi_flags) 7009 * pi_addr2 uint32_array (phci-id, cpi_flags) 7010 * ... 7011 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7012 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7013 * phci-ids are integers that identify PHCIs to which the 7014 * the bus specific address belongs to. These integers are used as an index 7015 * into to the phcis string array in the main nvlist to get the PHCI path. 7016 */ 7017 static int 7018 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7019 { 7020 char **phcis, **phci_namep; 7021 uint_t nphcis; 7022 mdi_vhcache_phci_t *cphci, **cphci_list; 7023 nvlist_t *caddrmapnvl; 7024 int32_t ver; 7025 int i; 7026 size_t cphci_list_size; 7027 7028 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7029 7030 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7031 ver != MDI_VHCI_CACHE_VERSION) 7032 return (MDI_FAILURE); 7033 7034 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7035 &nphcis) != 0) 7036 return (MDI_SUCCESS); 7037 7038 ASSERT(nphcis > 0); 7039 7040 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7041 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7042 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7043 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7044 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7045 enqueue_vhcache_phci(vhcache, cphci); 7046 cphci_list[i] = cphci; 7047 } 7048 7049 ASSERT(vhcache->vhcache_phci_head != NULL); 7050 7051 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7052 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7053 7054 kmem_free(cphci_list, cphci_list_size); 7055 return (MDI_SUCCESS); 7056 } 7057 7058 /* 7059 * Build paddrnvl for the specified client using the information in the 7060 * vhci cache and add it to the caddrmapnnvl. 7061 * Returns 0 on success, errno on failure. 
7062 */ 7063 static int 7064 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7065 nvlist_t *caddrmapnvl) 7066 { 7067 mdi_vhcache_pathinfo_t *cpi; 7068 nvlist_t *nvl; 7069 int err; 7070 uint32_t val[2]; 7071 7072 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7073 7074 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7075 return (err); 7076 7077 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7078 val[0] = cpi->cpi_cphci->cphci_id; 7079 val[1] = cpi->cpi_flags; 7080 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7081 != 0) 7082 goto out; 7083 } 7084 7085 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7086 out: 7087 nvlist_free(nvl); 7088 return (err); 7089 } 7090 7091 /* 7092 * Build caddrmapnvl using the information in the vhci cache 7093 * and add it to the mainnvl. 7094 * Returns 0 on success, errno on failure. 7095 */ 7096 static int 7097 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7098 { 7099 mdi_vhcache_client_t *cct; 7100 nvlist_t *nvl; 7101 int err; 7102 7103 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7104 7105 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7106 return (err); 7107 7108 for (cct = vhcache->vhcache_client_head; cct != NULL; 7109 cct = cct->cct_next) { 7110 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7111 goto out; 7112 } 7113 7114 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7115 out: 7116 nvlist_free(nvl); 7117 return (err); 7118 } 7119 7120 /* 7121 * Build nvlist using the information in the vhci cache. 7122 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7123 * Returns nvl on success, NULL on failure. 
7124 */ 7125 static nvlist_t * 7126 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7127 { 7128 mdi_vhcache_phci_t *cphci; 7129 uint_t phci_count; 7130 char **phcis; 7131 nvlist_t *nvl; 7132 int err, i; 7133 7134 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7135 nvl = NULL; 7136 goto out; 7137 } 7138 7139 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7140 MDI_VHCI_CACHE_VERSION)) != 0) 7141 goto out; 7142 7143 rw_enter(&vhcache->vhcache_lock, RW_READER); 7144 if (vhcache->vhcache_phci_head == NULL) { 7145 rw_exit(&vhcache->vhcache_lock); 7146 return (nvl); 7147 } 7148 7149 phci_count = 0; 7150 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7151 cphci = cphci->cphci_next) 7152 cphci->cphci_id = phci_count++; 7153 7154 /* build phci pathname list */ 7155 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7156 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7157 cphci = cphci->cphci_next, i++) 7158 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7159 7160 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7161 phci_count); 7162 free_string_array(phcis, phci_count); 7163 7164 if (err == 0 && 7165 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7166 rw_exit(&vhcache->vhcache_lock); 7167 return (nvl); 7168 } 7169 7170 rw_exit(&vhcache->vhcache_lock); 7171 out: 7172 if (nvl) 7173 nvlist_free(nvl); 7174 return (NULL); 7175 } 7176 7177 /* 7178 * Lookup vhcache phci structure for the specified phci path. 7179 */ 7180 static mdi_vhcache_phci_t * 7181 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7182 { 7183 mdi_vhcache_phci_t *cphci; 7184 7185 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7186 7187 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7188 cphci = cphci->cphci_next) { 7189 if (strcmp(cphci->cphci_path, phci_path) == 0) 7190 return (cphci); 7191 } 7192 7193 return (NULL); 7194 } 7195 7196 /* 7197 * Lookup vhcache phci structure for the specified phci. 
7198 */ 7199 static mdi_vhcache_phci_t * 7200 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7201 { 7202 mdi_vhcache_phci_t *cphci; 7203 7204 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7205 7206 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7207 cphci = cphci->cphci_next) { 7208 if (cphci->cphci_phci == ph) 7209 return (cphci); 7210 } 7211 7212 return (NULL); 7213 } 7214 7215 /* 7216 * Add the specified phci to the vhci cache if not already present. 7217 */ 7218 static void 7219 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7220 { 7221 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7222 mdi_vhcache_phci_t *cphci; 7223 char *pathname; 7224 int cache_updated; 7225 7226 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7227 7228 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7229 (void) ddi_pathname(ph->ph_dip, pathname); 7230 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7231 != NULL) { 7232 cphci->cphci_phci = ph; 7233 cache_updated = 0; 7234 } else { 7235 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7236 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7237 cphci->cphci_phci = ph; 7238 enqueue_vhcache_phci(vhcache, cphci); 7239 cache_updated = 1; 7240 } 7241 7242 rw_exit(&vhcache->vhcache_lock); 7243 7244 /* 7245 * Since a new phci has been added, reset 7246 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7247 * during next vhcache_discover_paths(). 7248 */ 7249 mutex_enter(&vhc->vhc_lock); 7250 vhc->vhc_path_discovery_cutoff_time = 0; 7251 mutex_exit(&vhc->vhc_lock); 7252 7253 kmem_free(pathname, MAXPATHLEN); 7254 if (cache_updated) 7255 vhcache_dirty(vhc); 7256 } 7257 7258 /* 7259 * Remove the reference to the specified phci from the vhci cache. 
7260 */ 7261 static void 7262 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7263 { 7264 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7265 mdi_vhcache_phci_t *cphci; 7266 7267 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7268 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7269 /* do not remove the actual mdi_vhcache_phci structure */ 7270 cphci->cphci_phci = NULL; 7271 } 7272 rw_exit(&vhcache->vhcache_lock); 7273 } 7274 7275 static void 7276 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7277 mdi_vhcache_lookup_token_t *src) 7278 { 7279 if (src == NULL) { 7280 dst->lt_cct = NULL; 7281 dst->lt_cct_lookup_time = 0; 7282 } else { 7283 dst->lt_cct = src->lt_cct; 7284 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7285 } 7286 } 7287 7288 /* 7289 * Look up vhcache client for the specified client. 7290 */ 7291 static mdi_vhcache_client_t * 7292 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7293 mdi_vhcache_lookup_token_t *token) 7294 { 7295 mod_hash_val_t hv; 7296 char *name_addr; 7297 int len; 7298 7299 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7300 7301 /* 7302 * If no vhcache clean occurred since the last lookup, we can 7303 * simply return the cct from the last lookup operation. 7304 * It works because ccts are never freed except during the vhcache 7305 * cleanup operation. 
7306 */ 7307 if (token != NULL && 7308 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7309 return (token->lt_cct); 7310 7311 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7312 if (mod_hash_find(vhcache->vhcache_client_hash, 7313 (mod_hash_key_t)name_addr, &hv) == 0) { 7314 if (token) { 7315 token->lt_cct = (mdi_vhcache_client_t *)hv; 7316 token->lt_cct_lookup_time = lbolt64; 7317 } 7318 } else { 7319 if (token) { 7320 token->lt_cct = NULL; 7321 token->lt_cct_lookup_time = 0; 7322 } 7323 hv = NULL; 7324 } 7325 kmem_free(name_addr, len); 7326 return ((mdi_vhcache_client_t *)hv); 7327 } 7328 7329 /* 7330 * Add the specified path to the vhci cache if not already present. 7331 * Also add the vhcache client for the client corresponding to this path 7332 * if it doesn't already exist. 7333 */ 7334 static void 7335 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7336 { 7337 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7338 mdi_vhcache_client_t *cct; 7339 mdi_vhcache_pathinfo_t *cpi; 7340 mdi_phci_t *ph = pip->pi_phci; 7341 mdi_client_t *ct = pip->pi_client; 7342 int cache_updated = 0; 7343 7344 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7345 7346 /* if vhcache client for this pip doesn't already exist, add it */ 7347 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7348 NULL)) == NULL) { 7349 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7350 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7351 ct->ct_guid, NULL); 7352 enqueue_vhcache_client(vhcache, cct); 7353 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7354 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7355 cache_updated = 1; 7356 } 7357 7358 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7359 if (cpi->cpi_cphci->cphci_phci == ph && 7360 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7361 cpi->cpi_pip = pip; 7362 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7363 cpi->cpi_flags &= 7364 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7365 sort_vhcache_paths(cct); 7366 cache_updated = 1; 7367 } 7368 break; 7369 } 7370 } 7371 7372 if (cpi == NULL) { 7373 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7374 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7375 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7376 ASSERT(cpi->cpi_cphci != NULL); 7377 cpi->cpi_pip = pip; 7378 enqueue_vhcache_pathinfo(cct, cpi); 7379 cache_updated = 1; 7380 } 7381 7382 rw_exit(&vhcache->vhcache_lock); 7383 7384 if (cache_updated) 7385 vhcache_dirty(vhc); 7386 } 7387 7388 /* 7389 * Remove the reference to the specified path from the vhci cache. 7390 */ 7391 static void 7392 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7393 { 7394 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7395 mdi_client_t *ct = pip->pi_client; 7396 mdi_vhcache_client_t *cct; 7397 mdi_vhcache_pathinfo_t *cpi; 7398 7399 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7400 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7401 NULL)) != NULL) { 7402 for (cpi = cct->cct_cpi_head; cpi != NULL; 7403 cpi = cpi->cpi_next) { 7404 if (cpi->cpi_pip == pip) { 7405 cpi->cpi_pip = NULL; 7406 break; 7407 } 7408 } 7409 } 7410 rw_exit(&vhcache->vhcache_lock); 7411 } 7412 7413 /* 7414 * Flush the vhci cache to disk. 7415 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7416 */ 7417 static int 7418 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7419 { 7420 nvlist_t *nvl; 7421 int err; 7422 int rv; 7423 7424 /* 7425 * It is possible that the system may shutdown before 7426 * i_ddi_io_initialized (during stmsboot for example). To allow for 7427 * flushing the cache in this case do not check for 7428 * i_ddi_io_initialized when force flag is set. 
7429 */ 7430 if (force_flag == 0 && !i_ddi_io_initialized()) 7431 return (MDI_FAILURE); 7432 7433 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7434 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7435 nvlist_free(nvl); 7436 } else 7437 err = EFAULT; 7438 7439 rv = MDI_SUCCESS; 7440 mutex_enter(&vhc->vhc_lock); 7441 if (err != 0) { 7442 if (err == EROFS) { 7443 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7444 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7445 MDI_VHC_VHCACHE_DIRTY); 7446 } else { 7447 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7448 cmn_err(CE_CONT, "%s: update failed\n", 7449 vhc->vhc_vhcache_filename); 7450 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7451 } 7452 rv = MDI_FAILURE; 7453 } 7454 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7455 cmn_err(CE_CONT, 7456 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7457 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7458 } 7459 mutex_exit(&vhc->vhc_lock); 7460 7461 return (rv); 7462 } 7463 7464 /* 7465 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7466 * Exits itself if left idle for the idle timeout period. 
7467 */ 7468 static void 7469 vhcache_flush_thread(void *arg) 7470 { 7471 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7472 clock_t idle_time, quit_at_ticks; 7473 callb_cpr_t cprinfo; 7474 7475 /* number of seconds to sleep idle before exiting */ 7476 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7477 7478 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7479 "mdi_vhcache_flush"); 7480 mutex_enter(&vhc->vhc_lock); 7481 for (; ; ) { 7482 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7483 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7484 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7485 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7486 (void) cv_timedwait(&vhc->vhc_cv, 7487 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7488 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7489 } else { 7490 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7491 mutex_exit(&vhc->vhc_lock); 7492 7493 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7494 vhcache_dirty(vhc); 7495 7496 mutex_enter(&vhc->vhc_lock); 7497 } 7498 } 7499 7500 quit_at_ticks = ddi_get_lbolt() + idle_time; 7501 7502 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7503 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7504 ddi_get_lbolt() < quit_at_ticks) { 7505 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7506 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7507 quit_at_ticks); 7508 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7509 } 7510 7511 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7512 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7513 goto out; 7514 } 7515 7516 out: 7517 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7518 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7519 CALLB_CPR_EXIT(&cprinfo); 7520 } 7521 7522 /* 7523 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7524 */ 7525 static void 7526 vhcache_dirty(mdi_vhci_config_t *vhc) 7527 { 7528 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7529 int create_thread; 7530 7531 rw_enter(&vhcache->vhcache_lock, RW_READER); 7532 /* do not flush cache until the cache is fully built */ 7533 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7534 rw_exit(&vhcache->vhcache_lock); 7535 return; 7536 } 7537 rw_exit(&vhcache->vhcache_lock); 7538 7539 mutex_enter(&vhc->vhc_lock); 7540 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7541 mutex_exit(&vhc->vhc_lock); 7542 return; 7543 } 7544 7545 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7546 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7547 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7548 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7549 cv_broadcast(&vhc->vhc_cv); 7550 create_thread = 0; 7551 } else { 7552 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7553 create_thread = 1; 7554 } 7555 mutex_exit(&vhc->vhc_lock); 7556 7557 if (create_thread) 7558 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7559 0, &p0, TS_RUN, minclsyspri); 7560 } 7561 7562 /* 7563 * phci bus config structure - one for for each phci bus config operation that 7564 * we initiate on behalf of a vhci. 
7565 */ 7566 typedef struct mdi_phci_bus_config_s { 7567 char *phbc_phci_path; 7568 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7569 struct mdi_phci_bus_config_s *phbc_next; 7570 } mdi_phci_bus_config_t; 7571 7572 /* vhci bus config structure - one for each vhci bus config operation */ 7573 typedef struct mdi_vhci_bus_config_s { 7574 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7575 major_t vhbc_op_major; /* bus config op major */ 7576 uint_t vhbc_op_flags; /* bus config op flags */ 7577 kmutex_t vhbc_lock; 7578 kcondvar_t vhbc_cv; 7579 int vhbc_thr_count; 7580 } mdi_vhci_bus_config_t; 7581 7582 /* 7583 * bus config the specified phci 7584 */ 7585 static void 7586 bus_config_phci(void *arg) 7587 { 7588 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7589 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7590 dev_info_t *ph_dip; 7591 7592 /* 7593 * first configure all path components upto phci and then configure 7594 * the phci children. 7595 */ 7596 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7597 != NULL) { 7598 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7599 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7600 (void) ndi_devi_config_driver(ph_dip, 7601 vhbc->vhbc_op_flags, 7602 vhbc->vhbc_op_major); 7603 } else 7604 (void) ndi_devi_config(ph_dip, 7605 vhbc->vhbc_op_flags); 7606 7607 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7608 ndi_rele_devi(ph_dip); 7609 } 7610 7611 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7612 kmem_free(phbc, sizeof (*phbc)); 7613 7614 mutex_enter(&vhbc->vhbc_lock); 7615 vhbc->vhbc_thr_count--; 7616 if (vhbc->vhbc_thr_count == 0) 7617 cv_broadcast(&vhbc->vhbc_cv); 7618 mutex_exit(&vhbc->vhbc_lock); 7619 } 7620 7621 /* 7622 * Bus config all phcis associated with the vhci in parallel. 7623 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7624 */ 7625 static void 7626 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7627 ddi_bus_config_op_t op, major_t maj) 7628 { 7629 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7630 mdi_vhci_bus_config_t *vhbc; 7631 mdi_vhcache_phci_t *cphci; 7632 7633 rw_enter(&vhcache->vhcache_lock, RW_READER); 7634 if (vhcache->vhcache_phci_head == NULL) { 7635 rw_exit(&vhcache->vhcache_lock); 7636 return; 7637 } 7638 7639 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7640 7641 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7642 cphci = cphci->cphci_next) { 7643 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7644 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7645 KM_SLEEP); 7646 phbc->phbc_vhbusconfig = vhbc; 7647 phbc->phbc_next = phbc_head; 7648 phbc_head = phbc; 7649 vhbc->vhbc_thr_count++; 7650 } 7651 rw_exit(&vhcache->vhcache_lock); 7652 7653 vhbc->vhbc_op = op; 7654 vhbc->vhbc_op_major = maj; 7655 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7656 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7657 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7658 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7659 7660 /* now create threads to initiate bus config on all phcis in parallel */ 7661 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7662 phbc_next = phbc->phbc_next; 7663 if (mdi_mtc_off) 7664 bus_config_phci((void *)phbc); 7665 else 7666 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7667 0, &p0, TS_RUN, minclsyspri); 7668 } 7669 7670 mutex_enter(&vhbc->vhbc_lock); 7671 /* wait until all threads exit */ 7672 while (vhbc->vhbc_thr_count > 0) 7673 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7674 mutex_exit(&vhbc->vhbc_lock); 7675 7676 mutex_destroy(&vhbc->vhbc_lock); 7677 cv_destroy(&vhbc->vhbc_cv); 7678 kmem_free(vhbc, sizeof (*vhbc)); 7679 } 7680 7681 /* 7682 * Single threaded version of bus_config_all_phcis() 7683 */ 7684 static void 7685 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7686 
ddi_bus_config_op_t op, major_t maj) 7687 { 7688 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7689 7690 single_threaded_vhconfig_enter(vhc); 7691 bus_config_all_phcis(vhcache, flags, op, maj); 7692 single_threaded_vhconfig_exit(vhc); 7693 } 7694 7695 /* 7696 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7697 * The path includes the child component in addition to the phci path. 7698 */ 7699 static int 7700 bus_config_one_phci_child(char *path) 7701 { 7702 dev_info_t *ph_dip, *child; 7703 char *devnm; 7704 int rv = MDI_FAILURE; 7705 7706 /* extract the child component of the phci */ 7707 devnm = strrchr(path, '/'); 7708 *devnm++ = '\0'; 7709 7710 /* 7711 * first configure all path components upto phci and then 7712 * configure the phci child. 7713 */ 7714 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7715 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7716 NDI_SUCCESS) { 7717 /* 7718 * release the hold that ndi_devi_config_one() placed 7719 */ 7720 ndi_rele_devi(child); 7721 rv = MDI_SUCCESS; 7722 } 7723 7724 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7725 ndi_rele_devi(ph_dip); 7726 } 7727 7728 devnm--; 7729 *devnm = '/'; 7730 return (rv); 7731 } 7732 7733 /* 7734 * Build a list of phci client paths for the specified vhci client. 7735 * The list includes only those phci client paths which aren't configured yet. 7736 */ 7737 static mdi_phys_path_t * 7738 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7739 { 7740 mdi_vhcache_pathinfo_t *cpi; 7741 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7742 int config_path, len; 7743 7744 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7745 /* 7746 * include only those paths that aren't configured. 
7747 */ 7748 config_path = 0; 7749 if (cpi->cpi_pip == NULL) 7750 config_path = 1; 7751 else { 7752 MDI_PI_LOCK(cpi->cpi_pip); 7753 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7754 config_path = 1; 7755 MDI_PI_UNLOCK(cpi->cpi_pip); 7756 } 7757 7758 if (config_path) { 7759 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7760 len = strlen(cpi->cpi_cphci->cphci_path) + 7761 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7762 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7763 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7764 cpi->cpi_cphci->cphci_path, ct_name, 7765 cpi->cpi_addr); 7766 pp->phys_path_next = NULL; 7767 7768 if (pp_head == NULL) 7769 pp_head = pp; 7770 else 7771 pp_tail->phys_path_next = pp; 7772 pp_tail = pp; 7773 } 7774 } 7775 7776 return (pp_head); 7777 } 7778 7779 /* 7780 * Free the memory allocated for phci client path list. 7781 */ 7782 static void 7783 free_phclient_path_list(mdi_phys_path_t *pp_head) 7784 { 7785 mdi_phys_path_t *pp, *pp_next; 7786 7787 for (pp = pp_head; pp != NULL; pp = pp_next) { 7788 pp_next = pp->phys_path_next; 7789 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7790 kmem_free(pp, sizeof (*pp)); 7791 } 7792 } 7793 7794 /* 7795 * Allocated async client structure and initialize with the specified values. 7796 */ 7797 static mdi_async_client_config_t * 7798 alloc_async_client_config(char *ct_name, char *ct_addr, 7799 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7800 { 7801 mdi_async_client_config_t *acc; 7802 7803 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7804 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7805 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7806 acc->acc_phclient_path_list_head = pp_head; 7807 init_vhcache_lookup_token(&acc->acc_token, tok); 7808 acc->acc_next = NULL; 7809 return (acc); 7810 } 7811 7812 /* 7813 * Free the memory allocated for the async client structure and their members. 
7814 */ 7815 static void 7816 free_async_client_config(mdi_async_client_config_t *acc) 7817 { 7818 if (acc->acc_phclient_path_list_head) 7819 free_phclient_path_list(acc->acc_phclient_path_list_head); 7820 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7821 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7822 kmem_free(acc, sizeof (*acc)); 7823 } 7824 7825 /* 7826 * Sort vhcache pathinfos (cpis) of the specified client. 7827 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7828 * flag set come at the beginning of the list. All cpis which have this 7829 * flag set come at the end of the list. 7830 */ 7831 static void 7832 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7833 { 7834 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7835 7836 cpi_head = cct->cct_cpi_head; 7837 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7838 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7839 cpi_next = cpi->cpi_next; 7840 enqueue_vhcache_pathinfo(cct, cpi); 7841 } 7842 } 7843 7844 /* 7845 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7846 * every vhcache pathinfo of the specified client. If not adjust the flag 7847 * setting appropriately. 7848 * 7849 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7850 * on-disk vhci cache. So every time this flag is updated the cache must be 7851 * flushed. 7852 */ 7853 static void 7854 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7855 mdi_vhcache_lookup_token_t *tok) 7856 { 7857 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7858 mdi_vhcache_client_t *cct; 7859 mdi_vhcache_pathinfo_t *cpi; 7860 7861 rw_enter(&vhcache->vhcache_lock, RW_READER); 7862 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7863 == NULL) { 7864 rw_exit(&vhcache->vhcache_lock); 7865 return; 7866 } 7867 7868 /* 7869 * to avoid unnecessary on-disk cache updates, first check if an 7870 * update is really needed. 
If no update is needed simply return. 7871 */ 7872 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7873 if ((cpi->cpi_pip != NULL && 7874 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7875 (cpi->cpi_pip == NULL && 7876 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7877 break; 7878 } 7879 } 7880 if (cpi == NULL) { 7881 rw_exit(&vhcache->vhcache_lock); 7882 return; 7883 } 7884 7885 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7886 rw_exit(&vhcache->vhcache_lock); 7887 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7888 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7889 tok)) == NULL) { 7890 rw_exit(&vhcache->vhcache_lock); 7891 return; 7892 } 7893 } 7894 7895 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7896 if (cpi->cpi_pip != NULL) 7897 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7898 else 7899 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7900 } 7901 sort_vhcache_paths(cct); 7902 7903 rw_exit(&vhcache->vhcache_lock); 7904 vhcache_dirty(vhc); 7905 } 7906 7907 /* 7908 * Configure all specified paths of the client. 7909 */ 7910 static void 7911 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7912 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7913 { 7914 mdi_phys_path_t *pp; 7915 7916 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 7917 (void) bus_config_one_phci_child(pp->phys_path); 7918 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 7919 } 7920 7921 /* 7922 * Dequeue elements from vhci async client config list and bus configure 7923 * their corresponding phci clients. 
7924 */ 7925 static void 7926 config_client_paths_thread(void *arg) 7927 { 7928 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7929 mdi_async_client_config_t *acc; 7930 clock_t quit_at_ticks; 7931 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 7932 callb_cpr_t cprinfo; 7933 7934 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7935 "mdi_config_client_paths"); 7936 7937 for (; ; ) { 7938 quit_at_ticks = ddi_get_lbolt() + idle_time; 7939 7940 mutex_enter(&vhc->vhc_lock); 7941 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7942 vhc->vhc_acc_list_head == NULL && 7943 ddi_get_lbolt() < quit_at_ticks) { 7944 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7945 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7946 quit_at_ticks); 7947 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7948 } 7949 7950 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7951 vhc->vhc_acc_list_head == NULL) 7952 goto out; 7953 7954 acc = vhc->vhc_acc_list_head; 7955 vhc->vhc_acc_list_head = acc->acc_next; 7956 if (vhc->vhc_acc_list_head == NULL) 7957 vhc->vhc_acc_list_tail = NULL; 7958 vhc->vhc_acc_count--; 7959 mutex_exit(&vhc->vhc_lock); 7960 7961 config_client_paths_sync(vhc, acc->acc_ct_name, 7962 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 7963 &acc->acc_token); 7964 7965 free_async_client_config(acc); 7966 } 7967 7968 out: 7969 vhc->vhc_acc_thrcount--; 7970 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7971 CALLB_CPR_EXIT(&cprinfo); 7972 } 7973 7974 /* 7975 * Arrange for all the phci client paths (pp_head) for the specified client 7976 * to be bus configured asynchronously by a thread. 
7977 */ 7978 static void 7979 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7980 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7981 { 7982 mdi_async_client_config_t *acc, *newacc; 7983 int create_thread; 7984 7985 if (pp_head == NULL) 7986 return; 7987 7988 if (mdi_mtc_off) { 7989 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7990 free_phclient_path_list(pp_head); 7991 return; 7992 } 7993 7994 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7995 ASSERT(newacc); 7996 7997 mutex_enter(&vhc->vhc_lock); 7998 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7999 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8000 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8001 free_async_client_config(newacc); 8002 mutex_exit(&vhc->vhc_lock); 8003 return; 8004 } 8005 } 8006 8007 if (vhc->vhc_acc_list_head == NULL) 8008 vhc->vhc_acc_list_head = newacc; 8009 else 8010 vhc->vhc_acc_list_tail->acc_next = newacc; 8011 vhc->vhc_acc_list_tail = newacc; 8012 vhc->vhc_acc_count++; 8013 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8014 cv_broadcast(&vhc->vhc_cv); 8015 create_thread = 0; 8016 } else { 8017 vhc->vhc_acc_thrcount++; 8018 create_thread = 1; 8019 } 8020 mutex_exit(&vhc->vhc_lock); 8021 8022 if (create_thread) 8023 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8024 0, &p0, TS_RUN, minclsyspri); 8025 } 8026 8027 /* 8028 * Return number of online paths for the specified client. 
8029 */ 8030 static int 8031 nonline_paths(mdi_vhcache_client_t *cct) 8032 { 8033 mdi_vhcache_pathinfo_t *cpi; 8034 int online_count = 0; 8035 8036 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8037 if (cpi->cpi_pip != NULL) { 8038 MDI_PI_LOCK(cpi->cpi_pip); 8039 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8040 online_count++; 8041 MDI_PI_UNLOCK(cpi->cpi_pip); 8042 } 8043 } 8044 8045 return (online_count); 8046 } 8047 8048 /* 8049 * Bus configure all paths for the specified vhci client. 8050 * If at least one path for the client is already online, the remaining paths 8051 * will be configured asynchronously. Otherwise, it synchronously configures 8052 * the paths until at least one path is online and then rest of the paths 8053 * will be configured asynchronously. 8054 */ 8055 static void 8056 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8057 { 8058 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8059 mdi_phys_path_t *pp_head, *pp; 8060 mdi_vhcache_client_t *cct; 8061 mdi_vhcache_lookup_token_t tok; 8062 8063 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8064 8065 init_vhcache_lookup_token(&tok, NULL); 8066 8067 if (ct_name == NULL || ct_addr == NULL || 8068 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8069 == NULL || 8070 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8071 rw_exit(&vhcache->vhcache_lock); 8072 return; 8073 } 8074 8075 /* if at least one path is online, configure the rest asynchronously */ 8076 if (nonline_paths(cct) > 0) { 8077 rw_exit(&vhcache->vhcache_lock); 8078 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8079 return; 8080 } 8081 8082 rw_exit(&vhcache->vhcache_lock); 8083 8084 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8085 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8086 rw_enter(&vhcache->vhcache_lock, RW_READER); 8087 8088 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8089 ct_addr, &tok)) 
== NULL) { 8090 rw_exit(&vhcache->vhcache_lock); 8091 goto out; 8092 } 8093 8094 if (nonline_paths(cct) > 0 && 8095 pp->phys_path_next != NULL) { 8096 rw_exit(&vhcache->vhcache_lock); 8097 config_client_paths_async(vhc, ct_name, ct_addr, 8098 pp->phys_path_next, &tok); 8099 pp->phys_path_next = NULL; 8100 goto out; 8101 } 8102 8103 rw_exit(&vhcache->vhcache_lock); 8104 } 8105 } 8106 8107 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8108 out: 8109 free_phclient_path_list(pp_head); 8110 } 8111 8112 static void 8113 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8114 { 8115 mutex_enter(&vhc->vhc_lock); 8116 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8117 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8118 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8119 mutex_exit(&vhc->vhc_lock); 8120 } 8121 8122 static void 8123 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8124 { 8125 mutex_enter(&vhc->vhc_lock); 8126 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8127 cv_broadcast(&vhc->vhc_cv); 8128 mutex_exit(&vhc->vhc_lock); 8129 } 8130 8131 typedef struct mdi_phci_driver_info { 8132 char *phdriver_name; /* name of the phci driver */ 8133 8134 /* set to non zero if the phci driver supports root device */ 8135 int phdriver_root_support; 8136 } mdi_phci_driver_info_t; 8137 8138 /* 8139 * vhci class and root support capability of a phci driver can be 8140 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8141 * phci driver.conf file. The built-in tables below contain this information 8142 * for those phci drivers whose driver.conf files don't yet contain this info. 8143 * 8144 * All phci drivers expect iscsi have root device support. 
8145 */ 8146 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8147 { "fp", 1 }, 8148 { "iscsi", 0 }, 8149 { "ibsrp", 1 } 8150 }; 8151 8152 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8153 8154 static void * 8155 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8156 { 8157 void *new_ptr; 8158 8159 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8160 if (old_ptr) { 8161 bcopy(old_ptr, new_ptr, old_size); 8162 kmem_free(old_ptr, old_size); 8163 } 8164 return (new_ptr); 8165 } 8166 8167 static void 8168 add_to_phci_list(char ***driver_list, int **root_support_list, 8169 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8170 { 8171 ASSERT(*cur_elements <= *max_elements); 8172 if (*cur_elements == *max_elements) { 8173 *max_elements += 10; 8174 *driver_list = mdi_realloc(*driver_list, 8175 sizeof (char *) * (*cur_elements), 8176 sizeof (char *) * (*max_elements)); 8177 *root_support_list = mdi_realloc(*root_support_list, 8178 sizeof (int) * (*cur_elements), 8179 sizeof (int) * (*max_elements)); 8180 } 8181 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8182 (*root_support_list)[*cur_elements] = root_support; 8183 (*cur_elements)++; 8184 } 8185 8186 static void 8187 get_phci_driver_list(char *vhci_class, char ***driver_list, 8188 int **root_support_list, int *cur_elements, int *max_elements) 8189 { 8190 mdi_phci_driver_info_t *st_driver_list, *p; 8191 int st_ndrivers, root_support, i, j, driver_conf_count; 8192 major_t m; 8193 struct devnames *dnp; 8194 ddi_prop_t *propp; 8195 8196 *driver_list = NULL; 8197 *root_support_list = NULL; 8198 *cur_elements = 0; 8199 *max_elements = 0; 8200 8201 /* add the phci drivers derived from the phci driver.conf files */ 8202 for (m = 0; m < devcnt; m++) { 8203 dnp = &devnamesp[m]; 8204 8205 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8206 LOCK_DEV_OPS(&dnp->dn_lock); 8207 if (dnp->dn_global_prop_ptr != NULL && 8208 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8209 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8210 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8211 strcmp(propp->prop_val, vhci_class) == 0) { 8212 8213 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8214 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8215 &dnp->dn_global_prop_ptr->prop_list) 8216 == NULL) ? 1 : 0; 8217 8218 add_to_phci_list(driver_list, root_support_list, 8219 cur_elements, max_elements, dnp->dn_name, 8220 root_support); 8221 8222 UNLOCK_DEV_OPS(&dnp->dn_lock); 8223 } else 8224 UNLOCK_DEV_OPS(&dnp->dn_lock); 8225 } 8226 } 8227 8228 driver_conf_count = *cur_elements; 8229 8230 /* add the phci drivers specified in the built-in tables */ 8231 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8232 st_driver_list = scsi_phci_driver_list; 8233 st_ndrivers = sizeof (scsi_phci_driver_list) / 8234 sizeof (mdi_phci_driver_info_t); 8235 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8236 st_driver_list = ib_phci_driver_list; 8237 st_ndrivers = sizeof (ib_phci_driver_list) / 8238 sizeof (mdi_phci_driver_info_t); 8239 } else { 8240 st_driver_list = NULL; 8241 st_ndrivers = 0; 8242 } 8243 8244 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8245 /* add this phci driver if not already added before */ 8246 for (j = 0; j < driver_conf_count; j++) { 8247 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8248 break; 8249 } 8250 if (j == driver_conf_count) { 8251 add_to_phci_list(driver_list, root_support_list, 8252 cur_elements, max_elements, p->phdriver_name, 8253 p->phdriver_root_support); 8254 } 8255 } 8256 } 8257 8258 /* 8259 * Attach the phci driver instances associated with the specified vhci class. 8260 * If root is mounted attach all phci driver instances. 8261 * If root is not mounted, attach the instances of only those phci 8262 * drivers that have the root support. 
8263 */ 8264 static void 8265 attach_phci_drivers(char *vhci_class) 8266 { 8267 char **driver_list, **p; 8268 int *root_support_list; 8269 int cur_elements, max_elements, i; 8270 major_t m; 8271 8272 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8273 &cur_elements, &max_elements); 8274 8275 for (i = 0; i < cur_elements; i++) { 8276 if (modrootloaded || root_support_list[i]) { 8277 m = ddi_name_to_major(driver_list[i]); 8278 if (m != (major_t)-1 && ddi_hold_installed_driver(m)) 8279 ddi_rele_driver(m); 8280 } 8281 } 8282 8283 if (driver_list) { 8284 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8285 kmem_free(*p, strlen(*p) + 1); 8286 kmem_free(driver_list, sizeof (char *) * max_elements); 8287 kmem_free(root_support_list, sizeof (int) * max_elements); 8288 } 8289 } 8290 8291 /* 8292 * Build vhci cache: 8293 * 8294 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8295 * the phci driver instances. During this process the cache gets built. 8296 * 8297 * Cache is built fully if the root is mounted. 8298 * If the root is not mounted, phci drivers that do not have root support 8299 * are not attached. As a result the cache is built partially. The entries 8300 * in the cache reflect only those phci drivers that have root support. 
8301 */ 8302 static int 8303 build_vhci_cache(mdi_vhci_t *vh) 8304 { 8305 mdi_vhci_config_t *vhc = vh->vh_config; 8306 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8307 8308 single_threaded_vhconfig_enter(vhc); 8309 8310 rw_enter(&vhcache->vhcache_lock, RW_READER); 8311 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8312 rw_exit(&vhcache->vhcache_lock); 8313 single_threaded_vhconfig_exit(vhc); 8314 return (0); 8315 } 8316 rw_exit(&vhcache->vhcache_lock); 8317 8318 attach_phci_drivers(vh->vh_class); 8319 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8320 BUS_CONFIG_ALL, (major_t)-1); 8321 8322 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8323 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8324 rw_exit(&vhcache->vhcache_lock); 8325 8326 single_threaded_vhconfig_exit(vhc); 8327 vhcache_dirty(vhc); 8328 return (1); 8329 } 8330 8331 /* 8332 * Determine if discovery of paths is needed. 8333 */ 8334 static int 8335 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8336 { 8337 int rv = 1; 8338 8339 mutex_enter(&vhc->vhc_lock); 8340 if (i_ddi_io_initialized() == 0) { 8341 if (vhc->vhc_path_discovery_boot > 0) { 8342 vhc->vhc_path_discovery_boot--; 8343 goto out; 8344 } 8345 } else { 8346 if (vhc->vhc_path_discovery_postboot > 0) { 8347 vhc->vhc_path_discovery_postboot--; 8348 goto out; 8349 } 8350 } 8351 8352 /* 8353 * Do full path discovery at most once per mdi_path_discovery_interval. 8354 * This is to avoid a series of full path discoveries when opening 8355 * stale /dev/[r]dsk links. 8356 */ 8357 if (mdi_path_discovery_interval != -1 && 8358 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8359 goto out; 8360 8361 rv = 0; 8362 out: 8363 mutex_exit(&vhc->vhc_lock); 8364 return (rv); 8365 } 8366 8367 /* 8368 * Discover all paths: 8369 * 8370 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8371 * driver instances. During this process all paths will be discovered. 
8372 */ 8373 static int 8374 vhcache_discover_paths(mdi_vhci_t *vh) 8375 { 8376 mdi_vhci_config_t *vhc = vh->vh_config; 8377 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8378 int rv = 0; 8379 8380 single_threaded_vhconfig_enter(vhc); 8381 8382 if (vhcache_do_discovery(vhc)) { 8383 attach_phci_drivers(vh->vh_class); 8384 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8385 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8386 8387 mutex_enter(&vhc->vhc_lock); 8388 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8389 mdi_path_discovery_interval * TICKS_PER_SECOND; 8390 mutex_exit(&vhc->vhc_lock); 8391 rv = 1; 8392 } 8393 8394 single_threaded_vhconfig_exit(vhc); 8395 return (rv); 8396 } 8397 8398 /* 8399 * Generic vhci bus config implementation: 8400 * 8401 * Parameters 8402 * vdip vhci dip 8403 * flags bus config flags 8404 * op bus config operation 8405 * The remaining parameters are bus config operation specific 8406 * 8407 * for BUS_CONFIG_ONE 8408 * arg pointer to name@addr 8409 * child upon successful return from this function, *child will be 8410 * set to the configured and held devinfo child node of vdip. 8411 * ct_addr pointer to client address (i.e. GUID) 8412 * 8413 * for BUS_CONFIG_DRIVER 8414 * arg major number of the driver 8415 * child and ct_addr parameters are ignored 8416 * 8417 * for BUS_CONFIG_ALL 8418 * arg, child, and ct_addr parameters are ignored 8419 * 8420 * Note that for the rest of the bus config operations, this function simply 8421 * calls the framework provided default bus config routine. 
8422 */ 8423 int 8424 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8425 void *arg, dev_info_t **child, char *ct_addr) 8426 { 8427 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8428 mdi_vhci_config_t *vhc = vh->vh_config; 8429 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8430 int rv = 0; 8431 int params_valid = 0; 8432 char *cp; 8433 8434 /* 8435 * To bus config vhcis we relay operation, possibly using another 8436 * thread, to phcis. The phci driver then interacts with MDI to cause 8437 * vhci child nodes to be enumerated under the vhci node. Adding a 8438 * vhci child requires an ndi_devi_enter of the vhci. Since another 8439 * thread may be adding the child, to avoid deadlock we can't wait 8440 * for the relayed operations to complete if we have already entered 8441 * the vhci node. 8442 */ 8443 if (DEVI_BUSY_OWNED(vdip)) { 8444 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8445 "vhci dip is busy owned %p\n", (void *)vdip)); 8446 goto default_bus_config; 8447 } 8448 8449 rw_enter(&vhcache->vhcache_lock, RW_READER); 8450 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8451 rw_exit(&vhcache->vhcache_lock); 8452 rv = build_vhci_cache(vh); 8453 rw_enter(&vhcache->vhcache_lock, RW_READER); 8454 } 8455 8456 switch (op) { 8457 case BUS_CONFIG_ONE: 8458 if (arg != NULL && ct_addr != NULL) { 8459 /* extract node name */ 8460 cp = (char *)arg; 8461 while (*cp != '\0' && *cp != '@') 8462 cp++; 8463 if (*cp == '@') { 8464 params_valid = 1; 8465 *cp = '\0'; 8466 config_client_paths(vhc, (char *)arg, ct_addr); 8467 /* config_client_paths() releases cache_lock */ 8468 *cp = '@'; 8469 break; 8470 } 8471 } 8472 8473 rw_exit(&vhcache->vhcache_lock); 8474 break; 8475 8476 case BUS_CONFIG_DRIVER: 8477 rw_exit(&vhcache->vhcache_lock); 8478 if (rv == 0) 8479 st_bus_config_all_phcis(vhc, flags, op, 8480 (major_t)(uintptr_t)arg); 8481 break; 8482 8483 case BUS_CONFIG_ALL: 8484 rw_exit(&vhcache->vhcache_lock); 8485 if (rv == 0) 8486 
st_bus_config_all_phcis(vhc, flags, op, -1); 8487 break; 8488 8489 default: 8490 rw_exit(&vhcache->vhcache_lock); 8491 break; 8492 } 8493 8494 8495 default_bus_config: 8496 /* 8497 * All requested child nodes are enumerated under the vhci. 8498 * Now configure them. 8499 */ 8500 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8501 NDI_SUCCESS) { 8502 return (MDI_SUCCESS); 8503 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8504 /* discover all paths and try configuring again */ 8505 if (vhcache_discover_paths(vh) && 8506 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8507 NDI_SUCCESS) 8508 return (MDI_SUCCESS); 8509 } 8510 8511 return (MDI_FAILURE); 8512 } 8513 8514 /* 8515 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8516 */ 8517 static nvlist_t * 8518 read_on_disk_vhci_cache(char *vhci_class) 8519 { 8520 nvlist_t *nvl; 8521 int err; 8522 char *filename; 8523 8524 filename = vhclass2vhcache_filename(vhci_class); 8525 8526 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8527 kmem_free(filename, strlen(filename) + 1); 8528 return (nvl); 8529 } else if (err == EIO) 8530 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8531 else if (err == EINVAL) 8532 cmn_err(CE_WARN, 8533 "%s: data file corrupted, will recreate\n", filename); 8534 8535 kmem_free(filename, strlen(filename) + 1); 8536 return (NULL); 8537 } 8538 8539 /* 8540 * Read on-disk vhci cache into nvlists for all vhci classes. 8541 * Called during booting by i_ddi_read_devices_files(). 8542 */ 8543 void 8544 mdi_read_devices_files(void) 8545 { 8546 int i; 8547 8548 for (i = 0; i < N_VHCI_CLASSES; i++) 8549 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8550 } 8551 8552 /* 8553 * Remove all stale entries from vhci cache. 
8554 */ 8555 static void 8556 clean_vhcache(mdi_vhci_config_t *vhc) 8557 { 8558 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8559 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8560 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8561 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8562 8563 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8564 8565 cct_head = vhcache->vhcache_client_head; 8566 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8567 for (cct = cct_head; cct != NULL; cct = cct_next) { 8568 cct_next = cct->cct_next; 8569 8570 cpi_head = cct->cct_cpi_head; 8571 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8572 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8573 cpi_next = cpi->cpi_next; 8574 if (cpi->cpi_pip != NULL) { 8575 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8576 enqueue_tail_vhcache_pathinfo(cct, cpi); 8577 } else 8578 free_vhcache_pathinfo(cpi); 8579 } 8580 8581 if (cct->cct_cpi_head != NULL) 8582 enqueue_vhcache_client(vhcache, cct); 8583 else { 8584 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8585 (mod_hash_key_t)cct->cct_name_addr); 8586 free_vhcache_client(cct); 8587 } 8588 } 8589 8590 cphci_head = vhcache->vhcache_phci_head; 8591 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8592 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8593 cphci_next = cphci->cphci_next; 8594 if (cphci->cphci_phci != NULL) 8595 enqueue_vhcache_phci(vhcache, cphci); 8596 else 8597 free_vhcache_phci(cphci); 8598 } 8599 8600 vhcache->vhcache_clean_time = lbolt64; 8601 rw_exit(&vhcache->vhcache_lock); 8602 vhcache_dirty(vhc); 8603 } 8604 8605 /* 8606 * Remove all stale entries from vhci cache. 
8607 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8608 */ 8609 void 8610 mdi_clean_vhcache(void) 8611 { 8612 mdi_vhci_t *vh; 8613 8614 mutex_enter(&mdi_mutex); 8615 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8616 vh->vh_refcnt++; 8617 mutex_exit(&mdi_mutex); 8618 clean_vhcache(vh->vh_config); 8619 mutex_enter(&mdi_mutex); 8620 vh->vh_refcnt--; 8621 } 8622 mutex_exit(&mdi_mutex); 8623 } 8624 8625 /* 8626 * mdi_vhci_walk_clients(): 8627 * Walker routine to traverse client dev_info nodes 8628 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8629 * below the client, including nexus devices, which we dont want. 8630 * So we just traverse the immediate siblings, starting from 1st client. 8631 */ 8632 void 8633 mdi_vhci_walk_clients(dev_info_t *vdip, 8634 int (*f)(dev_info_t *, void *), void *arg) 8635 { 8636 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8637 dev_info_t *cdip; 8638 mdi_client_t *ct; 8639 8640 MDI_VHCI_CLIENT_LOCK(vh); 8641 cdip = ddi_get_child(vdip); 8642 while (cdip) { 8643 ct = i_devi_get_client(cdip); 8644 MDI_CLIENT_LOCK(ct); 8645 8646 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 8647 cdip = ddi_get_next_sibling(cdip); 8648 else 8649 cdip = NULL; 8650 8651 MDI_CLIENT_UNLOCK(ct); 8652 } 8653 MDI_VHCI_CLIENT_UNLOCK(vh); 8654 } 8655 8656 /* 8657 * mdi_vhci_walk_phcis(): 8658 * Walker routine to traverse phci dev_info nodes 8659 */ 8660 void 8661 mdi_vhci_walk_phcis(dev_info_t *vdip, 8662 int (*f)(dev_info_t *, void *), void *arg) 8663 { 8664 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8665 mdi_phci_t *ph, *next; 8666 8667 MDI_VHCI_PHCI_LOCK(vh); 8668 ph = vh->vh_phci_head; 8669 while (ph) { 8670 MDI_PHCI_LOCK(ph); 8671 8672 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 8673 next = ph->ph_next; 8674 else 8675 next = NULL; 8676 8677 MDI_PHCI_UNLOCK(ph); 8678 ph = next; 8679 } 8680 MDI_VHCI_PHCI_UNLOCK(vh); 8681 } 8682 8683 8684 /* 8685 * mdi_walk_vhcis(): 8686 * Walker routine to traverse vhci 
dev_info nodes 8687 */ 8688 void 8689 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8690 { 8691 mdi_vhci_t *vh = NULL; 8692 8693 mutex_enter(&mdi_mutex); 8694 /* 8695 * Scan for already registered vhci 8696 */ 8697 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8698 vh->vh_refcnt++; 8699 mutex_exit(&mdi_mutex); 8700 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8701 mutex_enter(&mdi_mutex); 8702 vh->vh_refcnt--; 8703 break; 8704 } else { 8705 mutex_enter(&mdi_mutex); 8706 vh->vh_refcnt--; 8707 } 8708 } 8709 8710 mutex_exit(&mdi_mutex); 8711 } 8712 8713 /* 8714 * i_mdi_log_sysevent(): 8715 * Logs events for pickup by syseventd 8716 */ 8717 static void 8718 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8719 { 8720 char *path_name; 8721 nvlist_t *attr_list; 8722 8723 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8724 KM_SLEEP) != DDI_SUCCESS) { 8725 goto alloc_failed; 8726 } 8727 8728 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8729 (void) ddi_pathname(dip, path_name); 8730 8731 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8732 ddi_driver_name(dip)) != DDI_SUCCESS) { 8733 goto error; 8734 } 8735 8736 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8737 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8738 goto error; 8739 } 8740 8741 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8742 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8743 goto error; 8744 } 8745 8746 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8747 path_name) != DDI_SUCCESS) { 8748 goto error; 8749 } 8750 8751 if (nvlist_add_string(attr_list, DDI_CLASS, 8752 ph_vh_class) != DDI_SUCCESS) { 8753 goto error; 8754 } 8755 8756 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8757 attr_list, NULL, DDI_SLEEP); 8758 8759 error: 8760 kmem_free(path_name, MAXPATHLEN); 8761 nvlist_free(attr_list); 8762 return; 8763 8764 alloc_failed: 8765 MDI_DEBUG(1, (CE_WARN, dip, 8766 "!i_mdi_log_sysevent: Unable to send sysevent")); 8767 } 
8768