xref: /titanic_51/usr/src/uts/common/os/sunmdi.c (revision 7f11fd00fc23e2af7ae21cc8837a2b86380dcfa7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
24  */
25 
26 /*
27  * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
28  * more detailed discussion of the overall mpxio architecture.
29  *
30  * Default locking order:
31  *
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
39  */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 #include <sys/modhash.h>
68 #include <sys/disp.h>
69 #include <sys/autoconf.h>
70 #include <sys/sysmacros.h>
71 
#ifdef	DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
/*
 * MDI_DEBUG(dbglevel, pargs):
 *	Call i_mdi_log() when mdi_debug >= dbglevel.  'pargs' is the complete,
 *	parenthesized i_mdi_log() argument list; MDI_WARN, MDI_NOTE and
 *	MDI_CONT supply its first two arguments (cmn_err level and the name
 *	of the calling function).
 *
 *	The expansion is wrapped in do { } while (0) so that it forms exactly
 *	one statement: a bare 'if' expansion would silently capture the
 *	'else' of an enclosing if/else at the call site.
 */
#define	MDI_DEBUG(dbglevel, pargs)		\
	do {					\
		if (mdi_debug >= (dbglevel))	\
			i_mdi_log pargs;	\
		_NOTE(CONSTCOND)		\
	} while (0)
#define	MDI_WARN	CE_WARN, __func__
#define	MDI_NOTE	CE_NOTE, __func__
#define	MDI_CONT	CE_CONT, __func__
static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
#else	/* !DEBUG */
/* Debug messages compile away entirely on non-DEBUG builds. */
#define	MDI_DEBUG(dbglevel, pargs)
#endif	/* DEBUG */
int	mdi_debug_consoleonly = 0;
/* Argument passed to delay_random() between vHCI enter retries */
int	mdi_delay = 3;
86 
87 extern pri_t	minclsyspri;
88 extern int	modrootloaded;
89 
/*
 * Global mutex:
 * Protects vHCI list and structure members.
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 *
 * mdi_vhci_head and mdi_vhci_tail delimit a singly linked list chained
 * through vh_next and mdi_vhci_count is the number of entries on it.
 * mdi_vhci_register() and mdi_vhci_unregister() update all three while
 * holding mdi_mutex.
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 *
 * Number of struct client_hash entries allocated for the vh_client_table
 * of each vHCI created by mdi_vhci_register().
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 *
 * These are the values i_mdi_init() hands to taskq_create() when it
 * creates mdi_taskq.  The MINALLOC/MAXALLOC values scale with
 * mdi_taskq_n_threads.
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* One second expressed in clock ticks (drv_usectohz() of 1000000 usec) */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))
120 
/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

/*
 * NOTE(review): presumably the number of buckets of a bus config cache
 * hash; its consumer is not visible in this part of the file - confirm.
 */
static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;
161 
/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 *
 * The mutex and the three hashes below are created by i_mdi_init(); each
 * hash is created with mdi_pathmap_hash_size as its size argument.
 */
static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
static int		mdi_pathmap_hash_size = 256;
static kmutex_t		mdi_pathmap_mutex;
static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */
175 
/*
 * MDI component property name/value string definitions
 */
const char 		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/*
 * cmn_err() format used by mdi_vhci_register() when a second vHCI driver
 * tries to register for a class; the single %s receives the class name.
 */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";
205 
206 /*
207  * Static function prototypes
208  */
209 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
210 static int		i_mdi_client_offline(dev_info_t *, uint_t);
211 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
212 static void		i_mdi_phci_post_detach(dev_info_t *,
213 			    ddi_detach_cmd_t, int);
214 static int		i_mdi_client_pre_detach(dev_info_t *,
215 			    ddi_detach_cmd_t);
216 static void		i_mdi_client_post_detach(dev_info_t *,
217 			    ddi_detach_cmd_t, int);
218 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
219 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
220 static int 		i_mdi_lba_lb(mdi_client_t *ct,
221 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
222 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
223 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
224 static void		i_mdi_pm_reset_client(mdi_client_t *);
225 static int		i_mdi_power_all_phci(mdi_client_t *);
226 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
227 
228 
229 /*
230  * Internal mdi_pathinfo node functions
231  */
232 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
233 
234 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
235 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
236 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
237 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
238 static void		i_mdi_phci_unlock(mdi_phci_t *);
239 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
240 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
241 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
242 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
243 			    mdi_client_t *);
244 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
245 static void		i_mdi_client_remove_path(mdi_client_t *,
246 			    mdi_pathinfo_t *);
247 
248 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
249 			    mdi_pathinfo_state_t, int);
250 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
251 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
252 			    char **, int);
253 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
254 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
255 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
256 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
257 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
258 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
259 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
260 static void		i_mdi_client_update_state(mdi_client_t *);
261 static int		i_mdi_client_compute_state(mdi_client_t *,
262 			    mdi_phci_t *);
263 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
264 static void		i_mdi_client_unlock(mdi_client_t *);
265 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
266 static mdi_client_t	*i_devi_get_client(dev_info_t *);
267 /*
268  * NOTE: this will be removed once the NWS files are changed to use the new
269  * mdi_{enable,disable}_path interfaces
270  */
271 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
272 				int, int);
273 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
274 				mdi_vhci_t *vh, int flags, int op);
275 /*
276  * Failover related function prototypes
277  */
278 static int		i_mdi_failover(void *);
279 
280 /*
281  * misc internal functions
282  */
283 static int		i_mdi_get_hash_key(char *);
284 static int		i_map_nvlist_error_to_mdi(int);
285 static void		i_mdi_report_path_state(mdi_client_t *,
286 			    mdi_pathinfo_t *);
287 
288 static void		setup_vhci_cache(mdi_vhci_t *);
289 static int		destroy_vhci_cache(mdi_vhci_t *);
290 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
291 static boolean_t	stop_vhcache_flush_thread(void *, int);
292 static void		free_string_array(char **, int);
293 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
294 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
295 static void		free_vhcache_client(mdi_vhcache_client_t *);
296 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
297 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
298 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
299 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
300 static void		vhcache_pi_add(mdi_vhci_config_t *,
301 			    struct mdi_pathinfo *);
302 static void		vhcache_pi_remove(mdi_vhci_config_t *,
303 			    struct mdi_pathinfo *);
304 static void		free_phclient_path_list(mdi_phys_path_t *);
305 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
306 static int		flush_vhcache(mdi_vhci_config_t *, int);
307 static void		vhcache_dirty(mdi_vhci_config_t *);
308 static void		free_async_client_config(mdi_async_client_config_t *);
309 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
310 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
311 static nvlist_t		*read_on_disk_vhci_cache(char *);
312 extern int		fread_nvlist(char *, nvlist_t **);
313 extern int		fwrite_nvlist(char *, nvlist_t *);
314 
315 /* called once when first vhci registers with mdi */
316 static void
317 i_mdi_init()
318 {
319 	static int initialized = 0;
320 
321 	if (initialized)
322 		return;
323 	initialized = 1;
324 
325 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
326 
327 	/* Create our taskq resources */
328 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
329 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
330 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
331 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
332 
333 	/* Allocate ['path_instance' <-> "path"] maps */
334 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
335 	mdi_pathmap_bypath = mod_hash_create_strhash(
336 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
337 	    mod_hash_null_valdtor);
338 	mdi_pathmap_byinstance = mod_hash_create_idhash(
339 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
340 	    mod_hash_null_valdtor);
341 	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
342 	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
343 	    mod_hash_null_valdtor);
344 }
345 
346 /*
347  * mdi_get_component_type():
348  *		Return mpxio component type
349  * Return Values:
350  *		MDI_COMPONENT_NONE
351  *		MDI_COMPONENT_VHCI
352  *		MDI_COMPONENT_PHCI
353  *		MDI_COMPONENT_CLIENT
354  * XXX This doesn't work under multi-level MPxIO and should be
355  *	removed when clients migrate mdi_component_is_*() interfaces.
356  */
357 int
358 mdi_get_component_type(dev_info_t *dip)
359 {
360 	return (DEVI(dip)->devi_mdi_component);
361 }
362 
363 /*
364  * mdi_vhci_register():
365  *		Register a vHCI module with the mpxio framework
366  *		mdi_vhci_register() is called by vHCI drivers to register the
367  *		'class_driver' vHCI driver and its MDI entrypoints with the
368  *		mpxio framework.  The vHCI driver must call this interface as
369  *		part of its attach(9e) handler.
370  *		Competing threads may try to attach mdi_vhci_register() as
371  *		the vHCI drivers are loaded and attached as a result of pHCI
372  *		driver instance registration (mdi_phci_register()) with the
373  *		framework.
374  * Return Values:
375  *		MDI_SUCCESS
376  *		MDI_FAILURE
377  */
378 /*ARGSUSED*/
379 int
380 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
381     int flags)
382 {
383 	mdi_vhci_t		*vh = NULL;
384 
385 	/* Registrant can't be older */
386 	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
387 
388 #ifdef DEBUG
389 	/*
390 	 * IB nexus driver is loaded only when IB hardware is present.
391 	 * In order to be able to do this there is a need to drive the loading
392 	 * and attaching of the IB nexus driver (especially when an IB hardware
393 	 * is dynamically plugged in) when an IB HCA driver (PHCI)
394 	 * is being attached. Unfortunately this gets into the limitations
395 	 * of devfs as there seems to be no clean way to drive configuration
396 	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
397 	 * for IB.
398 	 */
399 	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
400 		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
401 #endif
402 
403 	i_mdi_init();
404 
405 	mutex_enter(&mdi_mutex);
406 	/*
407 	 * Scan for already registered vhci
408 	 */
409 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
410 		if (strcmp(vh->vh_class, class) == 0) {
411 			/*
412 			 * vHCI has already been created.  Check for valid
413 			 * vHCI ops registration.  We only support one vHCI
414 			 * module per class
415 			 */
416 			if (vh->vh_ops != NULL) {
417 				mutex_exit(&mdi_mutex);
418 				cmn_err(CE_NOTE, vhci_greeting, class);
419 				return (MDI_FAILURE);
420 			}
421 			break;
422 		}
423 	}
424 
425 	/*
426 	 * if not yet created, create the vHCI component
427 	 */
428 	if (vh == NULL) {
429 		struct client_hash	*hash = NULL;
430 		char			*load_balance;
431 
432 		/*
433 		 * Allocate and initialize the mdi extensions
434 		 */
435 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
436 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
437 		    KM_SLEEP);
438 		vh->vh_client_table = hash;
439 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
440 		(void) strcpy(vh->vh_class, class);
441 		vh->vh_lb = LOAD_BALANCE_RR;
442 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
443 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
444 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
445 				vh->vh_lb = LOAD_BALANCE_NONE;
446 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
447 				    == 0) {
448 				vh->vh_lb = LOAD_BALANCE_LBA;
449 			}
450 			ddi_prop_free(load_balance);
451 		}
452 
453 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
454 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
455 
456 		/*
457 		 * Store the vHCI ops vectors
458 		 */
459 		vh->vh_dip = vdip;
460 		vh->vh_ops = vops;
461 
462 		setup_vhci_cache(vh);
463 
464 		if (mdi_vhci_head == NULL) {
465 			mdi_vhci_head = vh;
466 		}
467 		if (mdi_vhci_tail) {
468 			mdi_vhci_tail->vh_next = vh;
469 		}
470 		mdi_vhci_tail = vh;
471 		mdi_vhci_count++;
472 	}
473 
474 	/*
475 	 * Claim the devfs node as a vhci component
476 	 */
477 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
478 
479 	/*
480 	 * Initialize our back reference from dev_info node
481 	 */
482 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
483 	mutex_exit(&mdi_mutex);
484 	return (MDI_SUCCESS);
485 }
486 
487 /*
488  * mdi_vhci_unregister():
489  *		Unregister a vHCI module from mpxio framework
490  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
491  * 		of a vhci to unregister it from the framework.
492  * Return Values:
493  *		MDI_SUCCESS
494  *		MDI_FAILURE
495  */
496 /*ARGSUSED*/
497 int
498 mdi_vhci_unregister(dev_info_t *vdip, int flags)
499 {
500 	mdi_vhci_t	*found, *vh, *prev = NULL;
501 
502 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
503 
504 	/*
505 	 * Check for invalid VHCI
506 	 */
507 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
508 		return (MDI_FAILURE);
509 
510 	/*
511 	 * Scan the list of registered vHCIs for a match
512 	 */
513 	mutex_enter(&mdi_mutex);
514 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
515 		if (found == vh)
516 			break;
517 		prev = found;
518 	}
519 
520 	if (found == NULL) {
521 		mutex_exit(&mdi_mutex);
522 		return (MDI_FAILURE);
523 	}
524 
525 	/*
526 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
527 	 * should have been unregistered, before a vHCI can be
528 	 * unregistered.
529 	 */
530 	MDI_VHCI_PHCI_LOCK(vh);
531 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
532 		MDI_VHCI_PHCI_UNLOCK(vh);
533 		mutex_exit(&mdi_mutex);
534 		return (MDI_FAILURE);
535 	}
536 	MDI_VHCI_PHCI_UNLOCK(vh);
537 
538 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
539 		mutex_exit(&mdi_mutex);
540 		return (MDI_FAILURE);
541 	}
542 
543 	/*
544 	 * Remove the vHCI from the global list
545 	 */
546 	if (vh == mdi_vhci_head) {
547 		mdi_vhci_head = vh->vh_next;
548 	} else {
549 		prev->vh_next = vh->vh_next;
550 	}
551 	if (vh == mdi_vhci_tail) {
552 		mdi_vhci_tail = prev;
553 	}
554 	mdi_vhci_count--;
555 	mutex_exit(&mdi_mutex);
556 
557 	vh->vh_ops = NULL;
558 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
559 	DEVI(vdip)->devi_mdi_xhci = NULL;
560 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
561 	kmem_free(vh->vh_client_table,
562 	    mdi_client_table_size * sizeof (struct client_hash));
563 	mutex_destroy(&vh->vh_phci_mutex);
564 	mutex_destroy(&vh->vh_client_mutex);
565 
566 	kmem_free(vh, sizeof (mdi_vhci_t));
567 	return (MDI_SUCCESS);
568 }
569 
570 /*
571  * i_mdi_vhci_class2vhci():
572  *		Look for a matching vHCI module given a vHCI class name
573  * Return Values:
574  *		Handle to a vHCI component
575  *		NULL
576  */
577 static mdi_vhci_t *
578 i_mdi_vhci_class2vhci(char *class)
579 {
580 	mdi_vhci_t	*vh = NULL;
581 
582 	ASSERT(!MUTEX_HELD(&mdi_mutex));
583 
584 	mutex_enter(&mdi_mutex);
585 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
586 		if (strcmp(vh->vh_class, class) == 0) {
587 			break;
588 		}
589 	}
590 	mutex_exit(&mdi_mutex);
591 	return (vh);
592 }
593 
594 /*
595  * i_devi_get_vhci():
596  *		Utility function to get the handle to a vHCI component
597  * Return Values:
598  *		Handle to a vHCI component
599  *		NULL
600  */
601 mdi_vhci_t *
602 i_devi_get_vhci(dev_info_t *vdip)
603 {
604 	mdi_vhci_t	*vh = NULL;
605 	if (MDI_VHCI(vdip)) {
606 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
607 	}
608 	return (vh);
609 }
610 
611 /*
612  * mdi_phci_register():
613  *		Register a pHCI module with mpxio framework
614  *		mdi_phci_register() is called by pHCI drivers to register with
615  *		the mpxio framework and a specific 'class_driver' vHCI.  The
616  *		pHCI driver must call this interface as part of its attach(9e)
617  *		handler.
618  * Return Values:
619  *		MDI_SUCCESS
620  *		MDI_FAILURE
621  */
622 /*ARGSUSED*/
623 int
624 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
625 {
626 	mdi_phci_t		*ph;
627 	mdi_vhci_t		*vh;
628 	char			*data;
629 
630 	/*
631 	 * Some subsystems, like fcp, perform pHCI registration from a
632 	 * different thread than the one doing the pHCI attach(9E) - the
633 	 * driver attach code is waiting for this other thread to complete.
634 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
635 	 * (indicating that some thread has done an ndi_devi_enter of parent)
636 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
637 	 */
638 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
639 
640 	/*
641 	 * Check for mpxio-disable property. Enable mpxio if the property is
642 	 * missing or not set to "yes".
643 	 * If the property is set to "yes" then emit a brief message.
644 	 */
645 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
646 	    &data) == DDI_SUCCESS)) {
647 		if (strcmp(data, "yes") == 0) {
648 			MDI_DEBUG(1, (MDI_CONT, pdip,
649 			    "?multipath capabilities disabled via %s.conf.",
650 			    ddi_driver_name(pdip)));
651 			ddi_prop_free(data);
652 			return (MDI_FAILURE);
653 		}
654 		ddi_prop_free(data);
655 	}
656 
657 	/*
658 	 * Search for a matching vHCI
659 	 */
660 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
661 	if (vh == NULL) {
662 		return (MDI_FAILURE);
663 	}
664 
665 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
666 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
667 	ph->ph_dip = pdip;
668 	ph->ph_vhci = vh;
669 	ph->ph_next = NULL;
670 	ph->ph_unstable = 0;
671 	ph->ph_vprivate = 0;
672 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
673 
674 	MDI_PHCI_LOCK(ph);
675 	MDI_PHCI_SET_POWER_UP(ph);
676 	MDI_PHCI_UNLOCK(ph);
677 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
678 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
679 
680 	vhcache_phci_add(vh->vh_config, ph);
681 
682 	MDI_VHCI_PHCI_LOCK(vh);
683 	if (vh->vh_phci_head == NULL) {
684 		vh->vh_phci_head = ph;
685 	}
686 	if (vh->vh_phci_tail) {
687 		vh->vh_phci_tail->ph_next = ph;
688 	}
689 	vh->vh_phci_tail = ph;
690 	vh->vh_phci_count++;
691 	MDI_VHCI_PHCI_UNLOCK(vh);
692 
693 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
694 	return (MDI_SUCCESS);
695 }
696 
697 /*
698  * mdi_phci_unregister():
699  *		Unregister a pHCI module from mpxio framework
700  *		mdi_phci_unregister() is called by the pHCI drivers from their
701  *		detach(9E) handler to unregister their instances from the
702  *		framework.
703  * Return Values:
704  *		MDI_SUCCESS
705  *		MDI_FAILURE
706  */
707 /*ARGSUSED*/
708 int
709 mdi_phci_unregister(dev_info_t *pdip, int flags)
710 {
711 	mdi_vhci_t		*vh;
712 	mdi_phci_t		*ph;
713 	mdi_phci_t		*tmp;
714 	mdi_phci_t		*prev = NULL;
715 	mdi_pathinfo_t		*pip;
716 
717 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
718 
719 	ph = i_devi_get_phci(pdip);
720 	if (ph == NULL) {
721 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
722 		return (MDI_FAILURE);
723 	}
724 
725 	vh = ph->ph_vhci;
726 	ASSERT(vh != NULL);
727 	if (vh == NULL) {
728 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
729 		return (MDI_FAILURE);
730 	}
731 
732 	MDI_VHCI_PHCI_LOCK(vh);
733 	tmp = vh->vh_phci_head;
734 	while (tmp) {
735 		if (tmp == ph) {
736 			break;
737 		}
738 		prev = tmp;
739 		tmp = tmp->ph_next;
740 	}
741 
742 	if (ph == vh->vh_phci_head) {
743 		vh->vh_phci_head = ph->ph_next;
744 	} else {
745 		prev->ph_next = ph->ph_next;
746 	}
747 
748 	if (ph == vh->vh_phci_tail) {
749 		vh->vh_phci_tail = prev;
750 	}
751 
752 	vh->vh_phci_count--;
753 	MDI_VHCI_PHCI_UNLOCK(vh);
754 
755 	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
756 	MDI_PHCI_LOCK(ph);
757 	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
758 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
759 		MDI_PI(pip)->pi_phci = NULL;
760 	MDI_PHCI_UNLOCK(ph);
761 
762 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
763 	    ESC_DDI_INITIATOR_UNREGISTER);
764 	vhcache_phci_remove(vh->vh_config, ph);
765 	cv_destroy(&ph->ph_unstable_cv);
766 	mutex_destroy(&ph->ph_mutex);
767 	kmem_free(ph, sizeof (mdi_phci_t));
768 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
769 	DEVI(pdip)->devi_mdi_xhci = NULL;
770 	return (MDI_SUCCESS);
771 }
772 
773 /*
774  * i_devi_get_phci():
775  * 		Utility function to return the phci extensions.
776  */
777 static mdi_phci_t *
778 i_devi_get_phci(dev_info_t *pdip)
779 {
780 	mdi_phci_t	*ph = NULL;
781 
782 	if (MDI_PHCI(pdip)) {
783 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
784 	}
785 	return (ph);
786 }
787 
788 /*
789  * Single thread mdi entry into devinfo node for modifying its children.
790  * If necessary we perform an ndi_devi_enter of the vHCI before doing
791  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
792  * for the vHCI and one for the pHCI.
793  */
794 void
795 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
796 {
797 	dev_info_t	*vdip;
798 	int		vcircular, pcircular;
799 
800 	/* Verify calling context */
801 	ASSERT(MDI_PHCI(phci_dip));
802 	vdip = mdi_devi_get_vdip(phci_dip);
803 	ASSERT(vdip);			/* A pHCI always has a vHCI */
804 
805 	/*
806 	 * If pHCI is detaching then the framework has already entered the
807 	 * vHCI on a threads that went down the code path leading to
808 	 * detach_node().  This framework enter of the vHCI during pHCI
809 	 * detach is done to avoid deadlock with vHCI power management
810 	 * operations which enter the vHCI and the enter down the path
811 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
812 	 * enter of the vHCI on frameworks vHCI enter that has already
813 	 * occurred - this is OK because we know that the framework thread
814 	 * doing detach is waiting for our completion.
815 	 *
816 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
817 	 * race with detach - but we can't do that because the framework has
818 	 * already entered the parent, so we have some complexity instead.
819 	 */
820 	for (;;) {
821 		if (ndi_devi_tryenter(vdip, &vcircular)) {
822 			ASSERT(vcircular != -1);
823 			if (DEVI_IS_DETACHING(phci_dip)) {
824 				ndi_devi_exit(vdip, vcircular);
825 				vcircular = -1;
826 			}
827 			break;
828 		} else if (DEVI_IS_DETACHING(phci_dip)) {
829 			vcircular = -1;
830 			break;
831 		} else if (servicing_interrupt()) {
832 			/*
833 			 * Don't delay an interrupt (and ensure adaptive
834 			 * mutex inversion support).
835 			 */
836 			ndi_devi_enter(vdip, &vcircular);
837 			break;
838 		} else {
839 			delay_random(mdi_delay);
840 		}
841 	}
842 
843 	ndi_devi_enter(phci_dip, &pcircular);
844 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
845 }
846 
847 /*
848  * Attempt to mdi_devi_enter.
849  */
850 int
851 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
852 {
853 	dev_info_t	*vdip;
854 	int		vcircular, pcircular;
855 
856 	/* Verify calling context */
857 	ASSERT(MDI_PHCI(phci_dip));
858 	vdip = mdi_devi_get_vdip(phci_dip);
859 	ASSERT(vdip);			/* A pHCI always has a vHCI */
860 
861 	if (ndi_devi_tryenter(vdip, &vcircular)) {
862 		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
863 			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
864 			return (1);	/* locked */
865 		}
866 		ndi_devi_exit(vdip, vcircular);
867 	}
868 	return (0);			/* busy */
869 }
870 
871 /*
872  * Release mdi_devi_enter or successful mdi_devi_tryenter.
873  */
874 void
875 mdi_devi_exit(dev_info_t *phci_dip, int circular)
876 {
877 	dev_info_t	*vdip;
878 	int		vcircular, pcircular;
879 
880 	/* Verify calling context */
881 	ASSERT(MDI_PHCI(phci_dip));
882 	vdip = mdi_devi_get_vdip(phci_dip);
883 	ASSERT(vdip);			/* A pHCI always has a vHCI */
884 
885 	/* extract two circular recursion values from single int */
886 	pcircular = (short)(circular & 0xFFFF);
887 	vcircular = (short)((circular >> 16) & 0xFFFF);
888 
889 	ndi_devi_exit(phci_dip, pcircular);
890 	if (vcircular != -1)
891 		ndi_devi_exit(vdip, vcircular);
892 }
893 
894 /*
895  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
896  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
897  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
898  * with vHCI power management code during path online/offline.  Each
899  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
900  * occur within the scope of an active mdi_devi_enter that establishes the
901  * circular value.
902  */
903 void
904 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
905 {
906 	int		pcircular;
907 
908 	/* Verify calling context */
909 	ASSERT(MDI_PHCI(phci_dip));
910 
911 	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
912 	ndi_hold_devi(phci_dip);
913 
914 	pcircular = (short)(circular & 0xFFFF);
915 	ndi_devi_exit(phci_dip, pcircular);
916 }
917 
918 void
919 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
920 {
921 	int		pcircular;
922 
923 	/* Verify calling context */
924 	ASSERT(MDI_PHCI(phci_dip));
925 
926 	ndi_devi_enter(phci_dip, &pcircular);
927 
928 	/* Drop hold from mdi_devi_exit_phci. */
929 	ndi_rele_devi(phci_dip);
930 
931 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
932 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
933 }
934 
935 /*
936  * mdi_devi_get_vdip():
937  *		given a pHCI dip return vHCI dip
938  */
939 dev_info_t *
940 mdi_devi_get_vdip(dev_info_t *pdip)
941 {
942 	mdi_phci_t	*ph;
943 
944 	ph = i_devi_get_phci(pdip);
945 	if (ph && ph->ph_vhci)
946 		return (ph->ph_vhci->vh_dip);
947 	return (NULL);
948 }
949 
950 /*
951  * mdi_devi_pdip_entered():
952  *		Return 1 if we are vHCI and have done an ndi_devi_enter
953  *		of a pHCI
954  */
955 int
956 mdi_devi_pdip_entered(dev_info_t *vdip)
957 {
958 	mdi_vhci_t	*vh;
959 	mdi_phci_t	*ph;
960 
961 	vh = i_devi_get_vhci(vdip);
962 	if (vh == NULL)
963 		return (0);
964 
965 	MDI_VHCI_PHCI_LOCK(vh);
966 	ph = vh->vh_phci_head;
967 	while (ph) {
968 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
969 			MDI_VHCI_PHCI_UNLOCK(vh);
970 			return (1);
971 		}
972 		ph = ph->ph_next;
973 	}
974 	MDI_VHCI_PHCI_UNLOCK(vh);
975 	return (0);
976 }
977 
978 /*
979  * mdi_phci_path2devinfo():
980  * 		Utility function to search for a valid phci device given
981  *		the devfs pathname.
982  */
983 dev_info_t *
984 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
985 {
986 	char		*temp_pathname;
987 	mdi_vhci_t	*vh;
988 	mdi_phci_t	*ph;
989 	dev_info_t 	*pdip = NULL;
990 
991 	vh = i_devi_get_vhci(vdip);
992 	ASSERT(vh != NULL);
993 
994 	if (vh == NULL) {
995 		/*
996 		 * Invalid vHCI component, return failure
997 		 */
998 		return (NULL);
999 	}
1000 
1001 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1002 	MDI_VHCI_PHCI_LOCK(vh);
1003 	ph = vh->vh_phci_head;
1004 	while (ph != NULL) {
1005 		pdip = ph->ph_dip;
1006 		ASSERT(pdip != NULL);
1007 		*temp_pathname = '\0';
1008 		(void) ddi_pathname(pdip, temp_pathname);
1009 		if (strcmp(temp_pathname, pathname) == 0) {
1010 			break;
1011 		}
1012 		ph = ph->ph_next;
1013 	}
1014 	if (ph == NULL) {
1015 		pdip = NULL;
1016 	}
1017 	MDI_VHCI_PHCI_UNLOCK(vh);
1018 	kmem_free(temp_pathname, MAXPATHLEN);
1019 	return (pdip);
1020 }
1021 
1022 /*
1023  * mdi_phci_get_path_count():
1024  * 		get number of path information nodes associated with a given
1025  *		pHCI device.
1026  */
1027 int
1028 mdi_phci_get_path_count(dev_info_t *pdip)
1029 {
1030 	mdi_phci_t	*ph;
1031 	int		count = 0;
1032 
1033 	ph = i_devi_get_phci(pdip);
1034 	if (ph != NULL) {
1035 		count = ph->ph_path_count;
1036 	}
1037 	return (count);
1038 }
1039 
1040 /*
1041  * i_mdi_phci_lock():
1042  *		Lock a pHCI device
1043  * Return Values:
1044  *		None
1045  * Note:
1046  *		The default locking order is:
1047  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1048  *		But there are number of situations where locks need to be
1049  *		grabbed in reverse order.  This routine implements try and lock
1050  *		mechanism depending on the requested parameter option.
1051  */
1052 static void
1053 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1054 {
1055 	if (pip) {
1056 		/* Reverse locking is requested. */
1057 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
1058 			if (servicing_interrupt()) {
1059 				MDI_PI_HOLD(pip);
1060 				MDI_PI_UNLOCK(pip);
1061 				MDI_PHCI_LOCK(ph);
1062 				MDI_PI_LOCK(pip);
1063 				MDI_PI_RELE(pip);
1064 				break;
1065 			} else {
1066 				/*
1067 				 * tryenter failed. Try to grab again
1068 				 * after a small delay
1069 				 */
1070 				MDI_PI_HOLD(pip);
1071 				MDI_PI_UNLOCK(pip);
1072 				delay_random(mdi_delay);
1073 				MDI_PI_LOCK(pip);
1074 				MDI_PI_RELE(pip);
1075 			}
1076 		}
1077 	} else {
1078 		MDI_PHCI_LOCK(ph);
1079 	}
1080 }
1081 
1082 /*
1083  * i_mdi_phci_unlock():
1084  *		Unlock the pHCI component
1085  */
1086 static void
1087 i_mdi_phci_unlock(mdi_phci_t *ph)
1088 {
1089 	MDI_PHCI_UNLOCK(ph);
1090 }
1091 
1092 /*
1093  * i_mdi_devinfo_create():
1094  *		create client device's devinfo node
1095  * Return Values:
1096  *		dev_info
1097  *		NULL
1098  * Notes:
1099  */
1100 static dev_info_t *
1101 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1102 	char **compatible, int ncompatible)
1103 {
1104 	dev_info_t *cdip = NULL;
1105 
1106 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1107 
1108 	/* Verify for duplicate entry */
1109 	cdip = i_mdi_devinfo_find(vh, name, guid);
1110 	ASSERT(cdip == NULL);
1111 	if (cdip) {
1112 		cmn_err(CE_WARN,
1113 		    "i_mdi_devinfo_create: client %s@%s already exists",
1114 			name ? name : "", guid ? guid : "");
1115 	}
1116 
1117 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1118 	if (cdip == NULL)
1119 		goto fail;
1120 
1121 	/*
1122 	 * Create component type and Global unique identifier
1123 	 * properties
1124 	 */
1125 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1126 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1127 		goto fail;
1128 	}
1129 
1130 	/* Decorate the node with compatible property */
1131 	if (compatible &&
1132 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1133 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1134 		goto fail;
1135 	}
1136 
1137 	return (cdip);
1138 
1139 fail:
1140 	if (cdip) {
1141 		(void) ndi_prop_remove_all(cdip);
1142 		(void) ndi_devi_free(cdip);
1143 	}
1144 	return (NULL);
1145 }
1146 
1147 /*
1148  * i_mdi_devinfo_find():
1149  *		Find a matching devinfo node for given client node name
1150  *		and its guid.
1151  * Return Values:
1152  *		Handle to a dev_info node or NULL
1153  */
1154 static dev_info_t *
1155 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1156 {
1157 	char			*data;
1158 	dev_info_t 		*cdip = NULL;
1159 	dev_info_t 		*ndip = NULL;
1160 	int			circular;
1161 
1162 	ndi_devi_enter(vh->vh_dip, &circular);
1163 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1164 	while ((cdip = ndip) != NULL) {
1165 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1166 
1167 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1168 			continue;
1169 		}
1170 
1171 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1172 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1173 		    &data) != DDI_PROP_SUCCESS) {
1174 			continue;
1175 		}
1176 
1177 		if (strcmp(data, guid) != 0) {
1178 			ddi_prop_free(data);
1179 			continue;
1180 		}
1181 		ddi_prop_free(data);
1182 		break;
1183 	}
1184 	ndi_devi_exit(vh->vh_dip, circular);
1185 	return (cdip);
1186 }
1187 
1188 /*
1189  * i_mdi_devinfo_remove():
1190  *		Remove a client device node
1191  */
1192 static int
1193 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1194 {
1195 	int	rv = MDI_SUCCESS;
1196 
1197 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1198 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1199 		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1200 		if (rv != NDI_SUCCESS) {
1201 			MDI_DEBUG(1, (MDI_NOTE, cdip,
1202 			    "!failed: cdip %p", (void *)cdip));
1203 		}
1204 		/*
1205 		 * Convert to MDI error code
1206 		 */
1207 		switch (rv) {
1208 		case NDI_SUCCESS:
1209 			rv = MDI_SUCCESS;
1210 			break;
1211 		case NDI_BUSY:
1212 			rv = MDI_BUSY;
1213 			break;
1214 		default:
1215 			rv = MDI_FAILURE;
1216 			break;
1217 		}
1218 	}
1219 	return (rv);
1220 }
1221 
1222 /*
1223  * i_devi_get_client()
1224  *		Utility function to get mpxio component extensions
1225  */
1226 static mdi_client_t *
1227 i_devi_get_client(dev_info_t *cdip)
1228 {
1229 	mdi_client_t	*ct = NULL;
1230 
1231 	if (MDI_CLIENT(cdip)) {
1232 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1233 	}
1234 	return (ct);
1235 }
1236 
1237 /*
1238  * i_mdi_is_child_present():
1239  *		Search for the presence of client device dev_info node
1240  */
1241 static int
1242 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1243 {
1244 	int		rv = MDI_FAILURE;
1245 	struct dev_info	*dip;
1246 	int		circular;
1247 
1248 	ndi_devi_enter(vdip, &circular);
1249 	dip = DEVI(vdip)->devi_child;
1250 	while (dip) {
1251 		if (dip == DEVI(cdip)) {
1252 			rv = MDI_SUCCESS;
1253 			break;
1254 		}
1255 		dip = dip->devi_sibling;
1256 	}
1257 	ndi_devi_exit(vdip, circular);
1258 	return (rv);
1259 }
1260 
1261 
1262 /*
1263  * i_mdi_client_lock():
1264  *		Grab client component lock
1265  * Return Values:
1266  *		None
1267  * Note:
1268  *		The default locking order is:
1269  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1270  *		But there are number of situations where locks need to be
1271  *		grabbed in reverse order.  This routine implements try and lock
1272  *		mechanism depending on the requested parameter option.
1273  */
1274 static void
1275 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1276 {
1277 	if (pip) {
1278 		/*
1279 		 * Reverse locking is requested.
1280 		 */
1281 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1282 			if (servicing_interrupt()) {
1283 				MDI_PI_HOLD(pip);
1284 				MDI_PI_UNLOCK(pip);
1285 				MDI_CLIENT_LOCK(ct);
1286 				MDI_PI_LOCK(pip);
1287 				MDI_PI_RELE(pip);
1288 				break;
1289 			} else {
1290 				/*
1291 				 * tryenter failed. Try to grab again
1292 				 * after a small delay
1293 				 */
1294 				MDI_PI_HOLD(pip);
1295 				MDI_PI_UNLOCK(pip);
1296 				delay_random(mdi_delay);
1297 				MDI_PI_LOCK(pip);
1298 				MDI_PI_RELE(pip);
1299 			}
1300 		}
1301 	} else {
1302 		MDI_CLIENT_LOCK(ct);
1303 	}
1304 }
1305 
1306 /*
1307  * i_mdi_client_unlock():
1308  *		Unlock a client component
1309  */
1310 static void
1311 i_mdi_client_unlock(mdi_client_t *ct)
1312 {
1313 	MDI_CLIENT_UNLOCK(ct);
1314 }
1315 
1316 /*
1317  * i_mdi_client_alloc():
1318  * 		Allocate and initialize a client structure.  Caller should
1319  *		hold the vhci client lock.
1320  * Return Values:
1321  *		Handle to a client component
1322  */
1323 /*ARGSUSED*/
1324 static mdi_client_t *
1325 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1326 {
1327 	mdi_client_t	*ct;
1328 
1329 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1330 
1331 	/*
1332 	 * Allocate and initialize a component structure.
1333 	 */
1334 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1335 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1336 	ct->ct_hnext = NULL;
1337 	ct->ct_hprev = NULL;
1338 	ct->ct_dip = NULL;
1339 	ct->ct_vhci = vh;
1340 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1341 	(void) strcpy(ct->ct_drvname, name);
1342 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1343 	(void) strcpy(ct->ct_guid, lguid);
1344 	ct->ct_cprivate = NULL;
1345 	ct->ct_vprivate = NULL;
1346 	ct->ct_flags = 0;
1347 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1348 	MDI_CLIENT_LOCK(ct);
1349 	MDI_CLIENT_SET_OFFLINE(ct);
1350 	MDI_CLIENT_SET_DETACH(ct);
1351 	MDI_CLIENT_SET_POWER_UP(ct);
1352 	MDI_CLIENT_UNLOCK(ct);
1353 	ct->ct_failover_flags = 0;
1354 	ct->ct_failover_status = 0;
1355 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1356 	ct->ct_unstable = 0;
1357 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1358 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1359 	ct->ct_lb = vh->vh_lb;
1360 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1361 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1362 	ct->ct_path_count = 0;
1363 	ct->ct_path_head = NULL;
1364 	ct->ct_path_tail = NULL;
1365 	ct->ct_path_last = NULL;
1366 
1367 	/*
1368 	 * Add this client component to our client hash queue
1369 	 */
1370 	i_mdi_client_enlist_table(vh, ct);
1371 	return (ct);
1372 }
1373 
1374 /*
1375  * i_mdi_client_enlist_table():
1376  *		Attach the client device to the client hash table. Caller
1377  *		should hold the vhci client lock.
1378  */
1379 static void
1380 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1381 {
1382 	int 			index;
1383 	struct client_hash	*head;
1384 
1385 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1386 
1387 	index = i_mdi_get_hash_key(ct->ct_guid);
1388 	head = &vh->vh_client_table[index];
1389 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1390 	head->ct_hash_head = ct;
1391 	head->ct_hash_count++;
1392 	vh->vh_client_count++;
1393 }
1394 
1395 /*
1396  * i_mdi_client_delist_table():
1397  *		Attach the client device to the client hash table.
1398  *		Caller should hold the vhci client lock.
1399  */
1400 static void
1401 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1402 {
1403 	int			index;
1404 	char			*guid;
1405 	struct client_hash 	*head;
1406 	mdi_client_t		*next;
1407 	mdi_client_t		*last;
1408 
1409 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1410 
1411 	guid = ct->ct_guid;
1412 	index = i_mdi_get_hash_key(guid);
1413 	head = &vh->vh_client_table[index];
1414 
1415 	last = NULL;
1416 	next = (mdi_client_t *)head->ct_hash_head;
1417 	while (next != NULL) {
1418 		if (next == ct) {
1419 			break;
1420 		}
1421 		last = next;
1422 		next = next->ct_hnext;
1423 	}
1424 
1425 	if (next) {
1426 		head->ct_hash_count--;
1427 		if (last == NULL) {
1428 			head->ct_hash_head = ct->ct_hnext;
1429 		} else {
1430 			last->ct_hnext = ct->ct_hnext;
1431 		}
1432 		ct->ct_hnext = NULL;
1433 		vh->vh_client_count--;
1434 	}
1435 }
1436 
1437 
1438 /*
1439  * i_mdi_client_free():
1440  *		Free a client component
1441  */
1442 static int
1443 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1444 {
1445 	int		rv = MDI_SUCCESS;
1446 	int		flags = ct->ct_flags;
1447 	dev_info_t	*cdip;
1448 	dev_info_t	*vdip;
1449 
1450 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1451 
1452 	vdip = vh->vh_dip;
1453 	cdip = ct->ct_dip;
1454 
1455 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1456 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1457 	DEVI(cdip)->devi_mdi_client = NULL;
1458 
1459 	/*
1460 	 * Clear out back ref. to dev_info_t node
1461 	 */
1462 	ct->ct_dip = NULL;
1463 
1464 	/*
1465 	 * Remove this client from our hash queue
1466 	 */
1467 	i_mdi_client_delist_table(vh, ct);
1468 
1469 	/*
1470 	 * Uninitialize and free the component
1471 	 */
1472 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1473 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1474 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1475 	cv_destroy(&ct->ct_failover_cv);
1476 	cv_destroy(&ct->ct_unstable_cv);
1477 	cv_destroy(&ct->ct_powerchange_cv);
1478 	mutex_destroy(&ct->ct_mutex);
1479 	kmem_free(ct, sizeof (*ct));
1480 
1481 	if (cdip != NULL) {
1482 		MDI_VHCI_CLIENT_UNLOCK(vh);
1483 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1484 		MDI_VHCI_CLIENT_LOCK(vh);
1485 	}
1486 	return (rv);
1487 }
1488 
1489 /*
1490  * i_mdi_client_find():
1491  * 		Find the client structure corresponding to a given guid
1492  *		Caller should hold the vhci client lock.
1493  */
1494 static mdi_client_t *
1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1496 {
1497 	int			index;
1498 	struct client_hash	*head;
1499 	mdi_client_t		*ct;
1500 
1501 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1502 
1503 	index = i_mdi_get_hash_key(guid);
1504 	head = &vh->vh_client_table[index];
1505 
1506 	ct = head->ct_hash_head;
1507 	while (ct != NULL) {
1508 		if (strcmp(ct->ct_guid, guid) == 0 &&
1509 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1510 			break;
1511 		}
1512 		ct = ct->ct_hnext;
1513 	}
1514 	return (ct);
1515 }
1516 
1517 /*
1518  * i_mdi_client_update_state():
1519  *		Compute and update client device state
1520  * Notes:
1521  *		A client device can be in any of three possible states:
1522  *
1523  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1524  *		one online/standby paths. Can tolerate failures.
1525  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1526  *		no alternate paths available as standby. A failure on the online
1527  *		would result in loss of access to device data.
1528  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1529  *		no paths available to access the device.
1530  */
1531 static void
1532 i_mdi_client_update_state(mdi_client_t *ct)
1533 {
1534 	int state;
1535 
1536 	ASSERT(MDI_CLIENT_LOCKED(ct));
1537 	state = i_mdi_client_compute_state(ct, NULL);
1538 	MDI_CLIENT_SET_STATE(ct, state);
1539 }
1540 
1541 /*
1542  * i_mdi_client_compute_state():
1543  *		Compute client device state
1544  *
1545  *		mdi_phci_t *	Pointer to pHCI structure which should
1546  *				while computing the new value.  Used by
1547  *				i_mdi_phci_offline() to find the new
1548  *				client state after DR of a pHCI.
1549  */
1550 static int
1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1552 {
1553 	int		state;
1554 	int		online_count = 0;
1555 	int		standby_count = 0;
1556 	mdi_pathinfo_t	*pip, *next;
1557 
1558 	ASSERT(MDI_CLIENT_LOCKED(ct));
1559 	pip = ct->ct_path_head;
1560 	while (pip != NULL) {
1561 		MDI_PI_LOCK(pip);
1562 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1563 		if (MDI_PI(pip)->pi_phci == ph) {
1564 			MDI_PI_UNLOCK(pip);
1565 			pip = next;
1566 			continue;
1567 		}
1568 
1569 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1570 				== MDI_PATHINFO_STATE_ONLINE)
1571 			online_count++;
1572 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1573 				== MDI_PATHINFO_STATE_STANDBY)
1574 			standby_count++;
1575 		MDI_PI_UNLOCK(pip);
1576 		pip = next;
1577 	}
1578 
1579 	if (online_count == 0) {
1580 		if (standby_count == 0) {
1581 			state = MDI_CLIENT_STATE_FAILED;
1582 			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1583 			    "client state failed: ct = %p", (void *)ct));
1584 		} else if (standby_count == 1) {
1585 			state = MDI_CLIENT_STATE_DEGRADED;
1586 		} else {
1587 			state = MDI_CLIENT_STATE_OPTIMAL;
1588 		}
1589 	} else if (online_count == 1) {
1590 		if (standby_count == 0) {
1591 			state = MDI_CLIENT_STATE_DEGRADED;
1592 		} else {
1593 			state = MDI_CLIENT_STATE_OPTIMAL;
1594 		}
1595 	} else {
1596 		state = MDI_CLIENT_STATE_OPTIMAL;
1597 	}
1598 	return (state);
1599 }
1600 
1601 /*
1602  * i_mdi_client2devinfo():
1603  *		Utility function
1604  */
1605 dev_info_t *
1606 i_mdi_client2devinfo(mdi_client_t *ct)
1607 {
1608 	return (ct->ct_dip);
1609 }
1610 
1611 /*
1612  * mdi_client_path2_devinfo():
1613  * 		Given the parent devinfo and child devfs pathname, search for
1614  *		a valid devfs node handle.
1615  */
1616 dev_info_t *
1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1618 {
1619 	dev_info_t 	*cdip = NULL;
1620 	dev_info_t 	*ndip = NULL;
1621 	char		*temp_pathname;
1622 	int		circular;
1623 
1624 	/*
1625 	 * Allocate temp buffer
1626 	 */
1627 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1628 
1629 	/*
1630 	 * Lock parent against changes
1631 	 */
1632 	ndi_devi_enter(vdip, &circular);
1633 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1634 	while ((cdip = ndip) != NULL) {
1635 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1636 
1637 		*temp_pathname = '\0';
1638 		(void) ddi_pathname(cdip, temp_pathname);
1639 		if (strcmp(temp_pathname, pathname) == 0) {
1640 			break;
1641 		}
1642 	}
1643 	/*
1644 	 * Release devinfo lock
1645 	 */
1646 	ndi_devi_exit(vdip, circular);
1647 
1648 	/*
1649 	 * Free the temp buffer
1650 	 */
1651 	kmem_free(temp_pathname, MAXPATHLEN);
1652 	return (cdip);
1653 }
1654 
1655 /*
1656  * mdi_client_get_path_count():
1657  * 		Utility function to get number of path information nodes
1658  *		associated with a given client device.
1659  */
1660 int
1661 mdi_client_get_path_count(dev_info_t *cdip)
1662 {
1663 	mdi_client_t	*ct;
1664 	int		count = 0;
1665 
1666 	ct = i_devi_get_client(cdip);
1667 	if (ct != NULL) {
1668 		count = ct->ct_path_count;
1669 	}
1670 	return (count);
1671 }
1672 
1673 
1674 /*
1675  * i_mdi_get_hash_key():
1676  * 		Create a hash using strings as keys
1677  *
1678  */
1679 static int
1680 i_mdi_get_hash_key(char *str)
1681 {
1682 	uint32_t	g, hash = 0;
1683 	char		*p;
1684 
1685 	for (p = str; *p != '\0'; p++) {
1686 		g = *p;
1687 		hash += g;
1688 	}
1689 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1690 }
1691 
1692 /*
1693  * mdi_get_lb_policy():
1694  * 		Get current load balancing policy for a given client device
1695  */
1696 client_lb_t
1697 mdi_get_lb_policy(dev_info_t *cdip)
1698 {
1699 	client_lb_t	lb = LOAD_BALANCE_NONE;
1700 	mdi_client_t	*ct;
1701 
1702 	ct = i_devi_get_client(cdip);
1703 	if (ct != NULL) {
1704 		lb = ct->ct_lb;
1705 	}
1706 	return (lb);
1707 }
1708 
1709 /*
1710  * mdi_set_lb_region_size():
1711  * 		Set current region size for the load-balance
1712  */
1713 int
1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1715 {
1716 	mdi_client_t	*ct;
1717 	int		rv = MDI_FAILURE;
1718 
1719 	ct = i_devi_get_client(cdip);
1720 	if (ct != NULL && ct->ct_lb_args != NULL) {
1721 		ct->ct_lb_args->region_size = region_size;
1722 		rv = MDI_SUCCESS;
1723 	}
1724 	return (rv);
1725 }
1726 
1727 /*
1728  * mdi_Set_lb_policy():
1729  * 		Set current load balancing policy for a given client device
1730  */
1731 int
1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1733 {
1734 	mdi_client_t	*ct;
1735 	int		rv = MDI_FAILURE;
1736 
1737 	ct = i_devi_get_client(cdip);
1738 	if (ct != NULL) {
1739 		ct->ct_lb = lb;
1740 		rv = MDI_SUCCESS;
1741 	}
1742 	return (rv);
1743 }
1744 
1745 /*
1746  * mdi_failover():
1747  *		failover function called by the vHCI drivers to initiate
1748  *		a failover operation.  This is typically due to non-availability
1749  *		of online paths to route I/O requests.  Failover can be
1750  *		triggered through user application also.
1751  *
1752  *		The vHCI driver calls mdi_failover() to initiate a failover
1753  *		operation. mdi_failover() calls back into the vHCI driver's
1754  *		vo_failover() entry point to perform the actual failover
1755  *		operation.  The reason for requiring the vHCI driver to
1756  *		initiate failover by calling mdi_failover(), instead of directly
1757  *		executing vo_failover() itself, is to ensure that the mdi
1758  *		framework can keep track of the client state properly.
1759  *		Additionally, mdi_failover() provides as a convenience the
1760  *		option of performing the failover operation synchronously or
1761  *		asynchronously
1762  *
1763  *		Upon successful completion of the failover operation, the
1764  *		paths that were previously ONLINE will be in the STANDBY state,
1765  *		and the newly activated paths will be in the ONLINE state.
1766  *
1767  *		The flags modifier determines whether the activation is done
1768  *		synchronously: MDI_FAILOVER_SYNC
1769  * Return Values:
1770  *		MDI_SUCCESS
1771  *		MDI_FAILURE
1772  *		MDI_BUSY
1773  */
1774 /*ARGSUSED*/
1775 int
1776 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1777 {
1778 	int			rv;
1779 	mdi_client_t		*ct;
1780 
1781 	ct = i_devi_get_client(cdip);
1782 	ASSERT(ct != NULL);
1783 	if (ct == NULL) {
1784 		/* cdip is not a valid client device. Nothing more to do. */
1785 		return (MDI_FAILURE);
1786 	}
1787 
1788 	MDI_CLIENT_LOCK(ct);
1789 
1790 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1791 		/* A path to the client is being freed */
1792 		MDI_CLIENT_UNLOCK(ct);
1793 		return (MDI_BUSY);
1794 	}
1795 
1796 
1797 	if (MDI_CLIENT_IS_FAILED(ct)) {
1798 		/*
1799 		 * Client is in failed state. Nothing more to do.
1800 		 */
1801 		MDI_CLIENT_UNLOCK(ct);
1802 		return (MDI_FAILURE);
1803 	}
1804 
1805 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1806 		/*
1807 		 * Failover is already in progress; return BUSY
1808 		 */
1809 		MDI_CLIENT_UNLOCK(ct);
1810 		return (MDI_BUSY);
1811 	}
1812 	/*
1813 	 * Make sure that mdi_pathinfo node state changes are processed.
1814 	 * We do not allow failovers to progress while client path state
1815 	 * changes are in progress
1816 	 */
1817 	if (ct->ct_unstable) {
1818 		if (flags == MDI_FAILOVER_ASYNC) {
1819 			MDI_CLIENT_UNLOCK(ct);
1820 			return (MDI_BUSY);
1821 		} else {
1822 			while (ct->ct_unstable)
1823 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1824 		}
1825 	}
1826 
1827 	/*
1828 	 * Client device is in stable state. Before proceeding, perform sanity
1829 	 * checks again.
1830 	 */
1831 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1832 	    (!i_ddi_devi_attached(cdip))) {
1833 		/*
1834 		 * Client is in failed state. Nothing more to do.
1835 		 */
1836 		MDI_CLIENT_UNLOCK(ct);
1837 		return (MDI_FAILURE);
1838 	}
1839 
1840 	/*
1841 	 * Set the client state as failover in progress.
1842 	 */
1843 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1844 	ct->ct_failover_flags = flags;
1845 	MDI_CLIENT_UNLOCK(ct);
1846 
1847 	if (flags == MDI_FAILOVER_ASYNC) {
1848 		/*
1849 		 * Submit the initiate failover request via CPR safe
1850 		 * taskq threads.
1851 		 */
1852 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1853 		    ct, KM_SLEEP);
1854 		return (MDI_ACCEPT);
1855 	} else {
1856 		/*
1857 		 * Synchronous failover mode.  Typically invoked from the user
1858 		 * land.
1859 		 */
1860 		rv = i_mdi_failover(ct);
1861 	}
1862 	return (rv);
1863 }
1864 
1865 /*
1866  * i_mdi_failover():
1867  *		internal failover function. Invokes vHCI drivers failover
1868  *		callback function and process the failover status
1869  * Return Values:
1870  *		None
1871  *
1872  * Note: A client device in failover state can not be detached or freed.
1873  */
1874 static int
1875 i_mdi_failover(void *arg)
1876 {
1877 	int		rv = MDI_SUCCESS;
1878 	mdi_client_t	*ct = (mdi_client_t *)arg;
1879 	mdi_vhci_t	*vh = ct->ct_vhci;
1880 
1881 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1882 
1883 	if (vh->vh_ops->vo_failover != NULL) {
1884 		/*
1885 		 * Call vHCI drivers callback routine
1886 		 */
1887 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1888 		    ct->ct_failover_flags);
1889 	}
1890 
1891 	MDI_CLIENT_LOCK(ct);
1892 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1893 
1894 	/*
1895 	 * Save the failover return status
1896 	 */
1897 	ct->ct_failover_status = rv;
1898 
1899 	/*
1900 	 * As a result of failover, client status would have been changed.
1901 	 * Update the client state and wake up anyone waiting on this client
1902 	 * device.
1903 	 */
1904 	i_mdi_client_update_state(ct);
1905 
1906 	cv_broadcast(&ct->ct_failover_cv);
1907 	MDI_CLIENT_UNLOCK(ct);
1908 	return (rv);
1909 }
1910 
1911 /*
1912  * Load balancing is logical block.
1913  * IOs within the range described by region_size
1914  * would go on the same path. This would improve the
1915  * performance by cache-hit on some of the RAID devices.
1916  * Search only for online paths(At some point we
1917  * may want to balance across target ports).
1918  * If no paths are found then default to round-robin.
1919  */
1920 static int
1921 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1922 {
1923 	int		path_index = -1;
1924 	int		online_path_count = 0;
1925 	int		online_nonpref_path_count = 0;
1926 	int 		region_size = ct->ct_lb_args->region_size;
1927 	mdi_pathinfo_t	*pip;
1928 	mdi_pathinfo_t	*next;
1929 	int		preferred, path_cnt;
1930 
1931 	pip = ct->ct_path_head;
1932 	while (pip) {
1933 		MDI_PI_LOCK(pip);
1934 		if (MDI_PI(pip)->pi_state ==
1935 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1936 			online_path_count++;
1937 		} else if (MDI_PI(pip)->pi_state ==
1938 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1939 			online_nonpref_path_count++;
1940 		}
1941 		next = (mdi_pathinfo_t *)
1942 		    MDI_PI(pip)->pi_client_link;
1943 		MDI_PI_UNLOCK(pip);
1944 		pip = next;
1945 	}
1946 	/* if found any online/preferred then use this type */
1947 	if (online_path_count > 0) {
1948 		path_cnt = online_path_count;
1949 		preferred = 1;
1950 	} else if (online_nonpref_path_count > 0) {
1951 		path_cnt = online_nonpref_path_count;
1952 		preferred = 0;
1953 	} else {
1954 		path_cnt = 0;
1955 	}
1956 	if (path_cnt) {
1957 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1958 		pip = ct->ct_path_head;
1959 		while (pip && path_index != -1) {
1960 			MDI_PI_LOCK(pip);
1961 			if (path_index == 0 &&
1962 			    (MDI_PI(pip)->pi_state ==
1963 			    MDI_PATHINFO_STATE_ONLINE) &&
1964 				MDI_PI(pip)->pi_preferred == preferred) {
1965 				MDI_PI_HOLD(pip);
1966 				MDI_PI_UNLOCK(pip);
1967 				*ret_pip = pip;
1968 				return (MDI_SUCCESS);
1969 			}
1970 			path_index --;
1971 			next = (mdi_pathinfo_t *)
1972 			    MDI_PI(pip)->pi_client_link;
1973 			MDI_PI_UNLOCK(pip);
1974 			pip = next;
1975 		}
1976 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1977 		    "lba %llx: path %s %p",
1978 		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1979 	}
1980 	return (MDI_FAILURE);
1981 }
1982 
1983 /*
1984  * mdi_select_path():
1985  *		select a path to access a client device.
1986  *
1987  *		mdi_select_path() function is called by the vHCI drivers to
1988  *		select a path to route the I/O request to.  The caller passes
1989  *		the block I/O data transfer structure ("buf") as one of the
1990  *		parameters.  The mpxio framework uses the buf structure
1991  *		contents to maintain per path statistics (total I/O size /
1992  *		count pending).  If more than one online paths are available to
1993  *		select, the framework automatically selects a suitable path
1994  *		for routing I/O request. If a failover operation is active for
1995  *		this client device the call shall be failed with MDI_BUSY error
1996  *		code.
1997  *
1998  *		By default this function returns a suitable path in online
1999  *		state based on the current load balancing policy.  Currently
2000  *		we support LOAD_BALANCE_NONE (Previously selected online path
2001  *		will continue to be used till the path is usable) and
2002  *		LOAD_BALANCE_RR (Online paths will be selected in a round
2003  *		robin fashion), LOAD_BALANCE_LB(Online paths will be selected
 *		based on the logical block).  The load balancing policy is
 *		set through the vHCI driver's configuration file (driver.conf).
2006  *
2007  *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the third argument depends
2009  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2010  *		then the argument is the "path instance" of the path to select.
2011  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2012  *		"start_pip". A non NULL "start_pip" is the starting point to
2013  *		walk and find the next appropriate path.  The following values
2014  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2015  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2016  *		STANDBY path).
2017  *
2018  *		The non-standard behavior is used by the scsi_vhci driver,
2019  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
2020  *		attach of client devices (to avoid an unnecessary failover
2021  *		when the STANDBY path comes up first), during failover
2022  *		(to activate a STANDBY path as ONLINE).
2023  *
 *		The selected path is returned in a mdi_hold_path() state
2025  *		(pi_ref_cnt). Caller should release the hold by calling
2026  *		mdi_rele_path().
2027  *
2028  * Return Values:
2029  *		MDI_SUCCESS	- Completed successfully
2030  *		MDI_BUSY 	- Client device is busy failing over
2031  *		MDI_NOPATH	- Client device is online, but no valid path are
2032  *				  available to access this client device
2033  *		MDI_FAILURE	- Invalid client device or state
2034  *		MDI_DEVI_ONLINING
2035  *				- Client device (struct dev_info state) is in
2036  *				  onlining state.
2037  */
2038 
2039 /*ARGSUSED*/
2040 int
2041 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2042     void *arg, mdi_pathinfo_t **ret_pip)
2043 {
2044 	mdi_client_t	*ct;
2045 	mdi_pathinfo_t	*pip;
2046 	mdi_pathinfo_t	*next;
2047 	mdi_pathinfo_t	*head;
2048 	mdi_pathinfo_t	*start;
2049 	client_lb_t	lbp;	/* load balancing policy */
2050 	int		sb = 1;	/* standard behavior */
2051 	int		preferred = 1;	/* preferred path */
2052 	int		cond, cont = 1;	/* cond: path matches; cont: keep walking */
2053 	int		retry = 0;	/* set when a transient path is seen */
2054 	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
2055 	int		path_instance;	/* request specific path instance */
2056 
	/*
	 * Overview:
	 *  1. Decode 'arg' (path instance or starting pathinfo node).
	 *  2. In standard mode (flags == 0) reject clients that are failed,
	 *     failing over, or not yet attached.
	 *  3. If a path instance was requested, return exactly that path.
	 *  4. Otherwise walk the client's path list according to the load
	 *     balancing policy, first for preferred and then for
	 *     non-preferred paths.
	 * Every MDI_SUCCESS return from this function itself takes
	 * MDI_PI_HOLD() on the selected path under its pathinfo lock.
	 */
2057 	/* determine type of arg based on flags */
2058 	if (flags & MDI_SELECT_PATH_INSTANCE) {
2059 		path_instance = (int)(intptr_t)arg;
2060 		start_pip = NULL;
2061 	} else {
2062 		path_instance = 0;
2063 		start_pip = (mdi_pathinfo_t *)arg;
2064 	}
2065 
2066 	if (flags != 0) {
2067 		/*
2068 		 * disable default behavior
		 * (the client state checks below are skipped and
		 * LOAD_BALANCE_RR is used instead of the client's policy)
2069 		 */
2070 		sb = 0;
2071 	}
2072 
2073 	*ret_pip = NULL;
2074 	ct = i_devi_get_client(cdip);
2075 	if (ct == NULL) {
2076 		/* mdi extensions are NULL, Nothing more to do */
2077 		return (MDI_FAILURE);
2078 	}
2079 
2080 	MDI_CLIENT_LOCK(ct);
2081 
2082 	if (sb) {
2083 		if (MDI_CLIENT_IS_FAILED(ct)) {
2084 			/*
2085 			 * Client is not ready to accept any I/O requests.
2086 			 * Fail this request.
2087 			 */
2088 			MDI_DEBUG(2, (MDI_NOTE, cdip,
2089 			    "client state offline ct = %p", (void *)ct));
2090 			MDI_CLIENT_UNLOCK(ct);
2091 			return (MDI_FAILURE);
2092 		}
2093 
2094 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2095 			/*
2096 			 * Check for Failover is in progress. If so tell the
2097 			 * caller that this device is busy.
2098 			 */
2099 			MDI_DEBUG(2, (MDI_NOTE, cdip,
2100 			    "client failover in progress ct = %p",
2101 			    (void *)ct));
2102 			MDI_CLIENT_UNLOCK(ct);
2103 			return (MDI_BUSY);
2104 		}
2105 
2106 		/*
2107 		 * Check to see whether the client device is attached.
2108 		 * If not so, let the vHCI driver manually select a path
2109 		 * (standby) and let the probe/attach process to continue.
2110 		 */
2111 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2112 			MDI_DEBUG(4, (MDI_NOTE, cdip,
2113 			    "devi is onlining ct = %p", (void *)ct));
2114 			MDI_CLIENT_UNLOCK(ct);
2115 			return (MDI_DEVI_ONLINING);
2116 		}
2117 	}
2118 
2119 	/*
2120 	 * Cache in the client list head.  If head of the list is NULL
2121 	 * return MDI_NOPATH
2122 	 */
2123 	head = ct->ct_path_head;
2124 	if (head == NULL) {
2125 		MDI_CLIENT_UNLOCK(ct);
2126 		return (MDI_NOPATH);
2127 	}
2128 
2129 	/* Caller is specifying a specific pathinfo path by path_instance */
2130 	if (path_instance) {
2131 		/* search for pathinfo with correct path_instance */
2132 		for (pip = head;
2133 		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
2134 		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2135 			;
2136 
2137 		/* If path can't be selected then MDI_NOPATH is returned. */
2138 		if (pip == NULL) {
2139 			MDI_CLIENT_UNLOCK(ct);
2140 			return (MDI_NOPATH);
2141 		}
2142 
2143 		/*
2144 		 * Verify state of path. When asked to select a specific
2145 		 * path_instance, we select the requested path in any
2146 		 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2147 		 * We don't however select paths where the pHCI has detached.
2148 		 * NOTE: last pathinfo node of an opened client device may
2149 		 * exist in an OFFLINE state after the pHCI associated with
2150 		 * that path has detached (but pi_phci will be NULL if that
2151 		 * has occurred).
2152 		 */
2153 		MDI_PI_LOCK(pip);
2154 		if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2155 		    (MDI_PI(pip)->pi_phci == NULL)) {
2156 			MDI_PI_UNLOCK(pip);
2157 			MDI_CLIENT_UNLOCK(ct);
2158 			return (MDI_FAILURE);
2159 		}
2160 
2161 		/* Return MDI_BUSY if we have a transient condition */
2162 		if (MDI_PI_IS_TRANSIENT(pip)) {
2163 			MDI_PI_UNLOCK(pip);
2164 			MDI_CLIENT_UNLOCK(ct);
2165 			return (MDI_BUSY);
2166 		}
2167 
2168 		/*
2169 		 * Return the path in hold state. Caller should release the
2170 		 * hold by calling mdi_rele_path()
2171 		 */
2172 		MDI_PI_HOLD(pip);
2173 		MDI_PI_UNLOCK(pip);
2174 		*ret_pip = pip;
2175 		MDI_CLIENT_UNLOCK(ct);
2176 		return (MDI_SUCCESS);
2177 	}
2178 
2179 	/*
2180 	 * for non default behavior, bypass current
2181 	 * load balancing policy and always use LOAD_BALANCE_RR
2182 	 * except that the start point will be adjusted based
2183 	 * on the provided start_pip
2184 	 */
2185 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2186 
2187 	switch (lbp) {
2188 	case LOAD_BALANCE_NONE:
2189 		/*
2190 		 * Load balancing is None or Alternate path mode
2191 		 * Start looking for an online mdi_pathinfo node starting from
2192 		 * last known selected path
2193 		 */
2194 		preferred = 1;
2195 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
2196 		if (pip == NULL) {
2197 			pip = head;
2198 		}
2199 		start = pip;
2200 		do {
2201 			MDI_PI_LOCK(pip);
2202 			/*
2203 			 * No need to explicitly check if the path is disabled.
2204 			 * Since we are checking for state == ONLINE and the
2205 			 * same variable is used for DISABLE/ENABLE information.
2206 			 */
2207 			if ((MDI_PI(pip)->pi_state  ==
2208 				MDI_PATHINFO_STATE_ONLINE) &&
2209 				preferred == MDI_PI(pip)->pi_preferred) {
2210 				/*
2211 				 * Return the path in hold state. Caller should
2212 				 * release the hold by calling mdi_rele_path()
2213 				 */
2214 				MDI_PI_HOLD(pip);
2215 				MDI_PI_UNLOCK(pip);
2216 				ct->ct_path_last = pip;
2217 				*ret_pip = pip;
2218 				MDI_CLIENT_UNLOCK(ct);
2219 				return (MDI_SUCCESS);
2220 			}
2221 
2222 			/*
2223 			 * Path is busy: note it so MDI_BUSY is returned below.
2224 			 */
2225 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2226 			    MDI_PI_IS_TRANSIENT(pip))
2227 				retry = 1;
2228 			/*
2229 			 * Keep looking for the next available online path
2230 			 */
2231 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2232 			if (next == NULL) {
2233 				next = head;
2234 			}
2235 			MDI_PI_UNLOCK(pip);
2236 			pip = next;
			/*
			 * Back at the starting node: the first wrap switches
			 * the search to non-preferred paths, the second wrap
			 * ends it.
			 */
2237 			if (start == pip && preferred) {
2238 				preferred = 0;
2239 			} else if (start == pip && !preferred) {
2240 				cont = 0;
2241 			}
2242 		} while (cont);
2243 		break;
2244 
2245 	case LOAD_BALANCE_LBA:
2246 		/*
2247 		 * Make sure we are looking
2248 		 * for an online path. Otherwise, if it is for a STANDBY
2249 		 * path request, it will go through and fetch an ONLINE
2250 		 * path which is not desirable.
2251 		 */
2252 		if ((ct->ct_lb_args != NULL) &&
2253 			    (ct->ct_lb_args->region_size) && bp &&
2254 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2255 			if (i_mdi_lba_lb(ct, ret_pip, bp)
2256 				    == MDI_SUCCESS) {
2257 				MDI_CLIENT_UNLOCK(ct);
2258 				return (MDI_SUCCESS);
2259 			}
2260 		}
2261 		/* FALLTHROUGH */
2262 	case LOAD_BALANCE_RR:
2263 		/*
2264 		 * Load balancing is Round Robin. Start looking for an online
2265 		 * mdi_pathinfo node starting from last known selected path
2266 		 * as the start point.  If override flags are specified,
2267 		 * process accordingly.
2268 		 * If the search is already in effect(start_pip not null),
2269 		 * then let's just use the same path preference to continue the
2270 		 * traversal.
2271 		 */
2272 
2273 		if (start_pip != NULL) {
2274 			preferred = MDI_PI(start_pip)->pi_preferred;
2275 		} else {
2276 			preferred = 1;
2277 		}
2278 
		/*
		 * Standard mode resumes after the last path handed out;
		 * otherwise the search resumes after the caller's start_pip.
		 */
2279 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2280 		if (start == NULL) {
2281 			pip = head;
2282 		} else {
2283 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2284 			if (pip == NULL) {
2285 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2286 					/*
2287 					 * Return since we hit the end of list
2288 					 */
2289 					MDI_CLIENT_UNLOCK(ct);
2290 					return (MDI_NOPATH);
2291 				}
2292 
2293 				if (!sb) {
2294 					if (preferred == 0) {
2295 						/*
2296 						 * Looks like we have completed
2297 						 * the traversal as preferred
2298 						 * value is 0. Time to bail out.
2299 						 */
2300 						*ret_pip = NULL;
2301 						MDI_CLIENT_UNLOCK(ct);
2302 						return (MDI_NOPATH);
2303 					} else {
2304 						/*
2305 						 * Looks like we reached the
2306 						 * end of the list. Let's enable
2307 						 * traversal of non preferred
2308 						 * paths.
2309 						 */
2310 						preferred = 0;
2311 					}
2312 				}
2313 				pip = head;
2314 			}
2315 		}
2316 		start = pip;
2317 		do {
2318 			MDI_PI_LOCK(pip);
			/*
			 * Compute 'cond': does this path satisfy the request?
			 * Only the exact flag combinations tested below are
			 * recognized; any other combination never matches.
			 */
2319 			if (sb) {
2320 				cond = ((MDI_PI(pip)->pi_state ==
2321 				    MDI_PATHINFO_STATE_ONLINE &&
2322 					MDI_PI(pip)->pi_preferred ==
2323 						preferred) ? 1 : 0);
2324 			} else {
2325 				if (flags == MDI_SELECT_ONLINE_PATH) {
2326 					cond = ((MDI_PI(pip)->pi_state ==
2327 					    MDI_PATHINFO_STATE_ONLINE &&
2328 						MDI_PI(pip)->pi_preferred ==
2329 						preferred) ? 1 : 0);
2330 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2331 					cond = ((MDI_PI(pip)->pi_state ==
2332 					    MDI_PATHINFO_STATE_STANDBY &&
2333 						MDI_PI(pip)->pi_preferred ==
2334 						preferred) ? 1 : 0);
2335 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2336 				    MDI_SELECT_STANDBY_PATH)) {
2337 					cond = (((MDI_PI(pip)->pi_state ==
2338 					    MDI_PATHINFO_STATE_ONLINE ||
2339 					    (MDI_PI(pip)->pi_state ==
2340 					    MDI_PATHINFO_STATE_STANDBY)) &&
2341 						MDI_PI(pip)->pi_preferred ==
2342 						preferred) ? 1 : 0);
2343 				} else if (flags ==
2344 					(MDI_SELECT_STANDBY_PATH |
2345 					MDI_SELECT_ONLINE_PATH |
2346 					MDI_SELECT_USER_DISABLE_PATH)) {
2347 					cond = (((MDI_PI(pip)->pi_state ==
2348 					    MDI_PATHINFO_STATE_ONLINE ||
2349 					    (MDI_PI(pip)->pi_state ==
2350 					    MDI_PATHINFO_STATE_STANDBY) ||
2351 						(MDI_PI(pip)->pi_state ==
2352 					    (MDI_PATHINFO_STATE_ONLINE|
2353 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2354 						(MDI_PI(pip)->pi_state ==
2355 					    (MDI_PATHINFO_STATE_STANDBY |
2356 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2357 						MDI_PI(pip)->pi_preferred ==
2358 						preferred) ? 1 : 0);
2359 				} else if (flags ==
2360 				    (MDI_SELECT_STANDBY_PATH |
2361 				    MDI_SELECT_ONLINE_PATH |
2362 				    MDI_SELECT_NO_PREFERRED)) {
2363 					cond = (((MDI_PI(pip)->pi_state ==
2364 					    MDI_PATHINFO_STATE_ONLINE) ||
2365 					    (MDI_PI(pip)->pi_state ==
2366 					    MDI_PATHINFO_STATE_STANDBY))
2367 					    ? 1 : 0);
2368 				} else {
2369 					cond = 0;
2370 				}
2371 			}
2372 			/*
2373 			 * No need to explicitly check if the path is disabled.
2374 			 * Since we are checking for state == ONLINE and the
2375 			 * same variable is used for DISABLE/ENABLE information.
2376 			 */
2377 			if (cond) {
2378 				/*
2379 				 * Return the path in hold state. Caller should
2380 				 * release the hold by calling mdi_rele_path()
2381 				 */
2382 				MDI_PI_HOLD(pip);
2383 				MDI_PI_UNLOCK(pip);
				/* ct_path_last is updated in standard mode only */
2384 				if (sb)
2385 					ct->ct_path_last = pip;
2386 				*ret_pip = pip;
2387 				MDI_CLIENT_UNLOCK(ct);
2388 				return (MDI_SUCCESS);
2389 			}
2390 			/*
2391 			 * Path is busy: note it so MDI_BUSY is returned below.
2392 			 */
2393 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2394 			    MDI_PI_IS_TRANSIENT(pip))
2395 				retry = 1;
2396 
2397 			/*
2398 			 * Keep looking for the next available online path
			 * (do_again is also re-entered, with pip locked, to
			 * step past start_pip; see the goto further down).
2399 			 */
2400 do_again:
2401 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2402 			if (next == NULL) {
2403 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2404 					/*
2405 					 * Bail out since we hit the end of list
2406 					 */
2407 					MDI_PI_UNLOCK(pip);
2408 					break;
2409 				}
2410 
2411 				if (!sb) {
2412 					if (preferred == 1) {
2413 						/*
2414 						 * Looks like we reached the
2415 						 * end of the list. Let's enable
2416 						 * traversal of non preferred
2417 						 * paths.
2418 						 */
2419 						preferred = 0;
2420 						next = head;
2421 					} else {
2422 						/*
2423 						 * We have done both the passes
2424 						 * Preferred as well as for
2425 						 * Non-preferred. Bail out now.
2426 						 */
2427 						cont = 0;
2428 					}
2429 				} else {
2430 					/*
2431 					 * Standard behavior case.
2432 					 */
2433 					next = head;
2434 				}
2435 			}
2436 			MDI_PI_UNLOCK(pip);
2437 			if (cont == 0) {
2438 				break;
2439 			}
2440 			pip = next;
2441 
2442 			if (!sb) {
2443 				/*
2444 				 * We need to handle the selection of
2445 				 * non-preferred path in the following
2446 				 * case:
2447 				 *
2448 				 * +------+   +------+   +------+   +-----+
2449 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2450 				 * +------+   +------+   +------+   +-----+
2451 				 *
2452 				 * If we start the search with B, we need to
2453 				 * skip beyond B to pick C which is non -
2454 				 * preferred in the second pass. The following
2455 				 * test, if true, will allow us to skip over
2456 				 * the 'start'(B in the example) to select
2457 				 * other non preferred elements.
2458 				 */
2459 				if ((start_pip != NULL) && (start_pip == pip) &&
2460 				    (MDI_PI(start_pip)->pi_preferred
2461 				    != preferred)) {
2462 					/*
2463 					 * try again after going past the start
2464 					 * pip
2465 					 */
2466 					MDI_PI_LOCK(pip);
2467 					goto do_again;
2468 				}
2469 			} else {
2470 				/*
2471 				 * Standard behavior case
2472 				 */
2473 				if (start == pip && preferred) {
2474 					/* look for nonpreferred paths */
2475 					preferred = 0;
2476 				} else if (start == pip && !preferred) {
2477 					/*
2478 					 * Exit condition
2479 					 */
2480 					cont = 0;
2481 				}
2482 			}
2483 		} while (cont);
2484 		break;
2485 	}
2486 
	/*
	 * No path was selected.  Report MDI_BUSY if a transient path was
	 * seen during the walk, MDI_NOPATH otherwise.
	 */
2487 	MDI_CLIENT_UNLOCK(ct);
2488 	if (retry == 1) {
2489 		return (MDI_BUSY);
2490 	} else {
2491 		return (MDI_NOPATH);
2492 	}
2493 }
2494 
2495 /*
2496  * For a client, return the next available path to any phci
2497  *
2498  * Note:
2499  *		Caller should hold the branch's devinfo node to get a consistent
2500  *		snap shot of the mdi_pathinfo nodes.
2501  *
2502  *		Please note that even the list is stable the mdi_pathinfo
2503  *		node state and properties are volatile.  The caller should lock
2504  *		and unlock the nodes by calling mdi_pi_lock() and
2505  *		mdi_pi_unlock() functions to get a stable properties.
2506  *
2507  *		If there is a need to use the nodes beyond the hold of the
2508  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2509  *		need to be held against unexpected removal by calling
2510  *		mdi_hold_path() and should be released by calling
2511  *		mdi_rele_path() on completion.
2512  */
2513 mdi_pathinfo_t *
2514 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2515 {
2516 	mdi_client_t *ct;
2517 
2518 	if (!MDI_CLIENT(ct_dip))
2519 		return (NULL);
2520 
2521 	/*
2522 	 * Walk through client link
2523 	 */
2524 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2525 	ASSERT(ct != NULL);
2526 
2527 	if (pip == NULL)
2528 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2529 
2530 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2531 }
2532 
2533 /*
2534  * For a phci, return the next available path to any client
2535  * Note: ditto mdi_get_next_phci_path()
2536  */
2537 mdi_pathinfo_t *
2538 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2539 {
2540 	mdi_phci_t *ph;
2541 
2542 	if (!MDI_PHCI(ph_dip))
2543 		return (NULL);
2544 
2545 	/*
2546 	 * Walk through pHCI link
2547 	 */
2548 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2549 	ASSERT(ph != NULL);
2550 
2551 	if (pip == NULL)
2552 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2553 
2554 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2555 }
2556 
2557 /*
2558  * mdi_hold_path():
2559  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2560  * Return Values:
2561  *		None
2562  */
2563 void
2564 mdi_hold_path(mdi_pathinfo_t *pip)
2565 {
2566 	if (pip) {
2567 		MDI_PI_LOCK(pip);
2568 		MDI_PI_HOLD(pip);
2569 		MDI_PI_UNLOCK(pip);
2570 	}
2571 }
2572 
2573 
2574 /*
2575  * mdi_rele_path():
2576  *		Release the mdi_pathinfo node which was selected
2577  *		through mdi_select_path() mechanism or manually held by
2578  *		calling mdi_hold_path().
2579  * Return Values:
2580  *		None
2581  */
2582 void
2583 mdi_rele_path(mdi_pathinfo_t *pip)
2584 {
2585 	if (pip) {
2586 		MDI_PI_LOCK(pip);
2587 		MDI_PI_RELE(pip);
2588 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2589 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2590 		}
2591 		MDI_PI_UNLOCK(pip);
2592 	}
2593 }
2594 
2595 /*
2596  * mdi_pi_lock():
2597  * 		Lock the mdi_pathinfo node.
2598  * Note:
2599  *		The caller should release the lock by calling mdi_pi_unlock()
2600  */
2601 void
2602 mdi_pi_lock(mdi_pathinfo_t *pip)
2603 {
2604 	ASSERT(pip != NULL);
2605 	if (pip) {
2606 		MDI_PI_LOCK(pip);
2607 	}
2608 }
2609 
2610 
2611 /*
2612  * mdi_pi_unlock():
2613  * 		Unlock the mdi_pathinfo node.
2614  * Note:
2615  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2616  */
2617 void
2618 mdi_pi_unlock(mdi_pathinfo_t *pip)
2619 {
2620 	ASSERT(pip != NULL);
2621 	if (pip) {
2622 		MDI_PI_UNLOCK(pip);
2623 	}
2624 }
2625 
2626 /*
2627  * mdi_pi_find():
2628  *		Search the list of mdi_pathinfo nodes attached to the
2629  *		pHCI/Client device node whose path address matches "paddr".
2630  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2631  *		found.
2632  * Return Values:
2633  *		mdi_pathinfo node handle
2634  *		NULL
2635  * Notes:
2636  *		Caller need not hold any locks to call this function.
2637  */
2638 mdi_pathinfo_t *
2639 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2640 {
2641 	mdi_phci_t		*ph;
2642 	mdi_vhci_t		*vh;
2643 	mdi_client_t		*ct;
2644 	mdi_pathinfo_t		*pip = NULL;
2645 
2646 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2647 	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2648 	if ((pdip == NULL) || (paddr == NULL)) {
2649 		return (NULL);
2650 	}
2651 	ph = i_devi_get_phci(pdip);
2652 	if (ph == NULL) {
2653 		/*
2654 		 * Invalid pHCI device, Nothing more to do.
2655 		 */
2656 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2657 		return (NULL);
2658 	}
2659 
2660 	vh = ph->ph_vhci;
2661 	if (vh == NULL) {
2662 		/*
2663 		 * Invalid vHCI device, Nothing more to do.
2664 		 */
2665 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2666 		return (NULL);
2667 	}
2668 
2669 	/*
2670 	 * Look for pathinfo node identified by paddr.
2671 	 */
2672 	if (caddr == NULL) {
2673 		/*
2674 		 * Find a mdi_pathinfo node under pHCI list for a matching
2675 		 * unit address.
2676 		 */
2677 		MDI_PHCI_LOCK(ph);
2678 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2679 			MDI_DEBUG(2, (MDI_WARN, pdip,
2680 			    "offline phci %p", (void *)ph));
2681 			MDI_PHCI_UNLOCK(ph);
2682 			return (NULL);
2683 		}
2684 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2685 
2686 		while (pip != NULL) {
2687 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2688 				break;
2689 			}
2690 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2691 		}
2692 		MDI_PHCI_UNLOCK(ph);
2693 		MDI_DEBUG(2, (MDI_NOTE, pdip,
2694 		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2695 		return (pip);
2696 	}
2697 
2698 	/*
2699 	 * XXX - Is the rest of the code in this function really necessary?
2700 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2701 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2702 	 * whether the search is based on the pathinfo nodes attached to
2703 	 * the pHCI or the client node, the result will be the same.
2704 	 */
2705 
2706 	/*
2707 	 * Find the client device corresponding to 'caddr'
2708 	 */
2709 	MDI_VHCI_CLIENT_LOCK(vh);
2710 
2711 	/*
2712 	 * XXX - Passing NULL to the following function works as long as the
2713 	 * the client addresses (caddr) are unique per vhci basis.
2714 	 */
2715 	ct = i_mdi_client_find(vh, NULL, caddr);
2716 	if (ct == NULL) {
2717 		/*
2718 		 * Client not found, Obviously mdi_pathinfo node has not been
2719 		 * created yet.
2720 		 */
2721 		MDI_VHCI_CLIENT_UNLOCK(vh);
2722 		MDI_DEBUG(2, (MDI_NOTE, pdip,
2723 		    "client not found for caddr @%s", caddr ? caddr : ""));
2724 		return (NULL);
2725 	}
2726 
2727 	/*
2728 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2729 	 * pHCI and paddr
2730 	 */
2731 	MDI_CLIENT_LOCK(ct);
2732 
2733 	/*
2734 	 * Release the global mutex as it is no more needed. Note: We always
2735 	 * respect the locking order while acquiring.
2736 	 */
2737 	MDI_VHCI_CLIENT_UNLOCK(vh);
2738 
2739 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2740 	while (pip != NULL) {
2741 		/*
2742 		 * Compare the unit address
2743 		 */
2744 		if ((MDI_PI(pip)->pi_phci == ph) &&
2745 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2746 			break;
2747 		}
2748 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2749 	}
2750 	MDI_CLIENT_UNLOCK(ct);
2751 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2752 	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2753 	return (pip);
2754 }
2755 
2756 /*
2757  * mdi_pi_alloc():
2758  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2759  *		The mdi_pathinfo node returned by this function identifies a
2760  *		unique device path is capable of having properties attached
2761  *		and passed to mdi_pi_online() to fully attach and online the
2762  *		path and client device node.
2763  *		The mdi_pathinfo node returned by this function must be
2764  *		destroyed using mdi_pi_free() if the path is no longer
2765  *		operational or if the caller fails to attach a client device
2766  *		node when calling mdi_pi_online(). The framework will not free
2767  *		the resources allocated.
2768  *		This function can be called from both interrupt and kernel
2769  *		contexts.  DDI_NOSLEEP flag should be used while calling
2770  *		from interrupt contexts.
2771  * Return Values:
2772  *		MDI_SUCCESS
2773  *		MDI_FAILURE
2774  *		MDI_NOMEM
2775  */
2776 /*ARGSUSED*/
2777 int
2778 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2779     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2780 {
2781 	mdi_vhci_t	*vh;
2782 	mdi_phci_t	*ph;
2783 	mdi_client_t	*ct;
2784 	mdi_pathinfo_t	*pip = NULL;
2785 	dev_info_t	*cdip;
2786 	int		rv = MDI_NOMEM;
2787 	int		path_allocated = 0;
2788 
2789 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2790 	    "cname %s: caddr@%s paddr@%s",
2791 	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2792 
2793 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2794 	    ret_pip == NULL) {
2795 		/* Nothing more to do */
2796 		return (MDI_FAILURE);
2797 	}
2798 
2799 	*ret_pip = NULL;
2800 
2801 	/* No allocations on detaching pHCI */
2802 	if (DEVI_IS_DETACHING(pdip)) {
2803 		/* Invalid pHCI device, return failure */
2804 		MDI_DEBUG(1, (MDI_WARN, pdip,
2805 		    "!detaching pHCI=%p", (void *)pdip));
2806 		return (MDI_FAILURE);
2807 	}
2808 
2809 	ph = i_devi_get_phci(pdip);
2810 	ASSERT(ph != NULL);
2811 	if (ph == NULL) {
2812 		/* Invalid pHCI device, return failure */
2813 		MDI_DEBUG(1, (MDI_WARN, pdip,
2814 		    "!invalid pHCI=%p", (void *)pdip));
2815 		return (MDI_FAILURE);
2816 	}
2817 
2818 	MDI_PHCI_LOCK(ph);
2819 	vh = ph->ph_vhci;
2820 	if (vh == NULL) {
2821 		/* Invalid vHCI device, return failure */
2822 		MDI_DEBUG(1, (MDI_WARN, pdip,
2823 		    "!invalid vHCI=%p", (void *)pdip));
2824 		MDI_PHCI_UNLOCK(ph);
2825 		return (MDI_FAILURE);
2826 	}
2827 
2828 	if (MDI_PHCI_IS_READY(ph) == 0) {
2829 		/*
2830 		 * Do not allow new node creation when pHCI is in
2831 		 * offline/suspended states
2832 		 */
2833 		MDI_DEBUG(1, (MDI_WARN, pdip,
2834 		    "pHCI=%p is not ready", (void *)ph));
2835 		MDI_PHCI_UNLOCK(ph);
2836 		return (MDI_BUSY);
2837 	}
2838 	MDI_PHCI_UNSTABLE(ph);
2839 	MDI_PHCI_UNLOCK(ph);
2840 
2841 	/* look for a matching client, create one if not found */
2842 	MDI_VHCI_CLIENT_LOCK(vh);
2843 	ct = i_mdi_client_find(vh, cname, caddr);
2844 	if (ct == NULL) {
2845 		ct = i_mdi_client_alloc(vh, cname, caddr);
2846 		ASSERT(ct != NULL);
2847 	}
2848 
2849 	if (ct->ct_dip == NULL) {
2850 		/*
2851 		 * Allocate a devinfo node
2852 		 */
2853 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2854 		    compatible, ncompatible);
2855 		if (ct->ct_dip == NULL) {
2856 			(void) i_mdi_client_free(vh, ct);
2857 			goto fail;
2858 		}
2859 	}
2860 	cdip = ct->ct_dip;
2861 
2862 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2863 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2864 
2865 	MDI_CLIENT_LOCK(ct);
2866 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2867 	while (pip != NULL) {
2868 		/*
2869 		 * Compare the unit address
2870 		 */
2871 		if ((MDI_PI(pip)->pi_phci == ph) &&
2872 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2873 			break;
2874 		}
2875 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2876 	}
2877 	MDI_CLIENT_UNLOCK(ct);
2878 
2879 	if (pip == NULL) {
2880 		/*
2881 		 * This is a new path for this client device.  Allocate and
2882 		 * initialize a new pathinfo node
2883 		 */
2884 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2885 		ASSERT(pip != NULL);
2886 		path_allocated = 1;
2887 	}
2888 	rv = MDI_SUCCESS;
2889 
2890 fail:
2891 	/*
2892 	 * Release the global mutex.
2893 	 */
2894 	MDI_VHCI_CLIENT_UNLOCK(vh);
2895 
2896 	/*
2897 	 * Mark the pHCI as stable
2898 	 */
2899 	MDI_PHCI_LOCK(ph);
2900 	MDI_PHCI_STABLE(ph);
2901 	MDI_PHCI_UNLOCK(ph);
2902 	*ret_pip = pip;
2903 
2904 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2905 	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2906 
2907 	if (path_allocated)
2908 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2909 
2910 	return (rv);
2911 }
2912 
2913 /*ARGSUSED*/
2914 int
2915 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2916     int flags, mdi_pathinfo_t **ret_pip)
2917 {
2918 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2919 	    flags, ret_pip));
2920 }
2921 
2922 /*
2923  * i_mdi_pi_alloc():
2924  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2925  * Return Values:
2926  *		mdi_pathinfo
2927  */
2928 /*ARGSUSED*/
2929 static mdi_pathinfo_t *
2930 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2931 {
2932 	mdi_pathinfo_t	*pip;
2933 	int		ct_circular;
2934 	int		ph_circular;
2935 	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
2936 	char		*path_persistent;
2937 	int		path_instance;
2938 	mod_hash_val_t	hv;
2939 
2940 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2941 
2942 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2943 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2944 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2945 	    MDI_PATHINFO_STATE_TRANSIENT;
2946 
2947 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2948 		MDI_PI_SET_USER_DISABLE(pip);
2949 
2950 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2951 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2952 
2953 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2954 		MDI_PI_SET_DRV_DISABLE(pip);
2955 
2956 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2957 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2958 	MDI_PI(pip)->pi_client = ct;
2959 	MDI_PI(pip)->pi_phci = ph;
2960 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2961 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2962 
2963         /*
2964 	 * We form the "path" to the pathinfo node, and see if we have
2965 	 * already allocated a 'path_instance' for that "path".  If so,
2966 	 * we use the already allocated 'path_instance'.  If not, we
2967 	 * allocate a new 'path_instance' and associate it with a copy of
2968 	 * the "path" string (which is never freed). The association
2969 	 * between a 'path_instance' this "path" string persists until
2970 	 * reboot.
2971 	 */
2972         mutex_enter(&mdi_pathmap_mutex);
2973 	(void) ddi_pathname(ph->ph_dip, path);
2974 	(void) sprintf(path + strlen(path), "/%s@%s",
2975 	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2976         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2977                 path_instance = (uint_t)(intptr_t)hv;
2978         } else {
2979 		/* allocate a new 'path_instance' and persistent "path" */
2980 		path_instance = mdi_pathmap_instance++;
2981 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2982                 (void) mod_hash_insert(mdi_pathmap_bypath,
2983                     (mod_hash_key_t)path_persistent,
2984                     (mod_hash_val_t)(intptr_t)path_instance);
2985 		(void) mod_hash_insert(mdi_pathmap_byinstance,
2986 		    (mod_hash_key_t)(intptr_t)path_instance,
2987 		    (mod_hash_val_t)path_persistent);
2988 
2989 		/* create shortpath name */
2990 		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2991 		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2992 		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2993 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2994 		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
2995 		    (mod_hash_key_t)(intptr_t)path_instance,
2996 		    (mod_hash_val_t)path_persistent);
2997         }
2998         mutex_exit(&mdi_pathmap_mutex);
2999 	MDI_PI(pip)->pi_path_instance = path_instance;
3000 
3001 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
3002 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
3003 	MDI_PI(pip)->pi_pprivate = NULL;
3004 	MDI_PI(pip)->pi_cprivate = NULL;
3005 	MDI_PI(pip)->pi_vprivate = NULL;
3006 	MDI_PI(pip)->pi_client_link = NULL;
3007 	MDI_PI(pip)->pi_phci_link = NULL;
3008 	MDI_PI(pip)->pi_ref_cnt = 0;
3009 	MDI_PI(pip)->pi_kstats = NULL;
3010 	MDI_PI(pip)->pi_preferred = 1;
3011 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3012 
3013 	/*
3014 	 * Lock both dev_info nodes against changes in parallel.
3015 	 *
3016 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3017 	 * This atypical operation is done to synchronize pathinfo nodes
3018 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
3019 	 * the pathinfo nodes are children of the Client.
3020 	 */
3021 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3022 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3023 
3024 	i_mdi_phci_add_path(ph, pip);
3025 	i_mdi_client_add_path(ct, pip);
3026 
3027 	ndi_devi_exit(ph->ph_dip, ph_circular);
3028 	ndi_devi_exit(ct->ct_dip, ct_circular);
3029 
3030 	return (pip);
3031 }
3032 
3033 /*
3034  * mdi_pi_pathname_by_instance():
3035  *	Lookup of "path" by 'path_instance'. Return "path".
3036  *	NOTE: returned "path" remains valid forever (until reboot).
3037  */
3038 char *
3039 mdi_pi_pathname_by_instance(int path_instance)
3040 {
3041 	char		*path;
3042 	mod_hash_val_t	hv;
3043 
3044 	/* mdi_pathmap lookup of "path" by 'path_instance' */
3045 	mutex_enter(&mdi_pathmap_mutex);
3046 	if (mod_hash_find(mdi_pathmap_byinstance,
3047 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3048 		path = (char *)hv;
3049 	else
3050 		path = NULL;
3051 	mutex_exit(&mdi_pathmap_mutex);
3052 	return (path);
3053 }
3054 
3055 /*
3056  * mdi_pi_spathname_by_instance():
3057  *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3058  *	NOTE: returned "shortpath" remains valid forever (until reboot).
3059  */
3060 char *
3061 mdi_pi_spathname_by_instance(int path_instance)
3062 {
3063 	char		*path;
3064 	mod_hash_val_t	hv;
3065 
3066 	/* mdi_pathmap lookup of "path" by 'path_instance' */
3067 	mutex_enter(&mdi_pathmap_mutex);
3068 	if (mod_hash_find(mdi_pathmap_sbyinstance,
3069 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3070 		path = (char *)hv;
3071 	else
3072 		path = NULL;
3073 	mutex_exit(&mdi_pathmap_mutex);
3074 	return (path);
3075 }
3076 
3077 
3078 /*
3079  * i_mdi_phci_add_path():
3080  * 		Add a mdi_pathinfo node to pHCI list.
3081  * Notes:
3082  *		Caller should per-pHCI mutex
3083  */
3084 static void
3085 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3086 {
3087 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3088 
3089 	MDI_PHCI_LOCK(ph);
3090 	if (ph->ph_path_head == NULL) {
3091 		ph->ph_path_head = pip;
3092 	} else {
3093 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3094 	}
3095 	ph->ph_path_tail = pip;
3096 	ph->ph_path_count++;
3097 	MDI_PHCI_UNLOCK(ph);
3098 }
3099 
3100 /*
3101  * i_mdi_client_add_path():
3102  *		Add mdi_pathinfo node to client list
3103  */
3104 static void
3105 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3106 {
3107 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3108 
3109 	MDI_CLIENT_LOCK(ct);
3110 	if (ct->ct_path_head == NULL) {
3111 		ct->ct_path_head = pip;
3112 	} else {
3113 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3114 	}
3115 	ct->ct_path_tail = pip;
3116 	ct->ct_path_count++;
3117 	MDI_CLIENT_UNLOCK(ct);
3118 }
3119 
3120 /*
3121  * mdi_pi_free():
3122  *		Free the mdi_pathinfo node and also client device node if this
3123  *		is the last path to the device
3124  * Return Values:
3125  *		MDI_SUCCESS
3126  *		MDI_FAILURE
3127  *		MDI_BUSY
3128  */
3129 /*ARGSUSED*/
3130 int
3131 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3132 {
3133 	int		rv;
3134 	mdi_vhci_t	*vh;
3135 	mdi_phci_t	*ph;
3136 	mdi_client_t	*ct;
3137 	int		(*f)();
3138 	int		client_held = 0;
3139 
3140 	MDI_PI_LOCK(pip);
3141 	ph = MDI_PI(pip)->pi_phci;
3142 	ASSERT(ph != NULL);
3143 	if (ph == NULL) {
3144 		/*
3145 		 * Invalid pHCI device, return failure
3146 		 */
3147 		MDI_DEBUG(1, (MDI_WARN, NULL,
3148 		    "!invalid pHCI: pip %s %p",
3149 		    mdi_pi_spathname(pip), (void *)pip));
3150 		MDI_PI_UNLOCK(pip);
3151 		return (MDI_FAILURE);
3152 	}
3153 
3154 	vh = ph->ph_vhci;
3155 	ASSERT(vh != NULL);
3156 	if (vh == NULL) {
3157 		/* Invalid pHCI device, return failure */
3158 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3159 		    "!invalid vHCI: pip %s %p",
3160 		    mdi_pi_spathname(pip), (void *)pip));
3161 		MDI_PI_UNLOCK(pip);
3162 		return (MDI_FAILURE);
3163 	}
3164 
3165 	ct = MDI_PI(pip)->pi_client;
3166 	ASSERT(ct != NULL);
3167 	if (ct == NULL) {
3168 		/*
3169 		 * Invalid Client device, return failure
3170 		 */
3171 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3172 		    "!invalid client: pip %s %p",
3173 		    mdi_pi_spathname(pip), (void *)pip));
3174 		MDI_PI_UNLOCK(pip);
3175 		return (MDI_FAILURE);
3176 	}
3177 
3178 	/*
3179 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
3180 	 * if the node state is either offline or init and the reference count
3181 	 * is zero.
3182 	 */
3183 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3184 	    MDI_PI_IS_INITING(pip))) {
3185 		/*
3186 		 * Node is busy
3187 		 */
3188 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3189 		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3190 		MDI_PI_UNLOCK(pip);
3191 		return (MDI_BUSY);
3192 	}
3193 
3194 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3195 		/*
3196 		 * Give a chance for pending I/Os to complete.
3197 		 */
3198 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3199 		    "!%d cmds still pending on path: %s %p",
3200 		    MDI_PI(pip)->pi_ref_cnt,
3201 		    mdi_pi_spathname(pip), (void *)pip));
3202 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3203 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3204 		    TR_CLOCK_TICK) == -1) {
3205 			/*
3206 			 * The timeout time reached without ref_cnt being zero
3207 			 * being signaled.
3208 			 */
3209 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3210 			    "!Timeout reached on path %s %p without the cond",
3211 			    mdi_pi_spathname(pip), (void *)pip));
3212 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3213 			    "!%d cmds still pending on path %s %p",
3214 			    MDI_PI(pip)->pi_ref_cnt,
3215 			    mdi_pi_spathname(pip), (void *)pip));
3216 			MDI_PI_UNLOCK(pip);
3217 			return (MDI_BUSY);
3218 		}
3219 	}
3220 	if (MDI_PI(pip)->pi_pm_held) {
3221 		client_held = 1;
3222 	}
3223 	MDI_PI_UNLOCK(pip);
3224 
3225 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3226 
3227 	MDI_CLIENT_LOCK(ct);
3228 
3229 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3230 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3231 
3232 	/*
3233 	 * Wait till failover is complete before removing this node.
3234 	 */
3235 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3236 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3237 
3238 	MDI_CLIENT_UNLOCK(ct);
3239 	MDI_VHCI_CLIENT_LOCK(vh);
3240 	MDI_CLIENT_LOCK(ct);
3241 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3242 
3243 	if (!MDI_PI_IS_INITING(pip)) {
3244 		f = vh->vh_ops->vo_pi_uninit;
3245 		if (f != NULL) {
3246 			rv = (*f)(vh->vh_dip, pip, 0);
3247 		}
3248 	} else
3249 		rv = MDI_SUCCESS;
3250 
3251 	/*
3252 	 * If vo_pi_uninit() completed successfully.
3253 	 */
3254 	if (rv == MDI_SUCCESS) {
3255 		if (client_held) {
3256 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3257 			    "i_mdi_pm_rele_client\n"));
3258 			i_mdi_pm_rele_client(ct, 1);
3259 		}
3260 		i_mdi_pi_free(ph, pip, ct);
3261 		if (ct->ct_path_count == 0) {
3262 			/*
3263 			 * Client lost its last path.
3264 			 * Clean up the client device
3265 			 */
3266 			MDI_CLIENT_UNLOCK(ct);
3267 			(void) i_mdi_client_free(ct->ct_vhci, ct);
3268 			MDI_VHCI_CLIENT_UNLOCK(vh);
3269 			return (rv);
3270 		}
3271 	}
3272 	MDI_CLIENT_UNLOCK(ct);
3273 	MDI_VHCI_CLIENT_UNLOCK(vh);
3274 
3275 	if (rv == MDI_FAILURE)
3276 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3277 
3278 	return (rv);
3279 }
3280 
3281 /*
3282  * i_mdi_pi_free():
3283  *		Free the mdi_pathinfo node
3284  */
3285 static void
3286 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3287 {
3288 	int	ct_circular;
3289 	int	ph_circular;
3290 
3291 	ASSERT(MDI_CLIENT_LOCKED(ct));
3292 
3293 	/*
3294 	 * remove any per-path kstats
3295 	 */
3296 	i_mdi_pi_kstat_destroy(pip);
3297 
3298 	/* See comments in i_mdi_pi_alloc() */
3299 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3300 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3301 
3302 	i_mdi_client_remove_path(ct, pip);
3303 	i_mdi_phci_remove_path(ph, pip);
3304 
3305 	ndi_devi_exit(ph->ph_dip, ph_circular);
3306 	ndi_devi_exit(ct->ct_dip, ct_circular);
3307 
3308 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3309 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3310 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3311 	if (MDI_PI(pip)->pi_addr) {
3312 		kmem_free(MDI_PI(pip)->pi_addr,
3313 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3314 		MDI_PI(pip)->pi_addr = NULL;
3315 	}
3316 
3317 	if (MDI_PI(pip)->pi_prop) {
3318 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3319 		MDI_PI(pip)->pi_prop = NULL;
3320 	}
3321 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3322 }
3323 
3324 
3325 /*
3326  * i_mdi_phci_remove_path():
3327  * 		Remove a mdi_pathinfo node from pHCI list.
3328  * Notes:
3329  *		Caller should hold per-pHCI mutex
3330  */
3331 static void
3332 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3333 {
3334 	mdi_pathinfo_t	*prev = NULL;
3335 	mdi_pathinfo_t	*path = NULL;
3336 
3337 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3338 
3339 	MDI_PHCI_LOCK(ph);
3340 	path = ph->ph_path_head;
3341 	while (path != NULL) {
3342 		if (path == pip) {
3343 			break;
3344 		}
3345 		prev = path;
3346 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3347 	}
3348 
3349 	if (path) {
3350 		ph->ph_path_count--;
3351 		if (prev) {
3352 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3353 		} else {
3354 			ph->ph_path_head =
3355 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3356 		}
3357 		if (ph->ph_path_tail == path) {
3358 			ph->ph_path_tail = prev;
3359 		}
3360 	}
3361 
3362 	/*
3363 	 * Clear the pHCI link
3364 	 */
3365 	MDI_PI(pip)->pi_phci_link = NULL;
3366 	MDI_PI(pip)->pi_phci = NULL;
3367 	MDI_PHCI_UNLOCK(ph);
3368 }
3369 
3370 /*
3371  * i_mdi_client_remove_path():
3372  * 		Remove a mdi_pathinfo node from client path list.
3373  */
3374 static void
3375 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3376 {
3377 	mdi_pathinfo_t	*prev = NULL;
3378 	mdi_pathinfo_t	*path;
3379 
3380 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3381 
3382 	ASSERT(MDI_CLIENT_LOCKED(ct));
3383 	path = ct->ct_path_head;
3384 	while (path != NULL) {
3385 		if (path == pip) {
3386 			break;
3387 		}
3388 		prev = path;
3389 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3390 	}
3391 
3392 	if (path) {
3393 		ct->ct_path_count--;
3394 		if (prev) {
3395 			MDI_PI(prev)->pi_client_link =
3396 			    MDI_PI(path)->pi_client_link;
3397 		} else {
3398 			ct->ct_path_head =
3399 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3400 		}
3401 		if (ct->ct_path_tail == path) {
3402 			ct->ct_path_tail = prev;
3403 		}
3404 		if (ct->ct_path_last == path) {
3405 			ct->ct_path_last = ct->ct_path_head;
3406 		}
3407 	}
3408 	MDI_PI(pip)->pi_client_link = NULL;
3409 	MDI_PI(pip)->pi_client = NULL;
3410 }
3411 
/*
 * i_mdi_pi_state_change():
 *		Common worker used to move a mdi_pathinfo node to the requested
 *		state (online, standby, fault or offline).  The path is marked
 *		transient, the vHCI vo_pi_state_change() entry point is called
 *		when one is registered, and the client state is then brought
 *		up to date.  A path that is still initing is first handed to
 *		the vHCI vo_pi_init() entry point.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		or the error returned by vo_pi_state_change()
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* The callback is made with the pathinfo mutex dropped. */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
				    "!vo_pi_init failed: vHCI %p, pip %s %p",
				    (void *)vh, mdi_pi_spathname(pip),
				    (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!pHCI not ready, pHCI=%p", (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/*
	 * Mark the pHCI unstable for the duration of the state change; it is
	 * marked stable again at state_change_exit.
	 */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);

		/*
		 * If this is a user initiated path online->offline operation
		 * who's success would transition a client from DEGRADED to
		 * FAILED then only proceed if we can offline the client first.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_USER_REQ) &&
		    MDI_PI_IS_ONLINE(pip) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the pip nor the client lock is held
				 * on this exit path.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI state change entry point with no pip or client lock
	 * held.  rv keeps its earlier value when no callback is registered.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
		    "vo_pi_state_change failed: rv %x", rv));
	}
	/*
	 * If the path is still transient, either commit the new state or, on
	 * failure, fall back to the previous state.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Changing the state of the mdi_pathinfo node will
			 * impact the client state.  Update the client and
			 * dev_info node state accordingly.  From here until
			 * the conversion switch below rv holds an NDI code.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_online failed "
						    "error %x", rv));
					}
					/*
					 * A failed client online is not
					 * reported to the caller.
					 */
					rv = NDI_SUCCESS;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_USER_REQ) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3725 
3726 /*
3727  * mdi_pi_online():
3728  *		Place the path_info node in the online state.  The path is
3729  *		now available to be selected by mdi_select_path() for
3730  *		transporting I/O requests to client devices.
3731  * Return Values:
3732  *		MDI_SUCCESS
3733  *		MDI_FAILURE
3734  */
3735 int
3736 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3737 {
3738 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3739 	int		client_held = 0;
3740 	int		rv;
3741 
3742 	ASSERT(ct != NULL);
3743 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3744 	if (rv != MDI_SUCCESS)
3745 		return (rv);
3746 
3747 	MDI_PI_LOCK(pip);
3748 	if (MDI_PI(pip)->pi_pm_held == 0) {
3749 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3750 		    "i_mdi_pm_hold_pip %p", (void *)pip));
3751 		i_mdi_pm_hold_pip(pip);
3752 		client_held = 1;
3753 	}
3754 	MDI_PI_UNLOCK(pip);
3755 
3756 	if (client_held) {
3757 		MDI_CLIENT_LOCK(ct);
3758 		if (ct->ct_power_cnt == 0) {
3759 			rv = i_mdi_power_all_phci(ct);
3760 		}
3761 
3762 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3763 		    "i_mdi_pm_hold_client %p", (void *)ct));
3764 		i_mdi_pm_hold_client(ct, 1);
3765 		MDI_CLIENT_UNLOCK(ct);
3766 	}
3767 
3768 	return (rv);
3769 }
3770 
3771 /*
3772  * mdi_pi_standby():
3773  *		Place the mdi_pathinfo node in standby state
3774  *
3775  * Return Values:
3776  *		MDI_SUCCESS
3777  *		MDI_FAILURE
3778  */
3779 int
3780 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3781 {
3782 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3783 }
3784 
3785 /*
3786  * mdi_pi_fault():
3787  *		Place the mdi_pathinfo node in fault'ed state
3788  * Return Values:
3789  *		MDI_SUCCESS
3790  *		MDI_FAILURE
3791  */
3792 int
3793 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3794 {
3795 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3796 }
3797 
3798 /*
3799  * mdi_pi_offline():
3800  *		Offline a mdi_pathinfo node.
3801  * Return Values:
3802  *		MDI_SUCCESS
3803  *		MDI_FAILURE
3804  */
3805 int
3806 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3807 {
3808 	int	ret, client_held = 0;
3809 	mdi_client_t	*ct;
3810 
3811 	/*
3812 	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3813 	 * used it to mean "user initiated operation" (i.e. devctl). Callers
3814 	 * should now just use NDI_USER_REQ.
3815 	 */
3816 	if (flags & NDI_DEVI_REMOVE) {
3817 		flags &= ~NDI_DEVI_REMOVE;
3818 		flags |= NDI_USER_REQ;
3819 	}
3820 
3821 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3822 
3823 	if (ret == MDI_SUCCESS) {
3824 		MDI_PI_LOCK(pip);
3825 		if (MDI_PI(pip)->pi_pm_held) {
3826 			client_held = 1;
3827 		}
3828 		MDI_PI_UNLOCK(pip);
3829 
3830 		if (client_held) {
3831 			ct = MDI_PI(pip)->pi_client;
3832 			MDI_CLIENT_LOCK(ct);
3833 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3834 			    "i_mdi_pm_rele_client\n"));
3835 			i_mdi_pm_rele_client(ct, 1);
3836 			MDI_CLIENT_UNLOCK(ct);
3837 		}
3838 	}
3839 
3840 	return (ret);
3841 }
3842 
3843 /*
3844  * i_mdi_pi_offline():
3845  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3846  */
3847 static int
3848 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3849 {
3850 	dev_info_t	*vdip = NULL;
3851 	mdi_vhci_t	*vh = NULL;
3852 	mdi_client_t	*ct = NULL;
3853 	int		(*f)();
3854 	int		rv;
3855 
3856 	MDI_PI_LOCK(pip);
3857 	ct = MDI_PI(pip)->pi_client;
3858 	ASSERT(ct != NULL);
3859 
3860 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3861 		/*
3862 		 * Give a chance for pending I/Os to complete.
3863 		 */
3864 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3865 		    "!%d cmds still pending on path %s %p",
3866 		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3867 		    (void *)pip));
3868 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3869 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3870 		    TR_CLOCK_TICK) == -1) {
3871 			/*
3872 			 * The timeout time reached without ref_cnt being zero
3873 			 * being signaled.
3874 			 */
3875 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3876 			    "!Timeout reached on path %s %p without the cond",
3877 			    mdi_pi_spathname(pip), (void *)pip));
3878 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3879 			    "!%d cmds still pending on path %s %p",
3880 			    MDI_PI(pip)->pi_ref_cnt,
3881 			    mdi_pi_spathname(pip), (void *)pip));
3882 		}
3883 	}
3884 	vh = ct->ct_vhci;
3885 	vdip = vh->vh_dip;
3886 
3887 	/*
3888 	 * Notify vHCI that has registered this event
3889 	 */
3890 	ASSERT(vh->vh_ops);
3891 	f = vh->vh_ops->vo_pi_state_change;
3892 
3893 	if (f != NULL) {
3894 		MDI_PI_UNLOCK(pip);
3895 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3896 		    flags)) != MDI_SUCCESS) {
3897 			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3898 			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3899 			    ddi_driver_name(vdip), ddi_get_instance(vdip),
3900 			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3901 		}
3902 		MDI_PI_LOCK(pip);
3903 	}
3904 
3905 	/*
3906 	 * Set the mdi_pathinfo node state and clear the transient condition
3907 	 */
3908 	MDI_PI_SET_OFFLINE(pip);
3909 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3910 	MDI_PI_UNLOCK(pip);
3911 
3912 	MDI_CLIENT_LOCK(ct);
3913 	if (rv == MDI_SUCCESS) {
3914 		if (ct->ct_unstable == 0) {
3915 			dev_info_t	*cdip = ct->ct_dip;
3916 
3917 			/*
3918 			 * Onlining the mdi_pathinfo node will impact the
3919 			 * client state Update the client and dev_info node
3920 			 * state accordingly
3921 			 */
3922 			i_mdi_client_update_state(ct);
3923 			rv = NDI_SUCCESS;
3924 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3925 				if (cdip &&
3926 				    (i_ddi_node_state(cdip) >=
3927 				    DS_INITIALIZED)) {
3928 					MDI_CLIENT_UNLOCK(ct);
3929 					rv = ndi_devi_offline(cdip,
3930 					    NDI_DEVFS_CLEAN);
3931 					MDI_CLIENT_LOCK(ct);
3932 					if (rv != NDI_SUCCESS) {
3933 						/*
3934 						 * ndi_devi_offline failed.
3935 						 * Reset client flags to
3936 						 * online.
3937 						 */
3938 						MDI_DEBUG(4, (MDI_WARN, cdip,
3939 						    "ndi_devi_offline failed: "
3940 						    "error %x", rv));
3941 						MDI_CLIENT_SET_ONLINE(ct);
3942 					}
3943 				}
3944 			}
3945 			/*
3946 			 * Convert to MDI error code
3947 			 */
3948 			switch (rv) {
3949 			case NDI_SUCCESS:
3950 				rv = MDI_SUCCESS;
3951 				break;
3952 			case NDI_BUSY:
3953 				rv = MDI_BUSY;
3954 				break;
3955 			default:
3956 				rv = MDI_FAILURE;
3957 				break;
3958 			}
3959 		}
3960 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3961 		i_mdi_report_path_state(ct, pip);
3962 	}
3963 
3964 	MDI_CLIENT_UNLOCK(ct);
3965 
3966 	/*
3967 	 * Change in the mdi_pathinfo node state will impact the client state
3968 	 */
3969 	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3970 	    "ct = %p pip = %p", (void *)ct, (void *)pip));
3971 	return (rv);
3972 }
3973 
3974 /*
3975  * i_mdi_pi_online():
3976  *		Online a mdi_pathinfo node and call the vHCI driver's callback
3977  */
3978 static int
3979 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3980 {
3981 	mdi_vhci_t	*vh = NULL;
3982 	mdi_client_t	*ct = NULL;
3983 	mdi_phci_t	*ph;
3984 	int		(*f)();
3985 	int		rv;
3986 
3987 	MDI_PI_LOCK(pip);
3988 	ph = MDI_PI(pip)->pi_phci;
3989 	vh = ph->ph_vhci;
3990 	ct = MDI_PI(pip)->pi_client;
3991 	MDI_PI_SET_ONLINING(pip)
3992 	MDI_PI_UNLOCK(pip);
3993 	f = vh->vh_ops->vo_pi_state_change;
3994 	if (f != NULL)
3995 		rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
3996 		    flags);
3997 	MDI_CLIENT_LOCK(ct);
3998 	MDI_PI_LOCK(pip);
3999 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
4000 	MDI_PI_UNLOCK(pip);
4001 	if (rv == MDI_SUCCESS) {
4002 		dev_info_t	*cdip = ct->ct_dip;
4003 
4004 		rv = MDI_SUCCESS;
4005 		i_mdi_client_update_state(ct);
4006 		if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4007 		    MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4008 			if (cdip && !i_ddi_devi_attached(cdip)) {
4009 				MDI_CLIENT_UNLOCK(ct);
4010 				rv = ndi_devi_online(cdip, 0);
4011 				MDI_CLIENT_LOCK(ct);
4012 				if ((rv != NDI_SUCCESS) &&
4013 				    (MDI_CLIENT_STATE(ct) ==
4014 				    MDI_CLIENT_STATE_DEGRADED)) {
4015 					MDI_CLIENT_SET_OFFLINE(ct);
4016 				}
4017 				if (rv != NDI_SUCCESS) {
4018 					/* Reset the path state */
4019 					MDI_PI_LOCK(pip);
4020 					MDI_PI(pip)->pi_state =
4021 					    MDI_PI_OLD_STATE(pip);
4022 					MDI_PI_UNLOCK(pip);
4023 				}
4024 			}
4025 		}
4026 		switch (rv) {
4027 		case NDI_SUCCESS:
4028 			MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4029 			i_mdi_report_path_state(ct, pip);
4030 			rv = MDI_SUCCESS;
4031 			break;
4032 		case NDI_BUSY:
4033 			rv = MDI_BUSY;
4034 			break;
4035 		default:
4036 			rv = MDI_FAILURE;
4037 			break;
4038 		}
4039 	} else {
4040 		/* Reset the path state */
4041 		MDI_PI_LOCK(pip);
4042 		MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4043 		MDI_PI_UNLOCK(pip);
4044 	}
4045 	MDI_CLIENT_UNLOCK(ct);
4046 	return (rv);
4047 }
4048 
4049 /*
4050  * mdi_pi_get_node_name():
4051  *              Get the name associated with a mdi_pathinfo node.
4052  *              Since pathinfo nodes are not directly named, we
4053  *              return the node_name of the client.
4054  *
4055  * Return Values:
4056  *              char *
4057  */
4058 char *
4059 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4060 {
4061 	mdi_client_t    *ct;
4062 
4063 	if (pip == NULL)
4064 		return (NULL);
4065 	ct = MDI_PI(pip)->pi_client;
4066 	if ((ct == NULL) || (ct->ct_dip == NULL))
4067 		return (NULL);
4068 	return (ddi_node_name(ct->ct_dip));
4069 }
4070 
4071 /*
4072  * mdi_pi_get_addr():
4073  *		Get the unit address associated with a mdi_pathinfo node
4074  *
4075  * Return Values:
4076  *		char *
4077  */
4078 char *
4079 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4080 {
4081 	if (pip == NULL)
4082 		return (NULL);
4083 
4084 	return (MDI_PI(pip)->pi_addr);
4085 }
4086 
4087 /*
4088  * mdi_pi_get_path_instance():
4089  *		Get the 'path_instance' of a mdi_pathinfo node
4090  *
4091  * Return Values:
4092  *		path_instance
4093  */
4094 int
4095 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4096 {
4097 	if (pip == NULL)
4098 		return (0);
4099 
4100 	return (MDI_PI(pip)->pi_path_instance);
4101 }
4102 
4103 /*
4104  * mdi_pi_pathname():
4105  *		Return pointer to path to pathinfo node.
4106  */
4107 char *
4108 mdi_pi_pathname(mdi_pathinfo_t *pip)
4109 {
4110 	if (pip == NULL)
4111 		return (NULL);
4112 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4113 }
4114 
4115 /*
4116  * mdi_pi_spathname():
4117  *		Return pointer to shortpath to pathinfo node. Used for debug
4118  *		messages, so return "" instead of NULL when unknown.
4119  */
4120 char *
4121 mdi_pi_spathname(mdi_pathinfo_t *pip)
4122 {
4123 	char	*spath = "";
4124 
4125 	if (pip) {
4126 		spath = mdi_pi_spathname_by_instance(
4127 		    mdi_pi_get_path_instance(pip));
4128 		if (spath == NULL)
4129 			spath = "";
4130 	}
4131 	return (spath);
4132 }
4133 
4134 char *
4135 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4136 {
4137 	char *obp_path = NULL;
4138 	if ((pip == NULL) || (path == NULL))
4139 		return (NULL);
4140 
4141 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4142 		(void) strcpy(path, obp_path);
4143 		(void) mdi_prop_free(obp_path);
4144 	} else {
4145 		path = NULL;
4146 	}
4147 	return (path);
4148 }
4149 
4150 int
4151 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4152 {
4153 	dev_info_t *pdip;
4154 	char *obp_path = NULL;
4155 	int rc = MDI_FAILURE;
4156 
4157 	if (pip == NULL)
4158 		return (MDI_FAILURE);
4159 
4160 	pdip = mdi_pi_get_phci(pip);
4161 	if (pdip == NULL)
4162 		return (MDI_FAILURE);
4163 
4164 	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4165 
4166 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4167 		(void) ddi_pathname(pdip, obp_path);
4168 	}
4169 
4170 	if (component) {
4171 		(void) strncat(obp_path, "/", MAXPATHLEN);
4172 		(void) strncat(obp_path, component, MAXPATHLEN);
4173 	}
4174 	rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4175 
4176 	if (obp_path)
4177 		kmem_free(obp_path, MAXPATHLEN);
4178 	return (rc);
4179 }
4180 
4181 /*
4182  * mdi_pi_get_client():
4183  *		Get the client devinfo associated with a mdi_pathinfo node
4184  *
4185  * Return Values:
4186  *		Handle to client device dev_info node
4187  */
4188 dev_info_t *
4189 mdi_pi_get_client(mdi_pathinfo_t *pip)
4190 {
4191 	dev_info_t	*dip = NULL;
4192 	if (pip) {
4193 		dip = MDI_PI(pip)->pi_client->ct_dip;
4194 	}
4195 	return (dip);
4196 }
4197 
4198 /*
4199  * mdi_pi_get_phci():
4200  *		Get the pHCI devinfo associated with the mdi_pathinfo node
4201  * Return Values:
4202  *		Handle to dev_info node
4203  */
4204 dev_info_t *
4205 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4206 {
4207 	dev_info_t	*dip = NULL;
4208 	mdi_phci_t	*ph;
4209 
4210 	if (pip) {
4211 		ph = MDI_PI(pip)->pi_phci;
4212 		if (ph)
4213 			dip = ph->ph_dip;
4214 	}
4215 	return (dip);
4216 }
4217 
4218 /*
4219  * mdi_pi_get_client_private():
4220  *		Get the client private information associated with the
4221  *		mdi_pathinfo node
4222  */
4223 void *
4224 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4225 {
4226 	void *cprivate = NULL;
4227 	if (pip) {
4228 		cprivate = MDI_PI(pip)->pi_cprivate;
4229 	}
4230 	return (cprivate);
4231 }
4232 
4233 /*
4234  * mdi_pi_set_client_private():
4235  *		Set the client private information in the mdi_pathinfo node
4236  */
4237 void
4238 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4239 {
4240 	if (pip) {
4241 		MDI_PI(pip)->pi_cprivate = priv;
4242 	}
4243 }
4244 
4245 /*
4246  * mdi_pi_get_phci_private():
4247  *		Get the pHCI private information associated with the
4248  *		mdi_pathinfo node
4249  */
4250 caddr_t
4251 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4252 {
4253 	caddr_t	pprivate = NULL;
4254 
4255 	if (pip) {
4256 		pprivate = MDI_PI(pip)->pi_pprivate;
4257 	}
4258 	return (pprivate);
4259 }
4260 
4261 /*
4262  * mdi_pi_set_phci_private():
4263  *		Set the pHCI private information in the mdi_pathinfo node
4264  */
4265 void
4266 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4267 {
4268 	if (pip) {
4269 		MDI_PI(pip)->pi_pprivate = priv;
4270 	}
4271 }
4272 
4273 /*
4274  * mdi_pi_get_state():
4275  *		Get the mdi_pathinfo node state. Transient states are internal
4276  *		and not provided to the users
4277  */
4278 mdi_pathinfo_state_t
4279 mdi_pi_get_state(mdi_pathinfo_t *pip)
4280 {
4281 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4282 
4283 	if (pip) {
4284 		if (MDI_PI_IS_TRANSIENT(pip)) {
4285 			/*
4286 			 * mdi_pathinfo is in state transition.  Return the
4287 			 * last good state.
4288 			 */
4289 			state = MDI_PI_OLD_STATE(pip);
4290 		} else {
4291 			state = MDI_PI_STATE(pip);
4292 		}
4293 	}
4294 	return (state);
4295 }
4296 
4297 /*
4298  * mdi_pi_get_flags():
4299  *		Get the mdi_pathinfo node flags.
4300  */
4301 uint_t
4302 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4303 {
4304 	return (pip ? MDI_PI(pip)->pi_flags : 0);
4305 }
4306 
4307 /*
4308  * Note that the following function needs to be the new interface for
4309  * mdi_pi_get_state when mpxio gets integrated to ON.
4310  */
4311 int
4312 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4313 		uint32_t *ext_state)
4314 {
4315 	*state = MDI_PATHINFO_STATE_INIT;
4316 
4317 	if (pip) {
4318 		if (MDI_PI_IS_TRANSIENT(pip)) {
4319 			/*
4320 			 * mdi_pathinfo is in state transition.  Return the
4321 			 * last good state.
4322 			 */
4323 			*state = MDI_PI_OLD_STATE(pip);
4324 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
4325 		} else {
4326 			*state = MDI_PI_STATE(pip);
4327 			*ext_state = MDI_PI_EXT_STATE(pip);
4328 		}
4329 	}
4330 	return (MDI_SUCCESS);
4331 }
4332 
4333 /*
4334  * mdi_pi_get_preferred:
4335  *	Get the preferred path flag
4336  */
4337 int
4338 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4339 {
4340 	if (pip) {
4341 		return (MDI_PI(pip)->pi_preferred);
4342 	}
4343 	return (0);
4344 }
4345 
4346 /*
4347  * mdi_pi_set_preferred:
4348  *	Set the preferred path flag
4349  */
4350 void
4351 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4352 {
4353 	if (pip) {
4354 		MDI_PI(pip)->pi_preferred = preferred;
4355 	}
4356 }
4357 
4358 /*
4359  * mdi_pi_set_state():
4360  *		Set the mdi_pathinfo node state
4361  */
4362 void
4363 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4364 {
4365 	uint32_t	ext_state;
4366 
4367 	if (pip) {
4368 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4369 		MDI_PI(pip)->pi_state = state;
4370 		MDI_PI(pip)->pi_state |= ext_state;
4371 
4372 		/* Path has changed state, invalidate DINFOCACHE snap shot. */
4373 		i_ddi_di_cache_invalidate();
4374 	}
4375 }
4376 
4377 /*
4378  * Property functions:
4379  */
4380 int
4381 i_map_nvlist_error_to_mdi(int val)
4382 {
4383 	int rv;
4384 
4385 	switch (val) {
4386 	case 0:
4387 		rv = DDI_PROP_SUCCESS;
4388 		break;
4389 	case EINVAL:
4390 	case ENOTSUP:
4391 		rv = DDI_PROP_INVAL_ARG;
4392 		break;
4393 	case ENOMEM:
4394 		rv = DDI_PROP_NO_MEMORY;
4395 		break;
4396 	default:
4397 		rv = DDI_PROP_NOT_FOUND;
4398 		break;
4399 	}
4400 	return (rv);
4401 }
4402 
4403 /*
4404  * mdi_pi_get_next_prop():
4405  * 		Property walk function.  The caller should hold mdi_pi_lock()
4406  *		and release by calling mdi_pi_unlock() at the end of walk to
4407  *		get a consistent value.
4408  */
4409 nvpair_t *
4410 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4411 {
4412 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4413 		return (NULL);
4414 	}
4415 	ASSERT(MDI_PI_LOCKED(pip));
4416 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4417 }
4418 
4419 /*
4420  * mdi_prop_remove():
4421  * 		Remove the named property from the named list.
4422  */
4423 int
4424 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4425 {
4426 	if (pip == NULL) {
4427 		return (DDI_PROP_NOT_FOUND);
4428 	}
4429 	ASSERT(!MDI_PI_LOCKED(pip));
4430 	MDI_PI_LOCK(pip);
4431 	if (MDI_PI(pip)->pi_prop == NULL) {
4432 		MDI_PI_UNLOCK(pip);
4433 		return (DDI_PROP_NOT_FOUND);
4434 	}
4435 	if (name) {
4436 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4437 	} else {
4438 		char		nvp_name[MAXNAMELEN];
4439 		nvpair_t	*nvp;
4440 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4441 		while (nvp) {
4442 			nvpair_t	*next;
4443 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4444 			(void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4445 			    nvpair_name(nvp));
4446 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4447 			    nvp_name);
4448 			nvp = next;
4449 		}
4450 	}
4451 	MDI_PI_UNLOCK(pip);
4452 	return (DDI_PROP_SUCCESS);
4453 }
4454 
4455 /*
4456  * mdi_prop_size():
4457  * 		Get buffer size needed to pack the property data.
4458  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
4459  *		buffer size.
4460  */
4461 int
4462 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4463 {
4464 	int	rv;
4465 	size_t	bufsize;
4466 
4467 	*buflenp = 0;
4468 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4469 		return (DDI_PROP_NOT_FOUND);
4470 	}
4471 	ASSERT(MDI_PI_LOCKED(pip));
4472 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4473 	    &bufsize, NV_ENCODE_NATIVE);
4474 	*buflenp = bufsize;
4475 	return (i_map_nvlist_error_to_mdi(rv));
4476 }
4477 
4478 /*
4479  * mdi_prop_pack():
4480  * 		pack the property list.  The caller should hold the
4481  *		mdi_pathinfo_t node to get a consistent data
4482  */
4483 int
4484 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4485 {
4486 	int	rv;
4487 	size_t	bufsize;
4488 
4489 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4490 		return (DDI_PROP_NOT_FOUND);
4491 	}
4492 
4493 	ASSERT(MDI_PI_LOCKED(pip));
4494 
4495 	bufsize = buflen;
4496 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4497 	    NV_ENCODE_NATIVE, KM_SLEEP);
4498 
4499 	return (i_map_nvlist_error_to_mdi(rv));
4500 }
4501 
4502 /*
4503  * mdi_prop_update_byte():
4504  *		Create/Update a byte property
4505  */
4506 int
4507 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4508 {
4509 	int rv;
4510 
4511 	if (pip == NULL) {
4512 		return (DDI_PROP_INVAL_ARG);
4513 	}
4514 	ASSERT(!MDI_PI_LOCKED(pip));
4515 	MDI_PI_LOCK(pip);
4516 	if (MDI_PI(pip)->pi_prop == NULL) {
4517 		MDI_PI_UNLOCK(pip);
4518 		return (DDI_PROP_NOT_FOUND);
4519 	}
4520 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4521 	MDI_PI_UNLOCK(pip);
4522 	return (i_map_nvlist_error_to_mdi(rv));
4523 }
4524 
4525 /*
4526  * mdi_prop_update_byte_array():
4527  *		Create/Update a byte array property
4528  */
4529 int
4530 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4531     uint_t nelements)
4532 {
4533 	int rv;
4534 
4535 	if (pip == NULL) {
4536 		return (DDI_PROP_INVAL_ARG);
4537 	}
4538 	ASSERT(!MDI_PI_LOCKED(pip));
4539 	MDI_PI_LOCK(pip);
4540 	if (MDI_PI(pip)->pi_prop == NULL) {
4541 		MDI_PI_UNLOCK(pip);
4542 		return (DDI_PROP_NOT_FOUND);
4543 	}
4544 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4545 	MDI_PI_UNLOCK(pip);
4546 	return (i_map_nvlist_error_to_mdi(rv));
4547 }
4548 
4549 /*
4550  * mdi_prop_update_int():
4551  *		Create/Update a 32 bit integer property
4552  */
4553 int
4554 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4555 {
4556 	int rv;
4557 
4558 	if (pip == NULL) {
4559 		return (DDI_PROP_INVAL_ARG);
4560 	}
4561 	ASSERT(!MDI_PI_LOCKED(pip));
4562 	MDI_PI_LOCK(pip);
4563 	if (MDI_PI(pip)->pi_prop == NULL) {
4564 		MDI_PI_UNLOCK(pip);
4565 		return (DDI_PROP_NOT_FOUND);
4566 	}
4567 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4568 	MDI_PI_UNLOCK(pip);
4569 	return (i_map_nvlist_error_to_mdi(rv));
4570 }
4571 
4572 /*
4573  * mdi_prop_update_int64():
4574  *		Create/Update a 64 bit integer property
4575  */
4576 int
4577 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4578 {
4579 	int rv;
4580 
4581 	if (pip == NULL) {
4582 		return (DDI_PROP_INVAL_ARG);
4583 	}
4584 	ASSERT(!MDI_PI_LOCKED(pip));
4585 	MDI_PI_LOCK(pip);
4586 	if (MDI_PI(pip)->pi_prop == NULL) {
4587 		MDI_PI_UNLOCK(pip);
4588 		return (DDI_PROP_NOT_FOUND);
4589 	}
4590 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4591 	MDI_PI_UNLOCK(pip);
4592 	return (i_map_nvlist_error_to_mdi(rv));
4593 }
4594 
4595 /*
4596  * mdi_prop_update_int_array():
4597  *		Create/Update a int array property
4598  */
4599 int
4600 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4601 	    uint_t nelements)
4602 {
4603 	int rv;
4604 
4605 	if (pip == NULL) {
4606 		return (DDI_PROP_INVAL_ARG);
4607 	}
4608 	ASSERT(!MDI_PI_LOCKED(pip));
4609 	MDI_PI_LOCK(pip);
4610 	if (MDI_PI(pip)->pi_prop == NULL) {
4611 		MDI_PI_UNLOCK(pip);
4612 		return (DDI_PROP_NOT_FOUND);
4613 	}
4614 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4615 	    nelements);
4616 	MDI_PI_UNLOCK(pip);
4617 	return (i_map_nvlist_error_to_mdi(rv));
4618 }
4619 
4620 /*
4621  * mdi_prop_update_string():
4622  *		Create/Update a string property
4623  */
4624 int
4625 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4626 {
4627 	int rv;
4628 
4629 	if (pip == NULL) {
4630 		return (DDI_PROP_INVAL_ARG);
4631 	}
4632 	ASSERT(!MDI_PI_LOCKED(pip));
4633 	MDI_PI_LOCK(pip);
4634 	if (MDI_PI(pip)->pi_prop == NULL) {
4635 		MDI_PI_UNLOCK(pip);
4636 		return (DDI_PROP_NOT_FOUND);
4637 	}
4638 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4639 	MDI_PI_UNLOCK(pip);
4640 	return (i_map_nvlist_error_to_mdi(rv));
4641 }
4642 
4643 /*
4644  * mdi_prop_update_string_array():
4645  *		Create/Update a string array property
4646  */
4647 int
4648 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4649     uint_t nelements)
4650 {
4651 	int rv;
4652 
4653 	if (pip == NULL) {
4654 		return (DDI_PROP_INVAL_ARG);
4655 	}
4656 	ASSERT(!MDI_PI_LOCKED(pip));
4657 	MDI_PI_LOCK(pip);
4658 	if (MDI_PI(pip)->pi_prop == NULL) {
4659 		MDI_PI_UNLOCK(pip);
4660 		return (DDI_PROP_NOT_FOUND);
4661 	}
4662 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4663 	    nelements);
4664 	MDI_PI_UNLOCK(pip);
4665 	return (i_map_nvlist_error_to_mdi(rv));
4666 }
4667 
4668 /*
4669  * mdi_prop_lookup_byte():
4670  * 		Look for byte property identified by name.  The data returned
4671  *		is the actual property and valid as long as mdi_pathinfo_t node
4672  *		is alive.
4673  */
4674 int
4675 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4676 {
4677 	int rv;
4678 
4679 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4680 		return (DDI_PROP_NOT_FOUND);
4681 	}
4682 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4683 	return (i_map_nvlist_error_to_mdi(rv));
4684 }
4685 
4686 
4687 /*
4688  * mdi_prop_lookup_byte_array():
4689  * 		Look for byte array property identified by name.  The data
4690  *		returned is the actual property and valid as long as
4691  *		mdi_pathinfo_t node is alive.
4692  */
4693 int
4694 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4695     uint_t *nelements)
4696 {
4697 	int rv;
4698 
4699 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4700 		return (DDI_PROP_NOT_FOUND);
4701 	}
4702 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4703 	    nelements);
4704 	return (i_map_nvlist_error_to_mdi(rv));
4705 }
4706 
4707 /*
4708  * mdi_prop_lookup_int():
4709  * 		Look for int property identified by name.  The data returned
4710  *		is the actual property and valid as long as mdi_pathinfo_t
4711  *		node is alive.
4712  */
4713 int
4714 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4715 {
4716 	int rv;
4717 
4718 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4719 		return (DDI_PROP_NOT_FOUND);
4720 	}
4721 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4722 	return (i_map_nvlist_error_to_mdi(rv));
4723 }
4724 
4725 /*
4726  * mdi_prop_lookup_int64():
4727  * 		Look for int64 property identified by name.  The data returned
4728  *		is the actual property and valid as long as mdi_pathinfo_t node
4729  *		is alive.
4730  */
4731 int
4732 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4733 {
4734 	int rv;
4735 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4736 		return (DDI_PROP_NOT_FOUND);
4737 	}
4738 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4739 	return (i_map_nvlist_error_to_mdi(rv));
4740 }
4741 
4742 /*
4743  * mdi_prop_lookup_int_array():
4744  * 		Look for int array property identified by name.  The data
4745  *		returned is the actual property and valid as long as
4746  *		mdi_pathinfo_t node is alive.
4747  */
4748 int
4749 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4750     uint_t *nelements)
4751 {
4752 	int rv;
4753 
4754 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4755 		return (DDI_PROP_NOT_FOUND);
4756 	}
4757 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4758 	    (int32_t **)data, nelements);
4759 	return (i_map_nvlist_error_to_mdi(rv));
4760 }
4761 
4762 /*
4763  * mdi_prop_lookup_string():
4764  * 		Look for string property identified by name.  The data
4765  *		returned is the actual property and valid as long as
4766  *		mdi_pathinfo_t node is alive.
4767  */
4768 int
4769 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4770 {
4771 	int rv;
4772 
4773 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4774 		return (DDI_PROP_NOT_FOUND);
4775 	}
4776 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4777 	return (i_map_nvlist_error_to_mdi(rv));
4778 }
4779 
4780 /*
4781  * mdi_prop_lookup_string_array():
4782  * 		Look for string array property identified by name.  The data
4783  *		returned is the actual property and valid as long as
4784  *		mdi_pathinfo_t node is alive.
4785  */
4786 int
4787 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4788     uint_t *nelements)
4789 {
4790 	int rv;
4791 
4792 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4793 		return (DDI_PROP_NOT_FOUND);
4794 	}
4795 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4796 	    nelements);
4797 	return (i_map_nvlist_error_to_mdi(rv));
4798 }
4799 
/*
 * mdi_prop_free():
 * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
 *		functions return the pointer to actual property data and not a
 *		copy of it.  So the data returned is valid as long as
 *		mdi_pathinfo_t node is valid.
 *
 *		Consequently there is nothing to release here: the data
 *		argument is ignored and DDI_PROP_SUCCESS is always returned.
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	return (DDI_PROP_SUCCESS);
}
4813 
4814 /*ARGSUSED*/
4815 static void
4816 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4817 {
4818 	char		*ct_path;
4819 	char		*ct_status;
4820 	char		*status;
4821 	dev_info_t	*cdip = ct->ct_dip;
4822 	char		lb_buf[64];
4823 	int		report_lb_c = 0, report_lb_p = 0;
4824 
4825 	ASSERT(MDI_CLIENT_LOCKED(ct));
4826 	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4827 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4828 		return;
4829 	}
4830 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4831 		ct_status = "optimal";
4832 		report_lb_c = 1;
4833 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4834 		ct_status = "degraded";
4835 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4836 		ct_status = "failed";
4837 	} else {
4838 		ct_status = "unknown";
4839 	}
4840 
4841 	lb_buf[0] = 0;		/* not interested in load balancing config */
4842 
4843 	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4844 		status = "removed";
4845 	} else if (MDI_PI_IS_OFFLINE(pip)) {
4846 		status = "offline";
4847 	} else if (MDI_PI_IS_ONLINE(pip)) {
4848 		status = "online";
4849 		report_lb_p = 1;
4850 	} else if (MDI_PI_IS_STANDBY(pip)) {
4851 		status = "standby";
4852 	} else if (MDI_PI_IS_FAULT(pip)) {
4853 		status = "faulted";
4854 	} else {
4855 		status = "unknown";
4856 	}
4857 
4858 	if (cdip) {
4859 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4860 
4861 		/*
4862 		 * NOTE: Keeping "multipath status: %s" and
4863 		 * "Load balancing: %s" format unchanged in case someone
4864 		 * scrubs /var/adm/messages looking for these messages.
4865 		 */
4866 		if (report_lb_c && report_lb_p) {
4867 			if (ct->ct_lb == LOAD_BALANCE_LBA) {
4868 				(void) snprintf(lb_buf, sizeof (lb_buf),
4869 				    "%s, region-size: %d", mdi_load_balance_lba,
4870 				    ct->ct_lb_args->region_size);
4871 			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4872 				(void) snprintf(lb_buf, sizeof (lb_buf),
4873 				    "%s", mdi_load_balance_none);
4874 			} else {
4875 				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4876 				    mdi_load_balance_rr);
4877 			}
4878 
4879 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4880 			    "?%s (%s%d) multipath status: %s: "
4881 			    "path %d %s is %s: Load balancing: %s\n",
4882 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4883 			    ddi_get_instance(cdip), ct_status,
4884 			    mdi_pi_get_path_instance(pip),
4885 			    mdi_pi_spathname(pip), status, lb_buf);
4886 		} else {
4887 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4888 			    "?%s (%s%d) multipath status: %s: "
4889 			    "path %d %s is %s\n",
4890 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4891 			    ddi_get_instance(cdip), ct_status,
4892 			    mdi_pi_get_path_instance(pip),
4893 			    mdi_pi_spathname(pip), status);
4894 		}
4895 
4896 		kmem_free(ct_path, MAXPATHLEN);
4897 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4898 	}
4899 }
4900 
4901 #ifdef	DEBUG
/*
 * i_mdi_log():
 *		Utility function for error message management
 *
 *		The caller's format and arguments are rendered into a local
 *		512 byte buffer, prefixed with "mdi: ", the driver name and
 *		instance of dip (when dip is not NULL) and the calling
 *		function name, and then handed to cmn_err().
 *
 *		NOTE: Implementation takes care of trailing \n for cmn_err,
 *		MDI_DEBUG should not terminate fmt strings with \n.
 *
 *		NOTE: If the level is >= 2, and there is no leading !?^
 *		then a leading ! is implied (but can be overridden via
 *		mdi_debug_consoleonly). If you are using kmdb on the console,
 *		consider setting mdi_debug_consoleonly to 1 as an aid.
 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[512];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	/* Build the "<driver><instance>: " prefix, or leave it empty. */
	if (dip) {
		(void) snprintf(name, sizeof(name), "%s%d: ",
		    ddi_driver_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/*
	 * A leading '!', '?' or '^' in the rendered message selects the
	 * destination; strip it here, it is re-applied in front of the
	 * "mdi: " prefix below.
	 */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		if (level >= 2)
			log_only = 1;		/* ! implied */
		bp = buf;
		break;
	}
	/* Global overrides; consoleonly is checked last and so wins. */
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}
	if (mdi_debug_consoleonly) {
		log_only = 0;
		boot_only = 0;
		console_only = 1;
		level = CE_NOTE;
		/* Jump into the CE_WARN/CE_PANIC arm (no explicit "\n"). */
		goto console;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
		/* These formats supply the trailing newline themselves. */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
		}
		break;

	case CE_WARN:
	case CE_PANIC:
	console:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
		}
		break;
	default:
		/* Any other level: func and the destination prefix are omitted. */
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
5003 #endif	/* DEBUG */
5004 
5005 void
5006 i_mdi_client_online(dev_info_t *ct_dip)
5007 {
5008 	mdi_client_t	*ct;
5009 
5010 	/*
5011 	 * Client online notification. Mark client state as online
5012 	 * restore our binding with dev_info node
5013 	 */
5014 	ct = i_devi_get_client(ct_dip);
5015 	ASSERT(ct != NULL);
5016 	MDI_CLIENT_LOCK(ct);
5017 	MDI_CLIENT_SET_ONLINE(ct);
5018 	/* catch for any memory leaks */
5019 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5020 	ct->ct_dip = ct_dip;
5021 
5022 	if (ct->ct_power_cnt == 0)
5023 		(void) i_mdi_power_all_phci(ct);
5024 
5025 	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5026 	    "i_mdi_pm_hold_client %p", (void *)ct));
5027 	i_mdi_pm_hold_client(ct, 1);
5028 
5029 	MDI_CLIENT_UNLOCK(ct);
5030 }
5031 
5032 void
5033 i_mdi_phci_online(dev_info_t *ph_dip)
5034 {
5035 	mdi_phci_t	*ph;
5036 
5037 	/* pHCI online notification. Mark state accordingly */
5038 	ph = i_devi_get_phci(ph_dip);
5039 	ASSERT(ph != NULL);
5040 	MDI_PHCI_LOCK(ph);
5041 	MDI_PHCI_SET_ONLINE(ph);
5042 	MDI_PHCI_UNLOCK(ph);
5043 }
5044 
5045 /*
5046  * mdi_devi_online():
5047  * 		Online notification from NDI framework on pHCI/client
5048  *		device online.
5049  * Return Values:
5050  *		NDI_SUCCESS
5051  *		MDI_FAILURE
5052  */
5053 /*ARGSUSED*/
5054 int
5055 mdi_devi_online(dev_info_t *dip, uint_t flags)
5056 {
5057 	if (MDI_PHCI(dip)) {
5058 		i_mdi_phci_online(dip);
5059 	}
5060 
5061 	if (MDI_CLIENT(dip)) {
5062 		i_mdi_client_online(dip);
5063 	}
5064 	return (NDI_SUCCESS);
5065 }
5066 
5067 /*
5068  * mdi_devi_offline():
5069  * 		Offline notification from NDI framework on pHCI/Client device
5070  *		offline.
5071  *
5072  * Return Values:
5073  *		NDI_SUCCESS
5074  *		NDI_FAILURE
5075  */
5076 /*ARGSUSED*/
5077 int
5078 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5079 {
5080 	int		rv = NDI_SUCCESS;
5081 
5082 	if (MDI_CLIENT(dip)) {
5083 		rv = i_mdi_client_offline(dip, flags);
5084 		if (rv != NDI_SUCCESS)
5085 			return (rv);
5086 	}
5087 
5088 	if (MDI_PHCI(dip)) {
5089 		rv = i_mdi_phci_offline(dip, flags);
5090 
5091 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5092 			/* set client back online */
5093 			i_mdi_client_online(dip);
5094 		}
5095 	}
5096 
5097 	return (rv);
5098 }
5099 
/*
 * i_mdi_phci_offline():
 *		pHCI offline notification handler.
 *
 *		The work is done in phases, all driven by the pHCI's path
 *		list (ph_path_head / pi_phci_link):
 *		  1. Walk the paths and refuse (NDI_BUSY) if any client is
 *		     unstable or has a failover in progress.  For clients
 *		     whose computed state without this pHCI would be FAILED,
 *		     offline the client device with ndi_devi_offline().
 *		  2. If one of those client offlines failed, walk the paths
 *		     that precede the failing one, online or offline each
 *		     client according to its current state, and return
 *		     NDI_BUSY.
 *		  3. Otherwise mark the pHCI offline, mark every path as
 *		     offlining, wait briefly, and offline each path.  If a
 *		     path does not end up offline the pHCI is marked online
 *		     again and NDI_BUSY is returned.
 *
 *		The pHCI lock is dropped around the ndi_devi_offline(),
 *		ndi_devi_online() and delay_random() calls.
 *
 * Return Values:
 *		NDI_SUCCESS	dip has no pHCI, the pHCI was already offline,
 *				or all paths were offlined
 *		NDI_BUSY	the pHCI can not be offlined at this moment
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!pHCI already offlined: %p", (void *)dip));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more target devices are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* Capture the successor before any lock is dropped below. */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Drop both locks across the client offline call. */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip,
			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (MDI_WARN, dip,
				    "!pHCI device is busy. "
				    "This device can not be removed at this "
				    "moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * A client could not be offlined.  Revisit the paths that
		 * precede the failing one and online or offline each client
		 * device according to its current client state.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip,
						NDI_DEVFS_CLEAN);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* No client dip (or other state): just move on. */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/* Result is ignored; the path state is checked instead. */
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not used afterwards. */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* Undo the pHCI offline marking made above. */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
5298 
5299 void
5300 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5301 {
5302 	mdi_phci_t	*ph;
5303 	mdi_client_t	*ct;
5304 	mdi_pathinfo_t	*pip;
5305 	mdi_pathinfo_t	*next;
5306 	dev_info_t	*cdip;
5307 
5308 	if (!MDI_PHCI(dip))
5309 		return;
5310 
5311 	ph = i_devi_get_phci(dip);
5312 	if (ph == NULL) {
5313 		return;
5314 	}
5315 
5316 	MDI_PHCI_LOCK(ph);
5317 
5318 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5319 		/* has no last path */
5320 		MDI_PHCI_UNLOCK(ph);
5321 		return;
5322 	}
5323 
5324 	pip = ph->ph_path_head;
5325 	while (pip != NULL) {
5326 		MDI_PI_LOCK(pip);
5327 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5328 
5329 		ct = MDI_PI(pip)->pi_client;
5330 		i_mdi_client_lock(ct, pip);
5331 		MDI_PI_UNLOCK(pip);
5332 
5333 		cdip = ct->ct_dip;
5334 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5335 		    (i_mdi_client_compute_state(ct, ph) ==
5336 		    MDI_CLIENT_STATE_FAILED)) {
5337 			/* Last path. Mark client dip as retiring */
5338 			i_mdi_client_unlock(ct);
5339 			MDI_PHCI_UNLOCK(ph);
5340 			(void) e_ddi_mark_retiring(cdip, cons_array);
5341 			MDI_PHCI_LOCK(ph);
5342 			pip = next;
5343 		} else {
5344 			i_mdi_client_unlock(ct);
5345 			pip = next;
5346 		}
5347 	}
5348 
5349 	MDI_PHCI_UNLOCK(ph);
5350 
5351 	return;
5352 }
5353 
5354 void
5355 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5356 {
5357 	mdi_phci_t	*ph;
5358 	mdi_client_t	*ct;
5359 	mdi_pathinfo_t	*pip;
5360 	mdi_pathinfo_t	*next;
5361 	dev_info_t	*cdip;
5362 
5363 	if (!MDI_PHCI(dip))
5364 		return;
5365 
5366 	ph = i_devi_get_phci(dip);
5367 	if (ph == NULL)
5368 		return;
5369 
5370 	MDI_PHCI_LOCK(ph);
5371 
5372 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5373 		MDI_PHCI_UNLOCK(ph);
5374 		/* not last path */
5375 		return;
5376 	}
5377 
5378 	if (ph->ph_unstable) {
5379 		MDI_PHCI_UNLOCK(ph);
5380 		/* can't check for constraints */
5381 		*constraint = 0;
5382 		return;
5383 	}
5384 
5385 	pip = ph->ph_path_head;
5386 	while (pip != NULL) {
5387 		MDI_PI_LOCK(pip);
5388 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5389 
5390 		/*
5391 		 * The mdi_pathinfo state is OK. Check the client state.
5392 		 * If failover in progress fail the pHCI from offlining
5393 		 */
5394 		ct = MDI_PI(pip)->pi_client;
5395 		i_mdi_client_lock(ct, pip);
5396 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5397 		    (ct->ct_unstable)) {
5398 			/*
5399 			 * Failover is in progress, can't check for constraints
5400 			 */
5401 			MDI_PI_UNLOCK(pip);
5402 			i_mdi_client_unlock(ct);
5403 			MDI_PHCI_UNLOCK(ph);
5404 			*constraint = 0;
5405 			return;
5406 		}
5407 		MDI_PI_UNLOCK(pip);
5408 
5409 		/*
5410 		 * Check to see of we are retiring the last path of this
5411 		 * client device...
5412 		 */
5413 		cdip = ct->ct_dip;
5414 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5415 		    (i_mdi_client_compute_state(ct, ph) ==
5416 		    MDI_CLIENT_STATE_FAILED)) {
5417 			i_mdi_client_unlock(ct);
5418 			MDI_PHCI_UNLOCK(ph);
5419 			(void) e_ddi_retire_notify(cdip, constraint);
5420 			MDI_PHCI_LOCK(ph);
5421 			pip = next;
5422 		} else {
5423 			i_mdi_client_unlock(ct);
5424 			pip = next;
5425 		}
5426 	}
5427 
5428 	MDI_PHCI_UNLOCK(ph);
5429 
5430 	return;
5431 }
5432 
/*
 * offline the path(s) hanging off the pHCI. If the
 * last path to any client, check that constraints
 * have been applied.
 *
 * If constraint is 0, we aren't going to retire the
 * pHCI. However we still need to go through the paths
 * calling e_ddi_retire_finalize() to clear their
 * contract barriers.
 *
 * constraint is dereferenced as an int, and only when
 * phci_only is 0.  When phci_only is non-zero the client
 * devices are not finalized and the paths are offlined
 * regardless of constraint.
 *
 * Nothing is done when dip is not a pHCI, has no pHCI
 * structure, or the pHCI is already offline.  The pHCI
 * lock is dropped around the e_ddi_retire_finalize()
 * and delay_random() calls.
 */
void
mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		unstable = 0;
	int		tmp_constraint;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* no last path and no pips */
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* no last path and no pips */
		return;
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		unstable = 1;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* Capture the successor before any lock is dropped below. */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * if failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/* Only recorded here; the walk still continues. */
			unstable = 1;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (!phci_only && cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			/*
			 * This is the last path to this client.
			 *
			 * Constraint will only be set to 1 if this client can
			 * be retired (as already determined by
			 * mdi_phci_retire_notify). However we don't actually
			 * need to retire the client (we just retire the last
			 * path - MPXIO will then fail all I/Os to the client).
			 * But we still need to call e_ddi_retire_finalize so
			 * the contract barriers can be cleared. Therefore we
			 * temporarily set constraint = 0 so that the client
			 * dip is not retired.
			 */
			tmp_constraint = 0;
			(void) e_ddi_retire_finalize(cdip, &tmp_constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/* Not retiring: the barriers were cleared above, nothing more to do. */
	if (!phci_only && *((int *)constraint) == 0) {
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Cannot offline pip(s)
	 */
	if (unstable) {
		cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
		    "pHCI in transient state, cannot retire",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/* Result is ignored; the path state is checked instead. */
		(void) i_mdi_pi_offline(pip, 0);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not used afterwards. */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
			    "path %d %s busy, cannot offline",
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip));
			MDI_PI_UNLOCK(pip);
			/* Undo the pHCI offline marking made above. */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return;
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return;
}
5589 
5590 void
5591 mdi_phci_unretire(dev_info_t *dip)
5592 {
5593 	mdi_phci_t	*ph;
5594 	mdi_pathinfo_t	*pip;
5595 	mdi_pathinfo_t	*next;
5596 
5597 	ASSERT(MDI_PHCI(dip));
5598 
5599 	/*
5600 	 * Online the phci
5601 	 */
5602 	i_mdi_phci_online(dip);
5603 
5604 	ph = i_devi_get_phci(dip);
5605 	MDI_PHCI_LOCK(ph);
5606 	pip = ph->ph_path_head;
5607 	while (pip != NULL) {
5608 		MDI_PI_LOCK(pip);
5609 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5610 		MDI_PI_UNLOCK(pip);
5611 		(void) i_mdi_pi_online(pip, 0);
5612 		pip = next;
5613 	}
5614 	MDI_PHCI_UNLOCK(ph);
5615 }
5616 
5617 /*ARGSUSED*/
5618 static int
5619 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5620 {
5621 	int		rv = NDI_SUCCESS;
5622 	mdi_client_t	*ct;
5623 
5624 	/*
5625 	 * Client component to go offline.  Make sure that we are
5626 	 * not in failing over state and update client state
5627 	 * accordingly
5628 	 */
5629 	ct = i_devi_get_client(dip);
5630 	MDI_DEBUG(2, (MDI_NOTE, dip,
5631 	    "called %p %p", (void *)dip, (void *)ct));
5632 	if (ct != NULL) {
5633 		MDI_CLIENT_LOCK(ct);
5634 		if (ct->ct_unstable) {
5635 			/*
5636 			 * One or more paths are in transient state,
5637 			 * Dont allow offline of a client device
5638 			 */
5639 			MDI_DEBUG(1, (MDI_WARN, dip,
5640 			    "!One or more paths to "
5641 			    "this device are in transient state. "
5642 			    "This device can not be removed at this moment. "
5643 			    "Please try again later."));
5644 			MDI_CLIENT_UNLOCK(ct);
5645 			return (NDI_BUSY);
5646 		}
5647 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5648 			/*
5649 			 * Failover is in progress, Dont allow DR of
5650 			 * a client device
5651 			 */
5652 			MDI_DEBUG(1, (MDI_WARN, dip,
5653 			    "!Client device is Busy. "
5654 			    "This device can not be removed at this moment. "
5655 			    "Please try again later."));
5656 			MDI_CLIENT_UNLOCK(ct);
5657 			return (NDI_BUSY);
5658 		}
5659 		MDI_CLIENT_SET_OFFLINE(ct);
5660 
5661 		/*
5662 		 * Unbind our relationship with the dev_info node
5663 		 */
5664 		if (flags & NDI_DEVI_REMOVE) {
5665 			ct->ct_dip = NULL;
5666 		}
5667 		MDI_CLIENT_UNLOCK(ct);
5668 	}
5669 	return (rv);
5670 }
5671 
5672 /*
5673  * mdi_pre_attach():
5674  *		Pre attach() notification handler
5675  */
5676 /*ARGSUSED*/
5677 int
5678 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5679 {
5680 	/* don't support old DDI_PM_RESUME */
5681 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5682 	    (cmd == DDI_PM_RESUME))
5683 		return (DDI_FAILURE);
5684 
5685 	return (DDI_SUCCESS);
5686 }
5687 
5688 /*
5689  * mdi_post_attach():
5690  *		Post attach() notification handler
5691  */
5692 /*ARGSUSED*/
5693 void
5694 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5695 {
5696 	mdi_phci_t	*ph;
5697 	mdi_client_t	*ct;
5698 	mdi_vhci_t	*vh;
5699 
5700 	if (MDI_PHCI(dip)) {
5701 		ph = i_devi_get_phci(dip);
5702 		ASSERT(ph != NULL);
5703 
5704 		MDI_PHCI_LOCK(ph);
5705 		switch (cmd) {
5706 		case DDI_ATTACH:
5707 			MDI_DEBUG(2, (MDI_NOTE, dip,
5708 			    "phci post_attach called %p", (void *)ph));
5709 			if (error == DDI_SUCCESS) {
5710 				MDI_PHCI_SET_ATTACH(ph);
5711 			} else {
5712 				MDI_DEBUG(1, (MDI_NOTE, dip,
5713 				    "!pHCI post_attach failed: error %d",
5714 				    error));
5715 				MDI_PHCI_SET_DETACH(ph);
5716 			}
5717 			break;
5718 
5719 		case DDI_RESUME:
5720 			MDI_DEBUG(2, (MDI_NOTE, dip,
5721 			    "pHCI post_resume: called %p", (void *)ph));
5722 			if (error == DDI_SUCCESS) {
5723 				MDI_PHCI_SET_RESUME(ph);
5724 			} else {
5725 				MDI_DEBUG(1, (MDI_NOTE, dip,
5726 				    "!pHCI post_resume failed: error %d",
5727 				    error));
5728 				MDI_PHCI_SET_SUSPEND(ph);
5729 			}
5730 			break;
5731 		}
5732 		MDI_PHCI_UNLOCK(ph);
5733 	}
5734 
5735 	if (MDI_CLIENT(dip)) {
5736 		ct = i_devi_get_client(dip);
5737 		ASSERT(ct != NULL);
5738 
5739 		MDI_CLIENT_LOCK(ct);
5740 		switch (cmd) {
5741 		case DDI_ATTACH:
5742 			MDI_DEBUG(2, (MDI_NOTE, dip,
5743 			    "client post_attach called %p", (void *)ct));
5744 			if (error != DDI_SUCCESS) {
5745 				MDI_DEBUG(1, (MDI_NOTE, dip,
5746 				    "!client post_attach failed: error %d",
5747 				    error));
5748 				MDI_CLIENT_SET_DETACH(ct);
5749 				MDI_DEBUG(4, (MDI_WARN, dip,
5750 				    "i_mdi_pm_reset_client"));
5751 				i_mdi_pm_reset_client(ct);
5752 				break;
5753 			}
5754 
5755 			/*
5756 			 * Client device has successfully attached, inform
5757 			 * the vhci.
5758 			 */
5759 			vh = ct->ct_vhci;
5760 			if (vh->vh_ops->vo_client_attached)
5761 				(*vh->vh_ops->vo_client_attached)(dip);
5762 
5763 			MDI_CLIENT_SET_ATTACH(ct);
5764 			break;
5765 
5766 		case DDI_RESUME:
5767 			MDI_DEBUG(2, (MDI_NOTE, dip,
5768 			    "client post_attach: called %p", (void *)ct));
5769 			if (error == DDI_SUCCESS) {
5770 				MDI_CLIENT_SET_RESUME(ct);
5771 			} else {
5772 				MDI_DEBUG(1, (MDI_NOTE, dip,
5773 				    "!client post_resume failed: error %d",
5774 				    error));
5775 				MDI_CLIENT_SET_SUSPEND(ct);
5776 			}
5777 			break;
5778 		}
5779 		MDI_CLIENT_UNLOCK(ct);
5780 	}
5781 }
5782 
5783 /*
5784  * mdi_pre_detach():
5785  *		Pre detach notification handler
5786  */
5787 /*ARGSUSED*/
5788 int
5789 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5790 {
5791 	int rv = DDI_SUCCESS;
5792 
5793 	if (MDI_CLIENT(dip)) {
5794 		(void) i_mdi_client_pre_detach(dip, cmd);
5795 	}
5796 
5797 	if (MDI_PHCI(dip)) {
5798 		rv = i_mdi_phci_pre_detach(dip, cmd);
5799 	}
5800 
5801 	return (rv);
5802 }
5803 
5804 /*ARGSUSED*/
5805 static int
5806 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5807 {
5808 	int		rv = DDI_SUCCESS;
5809 	mdi_phci_t	*ph;
5810 	mdi_client_t	*ct;
5811 	mdi_pathinfo_t	*pip;
5812 	mdi_pathinfo_t	*failed_pip = NULL;
5813 	mdi_pathinfo_t	*next;
5814 
5815 	ph = i_devi_get_phci(dip);
5816 	if (ph == NULL) {
5817 		return (rv);
5818 	}
5819 
5820 	MDI_PHCI_LOCK(ph);
5821 	switch (cmd) {
5822 	case DDI_DETACH:
5823 		MDI_DEBUG(2, (MDI_NOTE, dip,
5824 		    "pHCI pre_detach: called %p", (void *)ph));
5825 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
5826 			/*
5827 			 * mdi_pathinfo nodes are still attached to
5828 			 * this pHCI. Fail the detach for this pHCI.
5829 			 */
5830 			MDI_DEBUG(2, (MDI_WARN, dip,
5831 			    "pHCI pre_detach: paths are still attached %p",
5832 			    (void *)ph));
5833 			rv = DDI_FAILURE;
5834 			break;
5835 		}
5836 		MDI_PHCI_SET_DETACH(ph);
5837 		break;
5838 
5839 	case DDI_SUSPEND:
5840 		/*
5841 		 * pHCI is getting suspended.  Since mpxio client
5842 		 * devices may not be suspended at this point, to avoid
5843 		 * a potential stack overflow, it is important to suspend
5844 		 * client devices before pHCI can be suspended.
5845 		 */
5846 
5847 		MDI_DEBUG(2, (MDI_NOTE, dip,
5848 		    "pHCI pre_suspend: called %p", (void *)ph));
5849 		/*
5850 		 * Suspend all the client devices accessible through this pHCI
5851 		 */
5852 		pip = ph->ph_path_head;
5853 		while (pip != NULL && rv == DDI_SUCCESS) {
5854 			dev_info_t *cdip;
5855 			MDI_PI_LOCK(pip);
5856 			next =
5857 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5858 			ct = MDI_PI(pip)->pi_client;
5859 			i_mdi_client_lock(ct, pip);
5860 			cdip = ct->ct_dip;
5861 			MDI_PI_UNLOCK(pip);
5862 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5863 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5864 				i_mdi_client_unlock(ct);
5865 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5866 				    DDI_SUCCESS) {
5867 					/*
5868 					 * Suspend of one of the client
5869 					 * device has failed.
5870 					 */
5871 					MDI_DEBUG(1, (MDI_WARN, dip,
5872 					    "!suspend of device (%s%d) failed.",
5873 					    ddi_driver_name(cdip),
5874 					    ddi_get_instance(cdip)));
5875 					failed_pip = pip;
5876 					break;
5877 				}
5878 			} else {
5879 				i_mdi_client_unlock(ct);
5880 			}
5881 			pip = next;
5882 		}
5883 
5884 		if (rv == DDI_SUCCESS) {
5885 			/*
5886 			 * Suspend of client devices is complete. Proceed
5887 			 * with pHCI suspend.
5888 			 */
5889 			MDI_PHCI_SET_SUSPEND(ph);
5890 		} else {
5891 			/*
5892 			 * Revert back all the suspended client device states
5893 			 * to converse.
5894 			 */
5895 			pip = ph->ph_path_head;
5896 			while (pip != failed_pip) {
5897 				dev_info_t *cdip;
5898 				MDI_PI_LOCK(pip);
5899 				next =
5900 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5901 				ct = MDI_PI(pip)->pi_client;
5902 				i_mdi_client_lock(ct, pip);
5903 				cdip = ct->ct_dip;
5904 				MDI_PI_UNLOCK(pip);
5905 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5906 					i_mdi_client_unlock(ct);
5907 					(void) devi_attach(cdip, DDI_RESUME);
5908 				} else {
5909 					i_mdi_client_unlock(ct);
5910 				}
5911 				pip = next;
5912 			}
5913 		}
5914 		break;
5915 
5916 	default:
5917 		rv = DDI_FAILURE;
5918 		break;
5919 	}
5920 	MDI_PHCI_UNLOCK(ph);
5921 	return (rv);
5922 }
5923 
5924 /*ARGSUSED*/
5925 static int
5926 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5927 {
5928 	int		rv = DDI_SUCCESS;
5929 	mdi_client_t	*ct;
5930 
5931 	ct = i_devi_get_client(dip);
5932 	if (ct == NULL) {
5933 		return (rv);
5934 	}
5935 
5936 	MDI_CLIENT_LOCK(ct);
5937 	switch (cmd) {
5938 	case DDI_DETACH:
5939 		MDI_DEBUG(2, (MDI_NOTE, dip,
5940 		    "client pre_detach: called %p",
5941 		     (void *)ct));
5942 		MDI_CLIENT_SET_DETACH(ct);
5943 		break;
5944 
5945 	case DDI_SUSPEND:
5946 		MDI_DEBUG(2, (MDI_NOTE, dip,
5947 		    "client pre_suspend: called %p",
5948 		    (void *)ct));
5949 		MDI_CLIENT_SET_SUSPEND(ct);
5950 		break;
5951 
5952 	default:
5953 		rv = DDI_FAILURE;
5954 		break;
5955 	}
5956 	MDI_CLIENT_UNLOCK(ct);
5957 	return (rv);
5958 }
5959 
5960 /*
5961  * mdi_post_detach():
5962  *		Post detach notification handler
5963  */
5964 /*ARGSUSED*/
5965 void
5966 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5967 {
5968 	/*
5969 	 * Detach/Suspend of mpxio component failed. Update our state
5970 	 * too
5971 	 */
5972 	if (MDI_PHCI(dip))
5973 		i_mdi_phci_post_detach(dip, cmd, error);
5974 
5975 	if (MDI_CLIENT(dip))
5976 		i_mdi_client_post_detach(dip, cmd, error);
5977 }
5978 
5979 /*ARGSUSED*/
5980 static void
5981 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5982 {
5983 	mdi_phci_t	*ph;
5984 
5985 	/*
5986 	 * Detach/Suspend of phci component failed. Update our state
5987 	 * too
5988 	 */
5989 	ph = i_devi_get_phci(dip);
5990 	if (ph == NULL) {
5991 		return;
5992 	}
5993 
5994 	MDI_PHCI_LOCK(ph);
5995 	/*
5996 	 * Detach of pHCI failed. Restore back converse
5997 	 * state
5998 	 */
5999 	switch (cmd) {
6000 	case DDI_DETACH:
6001 		MDI_DEBUG(2, (MDI_NOTE, dip,
6002 		    "pHCI post_detach: called %p",
6003 		    (void *)ph));
6004 		if (error != DDI_SUCCESS)
6005 			MDI_PHCI_SET_ATTACH(ph);
6006 		break;
6007 
6008 	case DDI_SUSPEND:
6009 		MDI_DEBUG(2, (MDI_NOTE, dip,
6010 		    "pHCI post_suspend: called %p",
6011 		    (void *)ph));
6012 		if (error != DDI_SUCCESS)
6013 			MDI_PHCI_SET_RESUME(ph);
6014 		break;
6015 	}
6016 	MDI_PHCI_UNLOCK(ph);
6017 }
6018 
6019 /*ARGSUSED*/
6020 static void
6021 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6022 {
6023 	mdi_client_t	*ct;
6024 
6025 	ct = i_devi_get_client(dip);
6026 	if (ct == NULL) {
6027 		return;
6028 	}
6029 	MDI_CLIENT_LOCK(ct);
6030 	/*
6031 	 * Detach of Client failed. Restore back converse
6032 	 * state
6033 	 */
6034 	switch (cmd) {
6035 	case DDI_DETACH:
6036 		MDI_DEBUG(2, (MDI_NOTE, dip,
6037 		    "client post_detach: called %p", (void *)ct));
6038 		if (DEVI_IS_ATTACHING(dip)) {
6039 			MDI_DEBUG(4, (MDI_NOTE, dip,
6040 			    "i_mdi_pm_rele_client\n"));
6041 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6042 		} else {
6043 			MDI_DEBUG(4, (MDI_NOTE, dip,
6044 			    "i_mdi_pm_reset_client\n"));
6045 			i_mdi_pm_reset_client(ct);
6046 		}
6047 		if (error != DDI_SUCCESS)
6048 			MDI_CLIENT_SET_ATTACH(ct);
6049 		break;
6050 
6051 	case DDI_SUSPEND:
6052 		MDI_DEBUG(2, (MDI_NOTE, dip,
6053 		    "called %p", (void *)ct));
6054 		if (error != DDI_SUCCESS)
6055 			MDI_CLIENT_SET_RESUME(ct);
6056 		break;
6057 	}
6058 	MDI_CLIENT_UNLOCK(ct);
6059 }
6060 
6061 int
6062 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6063 {
6064 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6065 }
6066 
6067 /*
6068  * create and install per-path (client - pHCI) statistics
6069  * I/O stats supported: nread, nwritten, reads, and writes
6070  * Error stats - hard errors, soft errors, & transport errors
6071  */
6072 int
6073 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6074 {
6075 	kstat_t			*kiosp, *kerrsp;
6076 	struct pi_errs		*nsp;
6077 	struct mdi_pi_kstats	*mdi_statp;
6078 
6079 	if (MDI_PI(pip)->pi_kstats != NULL)
6080 		return (MDI_SUCCESS);
6081 
6082 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6083 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6084 		return (MDI_FAILURE);
6085 	}
6086 
6087 	(void) strcat(ksname, ",err");
6088 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6089 	    KSTAT_TYPE_NAMED,
6090 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6091 	if (kerrsp == NULL) {
6092 		kstat_delete(kiosp);
6093 		return (MDI_FAILURE);
6094 	}
6095 
6096 	nsp = (struct pi_errs *)kerrsp->ks_data;
6097 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6098 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6099 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6100 	    KSTAT_DATA_UINT32);
6101 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6102 	    KSTAT_DATA_UINT32);
6103 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6104 	    KSTAT_DATA_UINT32);
6105 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6106 	    KSTAT_DATA_UINT32);
6107 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6108 	    KSTAT_DATA_UINT32);
6109 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6110 	    KSTAT_DATA_UINT32);
6111 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6112 	    KSTAT_DATA_UINT32);
6113 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6114 
6115 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6116 	mdi_statp->pi_kstat_ref = 1;
6117 	mdi_statp->pi_kstat_iostats = kiosp;
6118 	mdi_statp->pi_kstat_errstats = kerrsp;
6119 	kstat_install(kiosp);
6120 	kstat_install(kerrsp);
6121 	MDI_PI(pip)->pi_kstats = mdi_statp;
6122 	return (MDI_SUCCESS);
6123 }
6124 
6125 /*
6126  * destroy per-path properties
6127  */
6128 static void
6129 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6130 {
6131 
6132 	struct mdi_pi_kstats *mdi_statp;
6133 
6134 	if (MDI_PI(pip)->pi_kstats == NULL)
6135 		return;
6136 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6137 		return;
6138 
6139 	MDI_PI(pip)->pi_kstats = NULL;
6140 
6141 	/*
6142 	 * the kstat may be shared between multiple pathinfo nodes
6143 	 * decrement this pathinfo's usage, removing the kstats
6144 	 * themselves when the last pathinfo reference is removed.
6145 	 */
6146 	ASSERT(mdi_statp->pi_kstat_ref > 0);
6147 	if (--mdi_statp->pi_kstat_ref != 0)
6148 		return;
6149 
6150 	kstat_delete(mdi_statp->pi_kstat_iostats);
6151 	kstat_delete(mdi_statp->pi_kstat_errstats);
6152 	kmem_free(mdi_statp, sizeof (*mdi_statp));
6153 }
6154 
6155 /*
6156  * update I/O paths KSTATS
6157  */
6158 void
6159 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6160 {
6161 	kstat_t *iostatp;
6162 	size_t xfer_cnt;
6163 
6164 	ASSERT(pip != NULL);
6165 
6166 	/*
6167 	 * I/O can be driven across a path prior to having path
6168 	 * statistics available, i.e. probe(9e).
6169 	 */
6170 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6171 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6172 		xfer_cnt = bp->b_bcount - bp->b_resid;
6173 		if (bp->b_flags & B_READ) {
6174 			KSTAT_IO_PTR(iostatp)->reads++;
6175 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6176 		} else {
6177 			KSTAT_IO_PTR(iostatp)->writes++;
6178 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6179 		}
6180 	}
6181 }
6182 
6183 /*
6184  * Enable the path(specific client/target/initiator)
6185  * Enabling a path means that MPxIO may select the enabled path for routing
6186  * future I/O requests, subject to other path state constraints.
6187  */
6188 int
6189 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6190 {
6191 	mdi_phci_t	*ph;
6192 
6193 	ph = MDI_PI(pip)->pi_phci;
6194 	if (ph == NULL) {
6195 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6196 		    "!failed: path %s %p: NULL ph",
6197 		    mdi_pi_spathname(pip), (void *)pip));
6198 		return (MDI_FAILURE);
6199 	}
6200 
6201 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6202 		MDI_ENABLE_OP);
6203 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6204 	    "!returning success pip = %p. ph = %p",
6205 	    (void *)pip, (void *)ph));
6206 	return (MDI_SUCCESS);
6207 
6208 }
6209 
6210 /*
6211  * Disable the path (specific client/target/initiator)
6212  * Disabling a path means that MPxIO will not select the disabled path for
6213  * routing any new I/O requests.
6214  */
6215 int
6216 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6217 {
6218 	mdi_phci_t	*ph;
6219 
6220 	ph = MDI_PI(pip)->pi_phci;
6221 	if (ph == NULL) {
6222 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6223 		    "!failed: path %s %p: NULL ph",
6224 		    mdi_pi_spathname(pip), (void *)pip));
6225 		return (MDI_FAILURE);
6226 	}
6227 
6228 	(void) i_mdi_enable_disable_path(pip,
6229 	    ph->ph_vhci, flags, MDI_DISABLE_OP);
6230 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6231 	    "!returning success pip = %p. ph = %p",
6232 	    (void *)pip, (void *)ph));
6233 	return (MDI_SUCCESS);
6234 }
6235 
6236 /*
6237  * disable the path to a particular pHCI (pHCI specified in the phci_path
6238  * argument) for a particular client (specified in the client_path argument).
6239  * Disabling a path means that MPxIO will not select the disabled path for
6240  * routing any new I/O requests.
6241  * NOTE: this will be removed once the NWS files are changed to use the new
6242  * mdi_{enable,disable}_path interfaces
6243  */
6244 int
6245 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6246 {
6247 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6248 }
6249 
6250 /*
6251  * Enable the path to a particular pHCI (pHCI specified in the phci_path
6252  * argument) for a particular client (specified in the client_path argument).
6253  * Enabling a path means that MPxIO may select the enabled path for routing
6254  * future I/O requests, subject to other path state constraints.
6255  * NOTE: this will be removed once the NWS files are changed to use the new
6256  * mdi_{enable,disable}_path interfaces
6257  */
6258 
6259 int
6260 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6261 {
6262 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6263 }
6264 
6265 /*
6266  * Common routine for doing enable/disable.
6267  */
6268 static mdi_pathinfo_t *
6269 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6270 		int op)
6271 {
6272 	int		sync_flag = 0;
6273 	int		rv;
6274 	mdi_pathinfo_t 	*next;
6275 	int		(*f)() = NULL;
6276 
6277 	/*
6278 	 * Check to make sure the path is not already in the
6279 	 * requested state. If it is just return the next path
6280 	 * as we have nothing to do here.
6281 	 */
6282 	if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6283 	    (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6284 		MDI_PI_LOCK(pip);
6285 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6286 		MDI_PI_UNLOCK(pip);
6287 		return (next);
6288 	}
6289 
6290 	f = vh->vh_ops->vo_pi_state_change;
6291 
6292 	sync_flag = (flags << 8) & 0xf00;
6293 
6294 	/*
6295 	 * Do a callback into the mdi consumer to let it
6296 	 * know that path is about to get enabled/disabled.
6297 	 */
6298 	if (f != NULL) {
6299 		rv = (*f)(vh->vh_dip, pip, 0,
6300 			MDI_PI_EXT_STATE(pip),
6301 			MDI_EXT_STATE_CHANGE | sync_flag |
6302 			op | MDI_BEFORE_STATE_CHANGE);
6303 		if (rv != MDI_SUCCESS) {
6304 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6305 			    "vo_pi_state_change: failed rv = %x", rv));
6306 		}
6307 	}
6308 	MDI_PI_LOCK(pip);
6309 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6310 
6311 	switch (flags) {
6312 		case USER_DISABLE:
6313 			if (op == MDI_DISABLE_OP) {
6314 				MDI_PI_SET_USER_DISABLE(pip);
6315 			} else {
6316 				MDI_PI_SET_USER_ENABLE(pip);
6317 			}
6318 			break;
6319 		case DRIVER_DISABLE:
6320 			if (op == MDI_DISABLE_OP) {
6321 				MDI_PI_SET_DRV_DISABLE(pip);
6322 			} else {
6323 				MDI_PI_SET_DRV_ENABLE(pip);
6324 			}
6325 			break;
6326 		case DRIVER_DISABLE_TRANSIENT:
6327 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6328 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6329 			} else {
6330 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6331 			}
6332 			break;
6333 	}
6334 	MDI_PI_UNLOCK(pip);
6335 	/*
6336 	 * Do a callback into the mdi consumer to let it
6337 	 * know that path is now enabled/disabled.
6338 	 */
6339 	if (f != NULL) {
6340 		rv = (*f)(vh->vh_dip, pip, 0,
6341 			MDI_PI_EXT_STATE(pip),
6342 			MDI_EXT_STATE_CHANGE | sync_flag |
6343 			op | MDI_AFTER_STATE_CHANGE);
6344 		if (rv != MDI_SUCCESS) {
6345 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6346 			    "vo_pi_state_change failed: rv = %x", rv));
6347 		}
6348 	}
6349 	return (next);
6350 }
6351 
6352 /*
6353  * Common routine for doing enable/disable.
6354  * NOTE: this will be removed once the NWS files are changed to use the new
6355  * mdi_{enable,disable}_path has been putback
6356  */
6357 int
6358 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6359 {
6360 
6361 	mdi_phci_t	*ph;
6362 	mdi_vhci_t	*vh = NULL;
6363 	mdi_client_t	*ct;
6364 	mdi_pathinfo_t	*next, *pip;
6365 	int		found_it;
6366 
6367 	ph = i_devi_get_phci(pdip);
6368 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6369 	    "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6370 	    (void *)cdip));
6371 	if (ph == NULL) {
6372 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6373 		    "!failed: operation %d: NULL ph", op));
6374 		return (MDI_FAILURE);
6375 	}
6376 
6377 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6378 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6379 		    "!failed: invalid operation %d", op));
6380 		return (MDI_FAILURE);
6381 	}
6382 
6383 	vh = ph->ph_vhci;
6384 
6385 	if (cdip == NULL) {
6386 		/*
6387 		 * Need to mark the Phci as enabled/disabled.
6388 		 */
6389 		MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6390 		    "op %d for the phci", op));
6391 		MDI_PHCI_LOCK(ph);
6392 		switch (flags) {
6393 			case USER_DISABLE:
6394 				if (op == MDI_DISABLE_OP) {
6395 					MDI_PHCI_SET_USER_DISABLE(ph);
6396 				} else {
6397 					MDI_PHCI_SET_USER_ENABLE(ph);
6398 				}
6399 				break;
6400 			case DRIVER_DISABLE:
6401 				if (op == MDI_DISABLE_OP) {
6402 					MDI_PHCI_SET_DRV_DISABLE(ph);
6403 				} else {
6404 					MDI_PHCI_SET_DRV_ENABLE(ph);
6405 				}
6406 				break;
6407 			case DRIVER_DISABLE_TRANSIENT:
6408 				if (op == MDI_DISABLE_OP) {
6409 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6410 				} else {
6411 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6412 				}
6413 				break;
6414 			default:
6415 				MDI_PHCI_UNLOCK(ph);
6416 				MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6417 				    "!invalid flag argument= %d", flags));
6418 		}
6419 
6420 		/*
6421 		 * Phci has been disabled. Now try to enable/disable
6422 		 * path info's to each client.
6423 		 */
6424 		pip = ph->ph_path_head;
6425 		while (pip != NULL) {
6426 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6427 		}
6428 		MDI_PHCI_UNLOCK(ph);
6429 	} else {
6430 
6431 		/*
6432 		 * Disable a specific client.
6433 		 */
6434 		ct = i_devi_get_client(cdip);
6435 		if (ct == NULL) {
6436 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6437 			    "!failed: operation = %d: NULL ct", op));
6438 			return (MDI_FAILURE);
6439 		}
6440 
6441 		MDI_CLIENT_LOCK(ct);
6442 		pip = ct->ct_path_head;
6443 		found_it = 0;
6444 		while (pip != NULL) {
6445 			MDI_PI_LOCK(pip);
6446 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6447 			if (MDI_PI(pip)->pi_phci == ph) {
6448 				MDI_PI_UNLOCK(pip);
6449 				found_it = 1;
6450 				break;
6451 			}
6452 			MDI_PI_UNLOCK(pip);
6453 			pip = next;
6454 		}
6455 
6456 
6457 		MDI_CLIENT_UNLOCK(ct);
6458 		if (found_it == 0) {
6459 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6460 			    "!failed. Could not find corresponding pip\n"));
6461 			return (MDI_FAILURE);
6462 		}
6463 
6464 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
6465 	}
6466 
6467 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6468 	    "!op %d returning success pdip = %p cdip = %p",
6469 	    op, (void *)pdip, (void *)cdip));
6470 	return (MDI_SUCCESS);
6471 }
6472 
6473 /*
6474  * Ensure phci powered up
6475  */
6476 static void
6477 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6478 {
6479 	dev_info_t	*ph_dip;
6480 
6481 	ASSERT(pip != NULL);
6482 	ASSERT(MDI_PI_LOCKED(pip));
6483 
6484 	if (MDI_PI(pip)->pi_pm_held) {
6485 		return;
6486 	}
6487 
6488 	ph_dip = mdi_pi_get_phci(pip);
6489 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6490 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
6491 	if (ph_dip == NULL) {
6492 		return;
6493 	}
6494 
6495 	MDI_PI_UNLOCK(pip);
6496 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6497 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6498 	pm_hold_power(ph_dip);
6499 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6500 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6501 	MDI_PI_LOCK(pip);
6502 
6503 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6504 	if (DEVI(ph_dip)->devi_pm_info)
6505 		MDI_PI(pip)->pi_pm_held = 1;
6506 }
6507 
6508 /*
6509  * Allow phci powered down
6510  */
6511 static void
6512 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6513 {
6514 	dev_info_t	*ph_dip = NULL;
6515 
6516 	ASSERT(pip != NULL);
6517 	ASSERT(MDI_PI_LOCKED(pip));
6518 
6519 	if (MDI_PI(pip)->pi_pm_held == 0) {
6520 		return;
6521 	}
6522 
6523 	ph_dip = mdi_pi_get_phci(pip);
6524 	ASSERT(ph_dip != NULL);
6525 
6526 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6527 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
6528 
6529 	MDI_PI_UNLOCK(pip);
6530 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6531 	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6532 	pm_rele_power(ph_dip);
6533 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6534 	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6535 	MDI_PI_LOCK(pip);
6536 
6537 	MDI_PI(pip)->pi_pm_held = 0;
6538 }
6539 
6540 static void
6541 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6542 {
6543 	ASSERT(MDI_CLIENT_LOCKED(ct));
6544 
6545 	ct->ct_power_cnt += incr;
6546 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6547 	    "%p ct_power_cnt = %d incr = %d",
6548 	    (void *)ct, ct->ct_power_cnt, incr));
6549 	ASSERT(ct->ct_power_cnt >= 0);
6550 }
6551 
6552 static void
6553 i_mdi_rele_all_phci(mdi_client_t *ct)
6554 {
6555 	mdi_pathinfo_t  *pip;
6556 
6557 	ASSERT(MDI_CLIENT_LOCKED(ct));
6558 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6559 	while (pip != NULL) {
6560 		mdi_hold_path(pip);
6561 		MDI_PI_LOCK(pip);
6562 		i_mdi_pm_rele_pip(pip);
6563 		MDI_PI_UNLOCK(pip);
6564 		mdi_rele_path(pip);
6565 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6566 	}
6567 }
6568 
6569 static void
6570 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6571 {
6572 	ASSERT(MDI_CLIENT_LOCKED(ct));
6573 
6574 	if (i_ddi_devi_attached(ct->ct_dip)) {
6575 		ct->ct_power_cnt -= decr;
6576 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6577 		    "%p ct_power_cnt = %d decr = %d",
6578 		    (void *)ct, ct->ct_power_cnt, decr));
6579 	}
6580 
6581 	ASSERT(ct->ct_power_cnt >= 0);
6582 	if (ct->ct_power_cnt == 0) {
6583 		i_mdi_rele_all_phci(ct);
6584 		return;
6585 	}
6586 }
6587 
6588 static void
6589 i_mdi_pm_reset_client(mdi_client_t *ct)
6590 {
6591 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6592 	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6593 	ASSERT(MDI_CLIENT_LOCKED(ct));
6594 	ct->ct_power_cnt = 0;
6595 	i_mdi_rele_all_phci(ct);
6596 	ct->ct_powercnt_config = 0;
6597 	ct->ct_powercnt_unconfig = 0;
6598 	ct->ct_powercnt_reset = 1;
6599 }
6600 
6601 static int
6602 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6603 {
6604 	int		ret;
6605 	dev_info_t	*ph_dip;
6606 
6607 	MDI_PI_LOCK(pip);
6608 	i_mdi_pm_hold_pip(pip);
6609 
6610 	ph_dip = mdi_pi_get_phci(pip);
6611 	MDI_PI_UNLOCK(pip);
6612 
6613 	/* bring all components of phci to full power */
6614 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6615 	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6616 	    ddi_get_instance(ph_dip), (void *)pip));
6617 
6618 	ret = pm_powerup(ph_dip);
6619 
6620 	if (ret == DDI_FAILURE) {
6621 		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6622 		    "pm_powerup FAILED for %s%d %p",
6623 		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6624 		    (void *)pip));
6625 
6626 		MDI_PI_LOCK(pip);
6627 		i_mdi_pm_rele_pip(pip);
6628 		MDI_PI_UNLOCK(pip);
6629 		return (MDI_FAILURE);
6630 	}
6631 
6632 	return (MDI_SUCCESS);
6633 }
6634 
6635 static int
6636 i_mdi_power_all_phci(mdi_client_t *ct)
6637 {
6638 	mdi_pathinfo_t  *pip;
6639 	int		succeeded = 0;
6640 
6641 	ASSERT(MDI_CLIENT_LOCKED(ct));
6642 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6643 	while (pip != NULL) {
6644 		/*
6645 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6646 		 * or MDI_PATHINFO_STATE_OFFLINE.
6647 		 */
6648 		if (MDI_PI_IS_INIT(pip) ||
6649 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6650 			mdi_hold_path(pip);
6651 			MDI_CLIENT_UNLOCK(ct);
6652 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6653 				succeeded = 1;
6654 
6655 			ASSERT(ct == MDI_PI(pip)->pi_client);
6656 			MDI_CLIENT_LOCK(ct);
6657 			mdi_rele_path(pip);
6658 		}
6659 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6660 	}
6661 
6662 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6663 }
6664 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are handled here.
 *		BUS_POWER_NOINVOL is rejected and every other op is passed
 *		straight to pm_busop_bus_power().
 *
 *		parent, impl_arg: only forwarded to pm_busop_bus_power()
 *		op:	bus power operation being performed
 *		arg:	pm_bp_child_pwrchg_t * for the PRE/POST notifications,
 *			pm_bp_has_changed_t * for BUS_POWER_HAS_CHANGED
 *		result:	read as an int (DDI_SUCCESS or not) for
 *			BUS_POWER_POST_NOTIFICATION
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		(or whatever pm_busop_bus_power() returns for forwarded ops)
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/*
		 * Mark the transition in progress; the flag is cleared (and
		 * waiters are woken) in the POST_NOTIFICATION case below.
		 */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* nothing holds the paths yet: try to power up the phci(s) */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new client power state if the change succeeded */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* successful power down */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* transition finished: let waiting threads proceed */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level was raised: power up the phci(s) and hold them */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* dropped to level 0 from a level other than -1: release */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
6822 
6823 static int
6824 i_mdi_pm_pre_config_one(dev_info_t *child)
6825 {
6826 	int		ret = MDI_SUCCESS;
6827 	mdi_client_t	*ct;
6828 
6829 	ct = i_devi_get_client(child);
6830 	if (ct == NULL)
6831 		return (MDI_FAILURE);
6832 
6833 	MDI_CLIENT_LOCK(ct);
6834 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6835 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6836 
6837 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6838 		MDI_CLIENT_UNLOCK(ct);
6839 		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6840 		return (MDI_SUCCESS);
6841 	}
6842 
6843 	if (ct->ct_powercnt_config) {
6844 		MDI_CLIENT_UNLOCK(ct);
6845 		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6846 		return (MDI_SUCCESS);
6847 	}
6848 
6849 	if (ct->ct_power_cnt == 0) {
6850 		ret = i_mdi_power_all_phci(ct);
6851 	}
6852 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6853 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6854 	ct->ct_powercnt_config = 1;
6855 	ct->ct_powercnt_reset = 0;
6856 	MDI_CLIENT_UNLOCK(ct);
6857 	return (ret);
6858 }
6859 
6860 static int
6861 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6862 {
6863 	int			ret = MDI_SUCCESS;
6864 	dev_info_t		*cdip;
6865 	int			circ;
6866 
6867 	ASSERT(MDI_VHCI(vdip));
6868 
6869 	/* ndi_devi_config_one */
6870 	if (child) {
6871 		ASSERT(DEVI_BUSY_OWNED(vdip));
6872 		return (i_mdi_pm_pre_config_one(child));
6873 	}
6874 
6875 	/* devi_config_common */
6876 	ndi_devi_enter(vdip, &circ);
6877 	cdip = ddi_get_child(vdip);
6878 	while (cdip) {
6879 		dev_info_t *next = ddi_get_next_sibling(cdip);
6880 
6881 		ret = i_mdi_pm_pre_config_one(cdip);
6882 		if (ret != MDI_SUCCESS)
6883 			break;
6884 		cdip = next;
6885 	}
6886 	ndi_devi_exit(vdip, circ);
6887 	return (ret);
6888 }
6889 
6890 static int
6891 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6892 {
6893 	int		ret = MDI_SUCCESS;
6894 	mdi_client_t	*ct;
6895 
6896 	ct = i_devi_get_client(child);
6897 	if (ct == NULL)
6898 		return (MDI_FAILURE);
6899 
6900 	MDI_CLIENT_LOCK(ct);
6901 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6902 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6903 
6904 	if (!i_ddi_devi_attached(child)) {
6905 		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6906 		MDI_CLIENT_UNLOCK(ct);
6907 		return (MDI_SUCCESS);
6908 	}
6909 
6910 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6911 	    (flags & NDI_AUTODETACH)) {
6912 		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6913 		MDI_CLIENT_UNLOCK(ct);
6914 		return (MDI_FAILURE);
6915 	}
6916 
6917 	if (ct->ct_powercnt_unconfig) {
6918 		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6919 		MDI_CLIENT_UNLOCK(ct);
6920 		*held = 1;
6921 		return (MDI_SUCCESS);
6922 	}
6923 
6924 	if (ct->ct_power_cnt == 0) {
6925 		ret = i_mdi_power_all_phci(ct);
6926 	}
6927 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6928 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6929 	ct->ct_powercnt_unconfig = 1;
6930 	ct->ct_powercnt_reset = 0;
6931 	MDI_CLIENT_UNLOCK(ct);
6932 	if (ret == MDI_SUCCESS)
6933 		*held = 1;
6934 	return (ret);
6935 }
6936 
6937 static int
6938 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6939     int flags)
6940 {
6941 	int			ret = MDI_SUCCESS;
6942 	dev_info_t		*cdip;
6943 	int			circ;
6944 
6945 	ASSERT(MDI_VHCI(vdip));
6946 	*held = 0;
6947 
6948 	/* ndi_devi_unconfig_one */
6949 	if (child) {
6950 		ASSERT(DEVI_BUSY_OWNED(vdip));
6951 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6952 	}
6953 
6954 	/* devi_unconfig_common */
6955 	ndi_devi_enter(vdip, &circ);
6956 	cdip = ddi_get_child(vdip);
6957 	while (cdip) {
6958 		dev_info_t *next = ddi_get_next_sibling(cdip);
6959 
6960 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6961 		cdip = next;
6962 	}
6963 	ndi_devi_exit(vdip, circ);
6964 
6965 	if (*held)
6966 		ret = MDI_SUCCESS;
6967 
6968 	return (ret);
6969 }
6970 
/*
 * i_mdi_pm_post_config_one():
 *		Post-config power handling for one client node.  Nothing is
 *		done unless a config hold is recorded (ct_powercnt_config set
 *		and ct_powercnt_reset clear) and the client is no longer in
 *		the FAILED state.  Otherwise the power counts are either
 *		reset, or released by the number of ONLINE/STANDBY paths, and
 *		ct_powercnt_config is cleared.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait for any power level change in progress to finish */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no config hold outstanding (never taken, or already reset) */
	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		/* count the paths that are currently ONLINE or STANDBY */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	/* the config hold has been dealt with on either branch */
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}
7023 
7024 static void
7025 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7026 {
7027 	int		circ;
7028 	dev_info_t	*cdip;
7029 
7030 	ASSERT(MDI_VHCI(vdip));
7031 
7032 	/* ndi_devi_config_one */
7033 	if (child) {
7034 		ASSERT(DEVI_BUSY_OWNED(vdip));
7035 		i_mdi_pm_post_config_one(child);
7036 		return;
7037 	}
7038 
7039 	/* devi_config_common */
7040 	ndi_devi_enter(vdip, &circ);
7041 	cdip = ddi_get_child(vdip);
7042 	while (cdip) {
7043 		dev_info_t *next = ddi_get_next_sibling(cdip);
7044 
7045 		i_mdi_pm_post_config_one(cdip);
7046 		cdip = next;
7047 	}
7048 	ndi_devi_exit(vdip, circ);
7049 }
7050 
/*
 * i_mdi_pm_post_unconfig_one():
 *		Post-unconfig power handling for one client node.  Nothing is
 *		done unless an unconfig hold is recorded (ct_powercnt_unconfig
 *		set and ct_powercnt_reset clear).  Otherwise the power counts
 *		are either reset, or released by the number of ONLINE/STANDBY
 *		paths with ct_powercnt_unconfig cleared.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait for any power level change in progress to finish */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no unconfig hold outstanding (never taken, or already reset) */
	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		/* count the paths that are currently ONLINE or STANDBY */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		/* only cleared on this branch; the reset branch does not touch it here */
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}
7097 
7098 static void
7099 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7100 {
7101 	int			circ;
7102 	dev_info_t		*cdip;
7103 
7104 	ASSERT(MDI_VHCI(vdip));
7105 
7106 	if (!held) {
7107 		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7108 		return;
7109 	}
7110 
7111 	if (child) {
7112 		ASSERT(DEVI_BUSY_OWNED(vdip));
7113 		i_mdi_pm_post_unconfig_one(child);
7114 		return;
7115 	}
7116 
7117 	ndi_devi_enter(vdip, &circ);
7118 	cdip = ddi_get_child(vdip);
7119 	while (cdip) {
7120 		dev_info_t *next = ddi_get_next_sibling(cdip);
7121 
7122 		i_mdi_pm_post_unconfig_one(cdip);
7123 		cdip = next;
7124 	}
7125 	ndi_devi_exit(vdip, circ);
7126 }
7127 
7128 int
7129 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7130 {
7131 	int			circ, ret = MDI_SUCCESS;
7132 	dev_info_t		*client_dip = NULL;
7133 	mdi_client_t		*ct;
7134 
7135 	/*
7136 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7137 	 * Power up pHCI for the named client device.
7138 	 * Note: Before the client is enumerated under vhci by phci,
7139 	 * client_dip can be NULL. Then proceed to power up all the
7140 	 * pHCIs.
7141 	 */
7142 	if (devnm != NULL) {
7143 		ndi_devi_enter(vdip, &circ);
7144 		client_dip = ndi_devi_findchild(vdip, devnm);
7145 	}
7146 
7147 	MDI_DEBUG(4, (MDI_NOTE, vdip,
7148 	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7149 
7150 	switch (op) {
7151 	case MDI_PM_PRE_CONFIG:
7152 		ret = i_mdi_pm_pre_config(vdip, client_dip);
7153 		break;
7154 
7155 	case MDI_PM_PRE_UNCONFIG:
7156 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7157 		    flags);
7158 		break;
7159 
7160 	case MDI_PM_POST_CONFIG:
7161 		i_mdi_pm_post_config(vdip, client_dip);
7162 		break;
7163 
7164 	case MDI_PM_POST_UNCONFIG:
7165 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7166 		break;
7167 
7168 	case MDI_PM_HOLD_POWER:
7169 	case MDI_PM_RELE_POWER:
7170 		ASSERT(args);
7171 
7172 		client_dip = (dev_info_t *)args;
7173 		ASSERT(MDI_CLIENT(client_dip));
7174 
7175 		ct = i_devi_get_client(client_dip);
7176 		MDI_CLIENT_LOCK(ct);
7177 
7178 		if (op == MDI_PM_HOLD_POWER) {
7179 			if (ct->ct_power_cnt == 0) {
7180 				(void) i_mdi_power_all_phci(ct);
7181 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7182 				    "i_mdi_pm_hold_client\n"));
7183 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
7184 			}
7185 		} else {
7186 			if (DEVI_IS_ATTACHING(client_dip)) {
7187 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7188 				    "i_mdi_pm_rele_client\n"));
7189 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
7190 			} else {
7191 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7192 				    "i_mdi_pm_reset_client\n"));
7193 				i_mdi_pm_reset_client(ct);
7194 			}
7195 		}
7196 
7197 		MDI_CLIENT_UNLOCK(ct);
7198 		break;
7199 
7200 	default:
7201 		break;
7202 	}
7203 
7204 	if (devnm)
7205 		ndi_devi_exit(vdip, circ);
7206 
7207 	return (ret);
7208 }
7209 
7210 int
7211 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7212 {
7213 	mdi_vhci_t *vhci;
7214 
7215 	if (!MDI_VHCI(dip))
7216 		return (MDI_FAILURE);
7217 
7218 	if (mdi_class) {
7219 		vhci = DEVI(dip)->devi_mdi_xhci;
7220 		ASSERT(vhci);
7221 		*mdi_class = vhci->vh_class;
7222 	}
7223 
7224 	return (MDI_SUCCESS);
7225 }
7226 
7227 int
7228 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7229 {
7230 	mdi_phci_t *phci;
7231 
7232 	if (!MDI_PHCI(dip))
7233 		return (MDI_FAILURE);
7234 
7235 	if (mdi_class) {
7236 		phci = DEVI(dip)->devi_mdi_xhci;
7237 		ASSERT(phci);
7238 		*mdi_class = phci->ph_vhci->vh_class;
7239 	}
7240 
7241 	return (MDI_SUCCESS);
7242 }
7243 
7244 int
7245 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7246 {
7247 	mdi_client_t *client;
7248 
7249 	if (!MDI_CLIENT(dip))
7250 		return (MDI_FAILURE);
7251 
7252 	if (mdi_class) {
7253 		client = DEVI(dip)->devi_mdi_client;
7254 		ASSERT(client);
7255 		*mdi_class = client->ct_vhci->vh_class;
7256 	}
7257 
7258 	return (MDI_SUCCESS);
7259 }
7260 
7261 void *
7262 mdi_client_get_vhci_private(dev_info_t *dip)
7263 {
7264 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7265 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7266 		mdi_client_t	*ct;
7267 		ct = i_devi_get_client(dip);
7268 		return (ct->ct_vprivate);
7269 	}
7270 	return (NULL);
7271 }
7272 
7273 void
7274 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7275 {
7276 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7277 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7278 		mdi_client_t	*ct;
7279 		ct = i_devi_get_client(dip);
7280 		ct->ct_vprivate = data;
7281 	}
7282 }
7283 /*
7284  * mdi_pi_get_vhci_private():
7285  *		Get the vhci private information associated with the
7286  *		mdi_pathinfo node
7287  */
7288 void *
7289 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7290 {
7291 	caddr_t	vprivate = NULL;
7292 	if (pip) {
7293 		vprivate = MDI_PI(pip)->pi_vprivate;
7294 	}
7295 	return (vprivate);
7296 }
7297 
7298 /*
7299  * mdi_pi_set_vhci_private():
7300  *		Set the vhci private information in the mdi_pathinfo node
7301  */
7302 void
7303 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7304 {
7305 	if (pip) {
7306 		MDI_PI(pip)->pi_vprivate = priv;
7307 	}
7308 }
7309 
7310 /*
7311  * mdi_phci_get_vhci_private():
7312  *		Get the vhci private information associated with the
7313  *		mdi_phci node
7314  */
7315 void *
7316 mdi_phci_get_vhci_private(dev_info_t *dip)
7317 {
7318 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7319 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7320 		mdi_phci_t	*ph;
7321 		ph = i_devi_get_phci(dip);
7322 		return (ph->ph_vprivate);
7323 	}
7324 	return (NULL);
7325 }
7326 
7327 /*
7328  * mdi_phci_set_vhci_private():
7329  *		Set the vhci private information in the mdi_phci node
7330  */
7331 void
7332 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7333 {
7334 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7335 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7336 		mdi_phci_t	*ph;
7337 		ph = i_devi_get_phci(dip);
7338 		ph->ph_vprivate = priv;
7339 	}
7340 }
7341 
/*
 * mdi_pi_ishidden():
 *		Return the result of MDI_PI_FLAGS_IS_HIDDEN() for the path,
 *		i.e. non-zero when the path is flagged hidden.
 */
int
mdi_pi_ishidden(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
}
7347 
/*
 * mdi_pi_device_isremoved():
 *		Return the result of MDI_PI_FLAGS_IS_DEVICE_REMOVED() for the
 *		path, i.e. non-zero when the path is flagged device-removed.
 */
int
mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
}
7353 
7354 /* Return 1 if all client paths are device_removed */
7355 static int
7356 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7357 {
7358 	mdi_pathinfo_t  *pip;
7359 	int		all_devices_removed = 1;
7360 
7361 	MDI_CLIENT_LOCK(ct);
7362 	for (pip = ct->ct_path_head; pip;
7363 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7364 		if (!mdi_pi_device_isremoved(pip)) {
7365 			all_devices_removed = 0;
7366 			break;
7367 		}
7368 	}
7369 	MDI_CLIENT_UNLOCK(ct);
7370 	return (all_devices_removed);
7371 }
7372 
7373 /*
7374  * When processing path hotunplug, represent device removal.
7375  */
7376 int
7377 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7378 {
7379 	mdi_client_t	*ct;
7380 
7381 	MDI_PI_LOCK(pip);
7382 	if (mdi_pi_device_isremoved(pip)) {
7383 		MDI_PI_UNLOCK(pip);
7384 		return (0);
7385 	}
7386 	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7387 	MDI_PI_FLAGS_SET_HIDDEN(pip);
7388 	MDI_PI_UNLOCK(pip);
7389 
7390 	/*
7391 	 * If all paths associated with the client are now DEVICE_REMOVED,
7392 	 * reflect DEVICE_REMOVED in the client.
7393 	 */
7394 	ct = MDI_PI(pip)->pi_client;
7395 	if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7396 		(void) ndi_devi_device_remove(ct->ct_dip);
7397 	else
7398 		i_ddi_di_cache_invalidate();
7399 
7400 	return (1);
7401 }
7402 
7403 /*
7404  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7405  * is now accessible then this interfaces is used to represent device insertion.
7406  */
7407 int
7408 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7409 {
7410 	MDI_PI_LOCK(pip);
7411 	if (!mdi_pi_device_isremoved(pip)) {
7412 		MDI_PI_UNLOCK(pip);
7413 		return (0);
7414 	}
7415 	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7416 	MDI_PI_FLAGS_CLR_HIDDEN(pip);
7417 	MDI_PI_UNLOCK(pip);
7418 
7419 	i_ddi_di_cache_invalidate();
7420 
7421 	return (1);
7422 }
7423 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
/* number of entries in vhci_class_list[] */
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * Indexed in parallel with vhci_class_list[]: setup_vhci_cache() takes the
 * entry at the index of the matching class name and sets the slot to NULL.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
/* NOTE(review): presumably the value stored under MDI_NVPNAME_VERSION */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
7444 
7445 /*
7446  * Given vhci class name, return its on-disk vhci cache filename.
7447  * Memory for the returned filename which includes the full path is allocated
7448  * by this function.
7449  */
7450 static char *
7451 vhclass2vhcache_filename(char *vhclass)
7452 {
7453 	char *filename;
7454 	int len;
7455 	static char *fmt = "/etc/devices/mdi_%s_cache";
7456 
7457 	/*
7458 	 * fmt contains the on-disk vhci cache file name format;
7459 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7460 	 */
7461 
7462 	/* the -1 below is to account for "%s" in the format string */
7463 	len = strlen(fmt) + strlen(vhclass) - 1;
7464 	filename = kmem_alloc(len, KM_SLEEP);
7465 	(void) snprintf(filename, len, fmt, vhclass);
7466 	ASSERT(len == (strlen(filename) + 1));
7467 	return (filename);
7468 }
7469 
/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 *
 * Allocates the mdi_vhci_config_t for vh and stores it in vh->vh_config,
 * initializes its locks and client hash, loads the cache contents from the
 * nvlist saved at boot (or from disk), and registers the
 * stop_vhcache_flush_thread() callback.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 * Take over the nvlist saved for this class, if any, and clear the
	 * slot so it is consumed only once.
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		/* populate the in-core cache; the nvlist is freed either way */
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else  {
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* the returned id is passed to callb_delete() on teardown */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	/* per-vhci copies of the global path discovery settings */
	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}
7540 
7541 /*
7542  * free all vhci cache related resources
7543  */
7544 static int
7545 destroy_vhci_cache(mdi_vhci_t *vh)
7546 {
7547 	mdi_vhci_config_t *vhc = vh->vh_config;
7548 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7549 	mdi_vhcache_phci_t *cphci, *cphci_next;
7550 	mdi_vhcache_client_t *cct, *cct_next;
7551 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7552 
7553 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7554 		return (MDI_FAILURE);
7555 
7556 	kmem_free(vhc->vhc_vhcache_filename,
7557 	    strlen(vhc->vhc_vhcache_filename) + 1);
7558 
7559 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7560 
7561 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7562 	    cphci = cphci_next) {
7563 		cphci_next = cphci->cphci_next;
7564 		free_vhcache_phci(cphci);
7565 	}
7566 
7567 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7568 		cct_next = cct->cct_next;
7569 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7570 			cpi_next = cpi->cpi_next;
7571 			free_vhcache_pathinfo(cpi);
7572 		}
7573 		free_vhcache_client(cct);
7574 	}
7575 
7576 	rw_destroy(&vhcache->vhcache_lock);
7577 
7578 	mutex_destroy(&vhc->vhc_lock);
7579 	cv_destroy(&vhc->vhc_cv);
7580 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
7581 	return (MDI_SUCCESS);
7582 }
7583 
/*
 * Stop all vhci cache related async threads and free their resources.
 *
 * Returns MDI_FAILURE if a dirty cache could not be flushed (the cache is
 * marked dirty again) or if callb_delete() fails; MDI_SUCCESS otherwise.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	/* ask the threads to exit and wake any that wait on vhc_cv */
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/*
	 * Poll, dropping the lock while delaying, until the flush thread
	 * flag is clear and the async client config thread count is zero.
	 */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	/* discard async client config requests that are still queued */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	/* flush a dirty cache now; vhc_lock is dropped before flushing */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	/* remove the callback registered with callb_add() */
	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}
7629 
7630 /*
7631  * Stop vhci cache flush thread
7632  */
7633 /* ARGSUSED */
7634 static boolean_t
7635 stop_vhcache_flush_thread(void *arg, int code)
7636 {
7637 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7638 
7639 	mutex_enter(&vhc->vhc_lock);
7640 	vhc->vhc_flags |= MDI_VHC_EXIT;
7641 	cv_broadcast(&vhc->vhc_cv);
7642 
7643 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7644 		mutex_exit(&vhc->vhc_lock);
7645 		delay_random(mdi_delay);
7646 		mutex_enter(&vhc->vhc_lock);
7647 	}
7648 
7649 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7650 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7651 		mutex_exit(&vhc->vhc_lock);
7652 		(void) flush_vhcache(vhc, 1);
7653 	} else
7654 		mutex_exit(&vhc->vhc_lock);
7655 
7656 	return (B_TRUE);
7657 }
7658 
7659 /*
7660  * Enqueue the vhcache phci (cphci) at the tail of the list
7661  */
7662 static void
7663 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7664 {
7665 	cphci->cphci_next = NULL;
7666 	if (vhcache->vhcache_phci_head == NULL)
7667 		vhcache->vhcache_phci_head = cphci;
7668 	else
7669 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7670 	vhcache->vhcache_phci_tail = cphci;
7671 }
7672 
7673 /*
7674  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7675  */
7676 static void
7677 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7678     mdi_vhcache_pathinfo_t *cpi)
7679 {
7680 	cpi->cpi_next = NULL;
7681 	if (cct->cct_cpi_head == NULL)
7682 		cct->cct_cpi_head = cpi;
7683 	else
7684 		cct->cct_cpi_tail->cpi_next = cpi;
7685 	cct->cct_cpi_tail = cpi;
7686 }
7687 
7688 /*
7689  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7690  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7691  * flag set come at the beginning of the list. All cpis which have this
7692  * flag set come at the end of the list.
7693  */
7694 static void
7695 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7696     mdi_vhcache_pathinfo_t *newcpi)
7697 {
7698 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7699 
7700 	if (cct->cct_cpi_head == NULL ||
7701 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7702 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7703 	else {
7704 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7705 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7706 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7707 			;
7708 
7709 		if (prev_cpi == NULL)
7710 			cct->cct_cpi_head = newcpi;
7711 		else
7712 			prev_cpi->cpi_next = newcpi;
7713 
7714 		newcpi->cpi_next = cpi;
7715 
7716 		if (cpi == NULL)
7717 			cct->cct_cpi_tail = newcpi;
7718 	}
7719 }
7720 
7721 /*
7722  * Enqueue the vhcache client (cct) at the tail of the list
7723  */
7724 static void
7725 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7726     mdi_vhcache_client_t *cct)
7727 {
7728 	cct->cct_next = NULL;
7729 	if (vhcache->vhcache_client_head == NULL)
7730 		vhcache->vhcache_client_head = cct;
7731 	else
7732 		vhcache->vhcache_client_tail->cct_next = cct;
7733 	vhcache->vhcache_client_tail = cct;
7734 }
7735 
/*
 * Free an array of 'nelem' string pointers together with every non-NULL
 * string in it.  Each string is freed as strlen() + 1 bytes.  A NULL array
 * is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] == NULL)
			continue;
		kmem_free(str[idx], strlen(str[idx]) + 1);
	}

	kmem_free(str, sizeof (char *) * nelem);
}
7749 
7750 static void
7751 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7752 {
7753 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7754 	kmem_free(cphci, sizeof (*cphci));
7755 }
7756 
7757 static void
7758 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7759 {
7760 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7761 	kmem_free(cpi, sizeof (*cpi));
7762 }
7763 
7764 static void
7765 free_vhcache_client(mdi_vhcache_client_t *cct)
7766 {
7767 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7768 	kmem_free(cct, sizeof (*cct));
7769 }
7770 
7771 static char *
7772 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7773 {
7774 	char *name_addr;
7775 	int len;
7776 
7777 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7778 	name_addr = kmem_alloc(len, KM_SLEEP);
7779 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7780 
7781 	if (ret_len)
7782 		*ret_len = len;
7783 	return (name_addr);
7784 }
7785 
7786 /*
7787  * Copy the contents of paddrnvl to vhci cache.
7788  * paddrnvl nvlist contains path information for a vhci client.
7789  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7790  */
7791 static void
7792 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7793     mdi_vhcache_client_t *cct)
7794 {
7795 	nvpair_t *nvp = NULL;
7796 	mdi_vhcache_pathinfo_t *cpi;
7797 	uint_t nelem;
7798 	uint32_t *val;
7799 
7800 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7801 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7802 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7803 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7804 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7805 		ASSERT(nelem == 2);
7806 		cpi->cpi_cphci = cphci_list[val[0]];
7807 		cpi->cpi_flags = val[1];
7808 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7809 	}
7810 }
7811 
7812 /*
7813  * Copy the contents of caddrmapnvl to vhci cache.
7814  * caddrmapnvl nvlist contains vhci client address to phci client address
7815  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7816  * this nvlist.
7817  */
7818 static void
7819 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7820     mdi_vhcache_phci_t *cphci_list[])
7821 {
7822 	nvpair_t *nvp = NULL;
7823 	nvlist_t *paddrnvl;
7824 	mdi_vhcache_client_t *cct;
7825 
7826 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7827 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7828 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7829 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7830 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7831 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7832 		/* the client must contain at least one path */
7833 		ASSERT(cct->cct_cpi_head != NULL);
7834 
7835 		enqueue_vhcache_client(vhcache, cct);
7836 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7837 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7838 	}
7839 }
7840 
7841 /*
7842  * Copy the contents of the main nvlist to vhci cache.
7843  *
7844  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7845  * The nvlist contains the mappings between the vhci client addresses and
7846  * their corresponding phci client addresses.
7847  *
7848  * The structure of the nvlist is as follows:
7849  *
7850  * Main nvlist:
7851  *	NAME		TYPE		DATA
7852  *	version		int32		version number
7853  *	phcis		string array	array of phci paths
7854  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7855  *
7856  * structure of c2paddrs_nvl:
7857  *	NAME		TYPE		DATA
7858  *	caddr1		nvlist_t	paddrs_nvl1
7859  *	caddr2		nvlist_t	paddrs_nvl2
7860  *	...
7861  * where caddr1, caddr2, ... are vhci client name and addresses in the
7862  * form of "<clientname>@<clientaddress>".
7863  * (for example: "ssd@2000002037cd9f72");
7864  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7865  *
7866  * structure of paddrs_nvl:
7867  *	NAME		TYPE		DATA
7868  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7869  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7870  *	...
7871  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7872  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7873  * phci-ids are integers that identify pHCIs to which the
7874  * the bus specific address belongs to. These integers are used as an index
7875  * into to the phcis string array in the main nvlist to get the pHCI path.
7876  */
7877 static int
7878 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7879 {
7880 	char **phcis, **phci_namep;
7881 	uint_t nphcis;
7882 	mdi_vhcache_phci_t *cphci, **cphci_list;
7883 	nvlist_t *caddrmapnvl;
7884 	int32_t ver;
7885 	int i;
7886 	size_t cphci_list_size;
7887 
7888 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7889 
7890 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7891 	    ver != MDI_VHCI_CACHE_VERSION)
7892 		return (MDI_FAILURE);
7893 
7894 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7895 	    &nphcis) != 0)
7896 		return (MDI_SUCCESS);
7897 
7898 	ASSERT(nphcis > 0);
7899 
7900 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7901 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7902 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7903 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7904 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7905 		enqueue_vhcache_phci(vhcache, cphci);
7906 		cphci_list[i] = cphci;
7907 	}
7908 
7909 	ASSERT(vhcache->vhcache_phci_head != NULL);
7910 
7911 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7912 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7913 
7914 	kmem_free(cphci_list, cphci_list_size);
7915 	return (MDI_SUCCESS);
7916 }
7917 
7918 /*
7919  * Build paddrnvl for the specified client using the information in the
7920  * vhci cache and add it to the caddrmapnnvl.
7921  * Returns 0 on success, errno on failure.
7922  */
7923 static int
7924 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7925     nvlist_t *caddrmapnvl)
7926 {
7927 	mdi_vhcache_pathinfo_t *cpi;
7928 	nvlist_t *nvl;
7929 	int err;
7930 	uint32_t val[2];
7931 
7932 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7933 
7934 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7935 		return (err);
7936 
7937 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7938 		val[0] = cpi->cpi_cphci->cphci_id;
7939 		val[1] = cpi->cpi_flags;
7940 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7941 		    != 0)
7942 			goto out;
7943 	}
7944 
7945 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7946 out:
7947 	nvlist_free(nvl);
7948 	return (err);
7949 }
7950 
7951 /*
7952  * Build caddrmapnvl using the information in the vhci cache
7953  * and add it to the mainnvl.
7954  * Returns 0 on success, errno on failure.
7955  */
7956 static int
7957 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7958 {
7959 	mdi_vhcache_client_t *cct;
7960 	nvlist_t *nvl;
7961 	int err;
7962 
7963 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7964 
7965 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7966 		return (err);
7967 
7968 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7969 	    cct = cct->cct_next) {
7970 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7971 			goto out;
7972 	}
7973 
7974 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7975 out:
7976 	nvlist_free(nvl);
7977 	return (err);
7978 }
7979 
7980 /*
7981  * Build nvlist using the information in the vhci cache.
7982  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7983  * Returns nvl on success, NULL on failure.
7984  */
7985 static nvlist_t *
7986 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7987 {
7988 	mdi_vhcache_phci_t *cphci;
7989 	uint_t phci_count;
7990 	char **phcis;
7991 	nvlist_t *nvl;
7992 	int err, i;
7993 
7994 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7995 		nvl = NULL;
7996 		goto out;
7997 	}
7998 
7999 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
8000 	    MDI_VHCI_CACHE_VERSION)) != 0)
8001 		goto out;
8002 
8003 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8004 	if (vhcache->vhcache_phci_head == NULL) {
8005 		rw_exit(&vhcache->vhcache_lock);
8006 		return (nvl);
8007 	}
8008 
8009 	phci_count = 0;
8010 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8011 	    cphci = cphci->cphci_next)
8012 		cphci->cphci_id = phci_count++;
8013 
8014 	/* build phci pathname list */
8015 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8016 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8017 	    cphci = cphci->cphci_next, i++)
8018 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8019 
8020 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8021 	    phci_count);
8022 	free_string_array(phcis, phci_count);
8023 
8024 	if (err == 0 &&
8025 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8026 		rw_exit(&vhcache->vhcache_lock);
8027 		return (nvl);
8028 	}
8029 
8030 	rw_exit(&vhcache->vhcache_lock);
8031 out:
8032 	if (nvl)
8033 		nvlist_free(nvl);
8034 	return (NULL);
8035 }
8036 
8037 /*
8038  * Lookup vhcache phci structure for the specified phci path.
8039  */
8040 static mdi_vhcache_phci_t *
8041 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8042 {
8043 	mdi_vhcache_phci_t *cphci;
8044 
8045 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8046 
8047 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8048 	    cphci = cphci->cphci_next) {
8049 		if (strcmp(cphci->cphci_path, phci_path) == 0)
8050 			return (cphci);
8051 	}
8052 
8053 	return (NULL);
8054 }
8055 
8056 /*
8057  * Lookup vhcache phci structure for the specified phci.
8058  */
8059 static mdi_vhcache_phci_t *
8060 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8061 {
8062 	mdi_vhcache_phci_t *cphci;
8063 
8064 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8065 
8066 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8067 	    cphci = cphci->cphci_next) {
8068 		if (cphci->cphci_phci == ph)
8069 			return (cphci);
8070 	}
8071 
8072 	return (NULL);
8073 }
8074 
8075 /*
8076  * Add the specified phci to the vhci cache if not already present.
8077  */
8078 static void
8079 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8080 {
8081 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8082 	mdi_vhcache_phci_t *cphci;
8083 	char *pathname;
8084 	int cache_updated;
8085 
8086 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8087 
8088 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8089 	(void) ddi_pathname(ph->ph_dip, pathname);
8090 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8091 	    != NULL) {
8092 		cphci->cphci_phci = ph;
8093 		cache_updated = 0;
8094 	} else {
8095 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8096 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8097 		cphci->cphci_phci = ph;
8098 		enqueue_vhcache_phci(vhcache, cphci);
8099 		cache_updated = 1;
8100 	}
8101 
8102 	rw_exit(&vhcache->vhcache_lock);
8103 
8104 	/*
8105 	 * Since a new phci has been added, reset
8106 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
8107 	 * during next vhcache_discover_paths().
8108 	 */
8109 	mutex_enter(&vhc->vhc_lock);
8110 	vhc->vhc_path_discovery_cutoff_time = 0;
8111 	mutex_exit(&vhc->vhc_lock);
8112 
8113 	kmem_free(pathname, MAXPATHLEN);
8114 	if (cache_updated)
8115 		vhcache_dirty(vhc);
8116 }
8117 
8118 /*
8119  * Remove the reference to the specified phci from the vhci cache.
8120  */
8121 static void
8122 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8123 {
8124 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8125 	mdi_vhcache_phci_t *cphci;
8126 
8127 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8128 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8129 		/* do not remove the actual mdi_vhcache_phci structure */
8130 		cphci->cphci_phci = NULL;
8131 	}
8132 	rw_exit(&vhcache->vhcache_lock);
8133 }
8134 
8135 static void
8136 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8137     mdi_vhcache_lookup_token_t *src)
8138 {
8139 	if (src == NULL) {
8140 		dst->lt_cct = NULL;
8141 		dst->lt_cct_lookup_time = 0;
8142 	} else {
8143 		dst->lt_cct = src->lt_cct;
8144 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8145 	}
8146 }
8147 
8148 /*
8149  * Look up vhcache client for the specified client.
8150  */
8151 static mdi_vhcache_client_t *
8152 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8153     mdi_vhcache_lookup_token_t *token)
8154 {
8155 	mod_hash_val_t hv;
8156 	char *name_addr;
8157 	int len;
8158 
8159 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8160 
8161 	/*
8162 	 * If no vhcache clean occurred since the last lookup, we can
8163 	 * simply return the cct from the last lookup operation.
8164 	 * It works because ccts are never freed except during the vhcache
8165 	 * cleanup operation.
8166 	 */
8167 	if (token != NULL &&
8168 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8169 		return (token->lt_cct);
8170 
8171 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8172 	if (mod_hash_find(vhcache->vhcache_client_hash,
8173 	    (mod_hash_key_t)name_addr, &hv) == 0) {
8174 		if (token) {
8175 			token->lt_cct = (mdi_vhcache_client_t *)hv;
8176 			token->lt_cct_lookup_time = ddi_get_lbolt64();
8177 		}
8178 	} else {
8179 		if (token) {
8180 			token->lt_cct = NULL;
8181 			token->lt_cct_lookup_time = 0;
8182 		}
8183 		hv = NULL;
8184 	}
8185 	kmem_free(name_addr, len);
8186 	return ((mdi_vhcache_client_t *)hv);
8187 }
8188 
8189 /*
8190  * Add the specified path to the vhci cache if not already present.
8191  * Also add the vhcache client for the client corresponding to this path
8192  * if it doesn't already exist.
8193  */
8194 static void
8195 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8196 {
8197 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8198 	mdi_vhcache_client_t *cct;
8199 	mdi_vhcache_pathinfo_t *cpi;
8200 	mdi_phci_t *ph = pip->pi_phci;
8201 	mdi_client_t *ct = pip->pi_client;
8202 	int cache_updated = 0;
8203 
8204 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8205 
8206 	/* if vhcache client for this pip doesn't already exist, add it */
8207 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8208 	    NULL)) == NULL) {
8209 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8210 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8211 		    ct->ct_guid, NULL);
8212 		enqueue_vhcache_client(vhcache, cct);
8213 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
8214 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8215 		cache_updated = 1;
8216 	}
8217 
8218 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8219 		if (cpi->cpi_cphci->cphci_phci == ph &&
8220 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8221 			cpi->cpi_pip = pip;
8222 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8223 				cpi->cpi_flags &=
8224 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8225 				sort_vhcache_paths(cct);
8226 				cache_updated = 1;
8227 			}
8228 			break;
8229 		}
8230 	}
8231 
8232 	if (cpi == NULL) {
8233 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8234 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8235 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8236 		ASSERT(cpi->cpi_cphci != NULL);
8237 		cpi->cpi_pip = pip;
8238 		enqueue_vhcache_pathinfo(cct, cpi);
8239 		cache_updated = 1;
8240 	}
8241 
8242 	rw_exit(&vhcache->vhcache_lock);
8243 
8244 	if (cache_updated)
8245 		vhcache_dirty(vhc);
8246 }
8247 
8248 /*
8249  * Remove the reference to the specified path from the vhci cache.
8250  */
8251 static void
8252 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8253 {
8254 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8255 	mdi_client_t *ct = pip->pi_client;
8256 	mdi_vhcache_client_t *cct;
8257 	mdi_vhcache_pathinfo_t *cpi;
8258 
8259 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8260 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8261 	    NULL)) != NULL) {
8262 		for (cpi = cct->cct_cpi_head; cpi != NULL;
8263 		    cpi = cpi->cpi_next) {
8264 			if (cpi->cpi_pip == pip) {
8265 				cpi->cpi_pip = NULL;
8266 				break;
8267 			}
8268 		}
8269 	}
8270 	rw_exit(&vhcache->vhcache_lock);
8271 }
8272 
8273 /*
8274  * Flush the vhci cache to disk.
8275  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8276  */
8277 static int
8278 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8279 {
8280 	nvlist_t *nvl;
8281 	int err;
8282 	int rv;
8283 
8284 	/*
8285 	 * It is possible that the system may shutdown before
8286 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
8287 	 * flushing the cache in this case do not check for
8288 	 * i_ddi_io_initialized when force flag is set.
8289 	 */
8290 	if (force_flag == 0 && !i_ddi_io_initialized())
8291 		return (MDI_FAILURE);
8292 
8293 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8294 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8295 		nvlist_free(nvl);
8296 	} else
8297 		err = EFAULT;
8298 
8299 	rv = MDI_SUCCESS;
8300 	mutex_enter(&vhc->vhc_lock);
8301 	if (err != 0) {
8302 		if (err == EROFS) {
8303 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8304 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8305 			    MDI_VHC_VHCACHE_DIRTY);
8306 		} else {
8307 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8308 				cmn_err(CE_CONT, "%s: update failed\n",
8309 				    vhc->vhc_vhcache_filename);
8310 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8311 			}
8312 			rv = MDI_FAILURE;
8313 		}
8314 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8315 		cmn_err(CE_CONT,
8316 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
8317 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8318 	}
8319 	mutex_exit(&vhc->vhc_lock);
8320 
8321 	return (rv);
8322 }
8323 
8324 /*
8325  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
8326  * Exits itself if left idle for the idle timeout period.
8327  */
8328 static void
8329 vhcache_flush_thread(void *arg)
8330 {
8331 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8332 	clock_t idle_time, quit_at_ticks;
8333 	callb_cpr_t cprinfo;
8334 
8335 	/* number of seconds to sleep idle before exiting */
8336 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
8337 
8338 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8339 	    "mdi_vhcache_flush");
8340 	mutex_enter(&vhc->vhc_lock);
8341 	for (; ; ) {
8342 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8343 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
8344 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
8345 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
8346 				(void) cv_timedwait(&vhc->vhc_cv,
8347 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
8348 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8349 			} else {
8350 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
8351 				mutex_exit(&vhc->vhc_lock);
8352 
8353 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
8354 					vhcache_dirty(vhc);
8355 
8356 				mutex_enter(&vhc->vhc_lock);
8357 			}
8358 		}
8359 
8360 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8361 
8362 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8363 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
8364 		    ddi_get_lbolt() < quit_at_ticks) {
8365 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8366 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8367 			    quit_at_ticks);
8368 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8369 		}
8370 
8371 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8372 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
8373 			goto out;
8374 	}
8375 
8376 out:
8377 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
8378 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8379 	CALLB_CPR_EXIT(&cprinfo);
8380 }
8381 
8382 /*
8383  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8384  */
8385 static void
8386 vhcache_dirty(mdi_vhci_config_t *vhc)
8387 {
8388 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8389 	int create_thread;
8390 
8391 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8392 	/* do not flush cache until the cache is fully built */
8393 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8394 		rw_exit(&vhcache->vhcache_lock);
8395 		return;
8396 	}
8397 	rw_exit(&vhcache->vhcache_lock);
8398 
8399 	mutex_enter(&vhc->vhc_lock);
8400 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8401 		mutex_exit(&vhc->vhc_lock);
8402 		return;
8403 	}
8404 
8405 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8406 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8407 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8408 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8409 		cv_broadcast(&vhc->vhc_cv);
8410 		create_thread = 0;
8411 	} else {
8412 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8413 		create_thread = 1;
8414 	}
8415 	mutex_exit(&vhc->vhc_lock);
8416 
8417 	if (create_thread)
8418 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8419 		    0, &p0, TS_RUN, minclsyspri);
8420 }
8421 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;		/* path of the phci to configure */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;	/* next in list */
} mdi_phci_bus_config_t;
8431 
/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;		/* held to decrement/wait on count */
	kcondvar_t vhbc_cv;		/* broadcast when count hits 0 */
	int vhbc_thr_count;		/* pending bus_config_phci() calls */
} mdi_vhci_bus_config_t;
8441 
8442 /*
8443  * bus config the specified phci
8444  */
8445 static void
8446 bus_config_phci(void *arg)
8447 {
8448 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8449 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8450 	dev_info_t *ph_dip;
8451 
8452 	/*
8453 	 * first configure all path components upto phci and then configure
8454 	 * the phci children.
8455 	 */
8456 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8457 	    != NULL) {
8458 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8459 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8460 			(void) ndi_devi_config_driver(ph_dip,
8461 			    vhbc->vhbc_op_flags,
8462 			    vhbc->vhbc_op_major);
8463 		} else
8464 			(void) ndi_devi_config(ph_dip,
8465 			    vhbc->vhbc_op_flags);
8466 
8467 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8468 		ndi_rele_devi(ph_dip);
8469 	}
8470 
8471 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8472 	kmem_free(phbc, sizeof (*phbc));
8473 
8474 	mutex_enter(&vhbc->vhbc_lock);
8475 	vhbc->vhbc_thr_count--;
8476 	if (vhbc->vhbc_thr_count == 0)
8477 		cv_broadcast(&vhbc->vhbc_cv);
8478 	mutex_exit(&vhbc->vhbc_lock);
8479 }
8480 
8481 /*
8482  * Bus config all phcis associated with the vhci in parallel.
8483  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8484  */
8485 static void
8486 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8487     ddi_bus_config_op_t op, major_t maj)
8488 {
8489 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8490 	mdi_vhci_bus_config_t *vhbc;
8491 	mdi_vhcache_phci_t *cphci;
8492 
8493 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8494 	if (vhcache->vhcache_phci_head == NULL) {
8495 		rw_exit(&vhcache->vhcache_lock);
8496 		return;
8497 	}
8498 
8499 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8500 
8501 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8502 	    cphci = cphci->cphci_next) {
8503 		/* skip phcis that haven't attached before root is available */
8504 		if (!modrootloaded && (cphci->cphci_phci == NULL))
8505 			continue;
8506 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8507 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8508 		    KM_SLEEP);
8509 		phbc->phbc_vhbusconfig = vhbc;
8510 		phbc->phbc_next = phbc_head;
8511 		phbc_head = phbc;
8512 		vhbc->vhbc_thr_count++;
8513 	}
8514 	rw_exit(&vhcache->vhcache_lock);
8515 
8516 	vhbc->vhbc_op = op;
8517 	vhbc->vhbc_op_major = maj;
8518 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
8519 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8520 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8521 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8522 
8523 	/* now create threads to initiate bus config on all phcis in parallel */
8524 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8525 		phbc_next = phbc->phbc_next;
8526 		if (mdi_mtc_off)
8527 			bus_config_phci((void *)phbc);
8528 		else
8529 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
8530 			    0, &p0, TS_RUN, minclsyspri);
8531 	}
8532 
8533 	mutex_enter(&vhbc->vhbc_lock);
8534 	/* wait until all threads exit */
8535 	while (vhbc->vhbc_thr_count > 0)
8536 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8537 	mutex_exit(&vhbc->vhbc_lock);
8538 
8539 	mutex_destroy(&vhbc->vhbc_lock);
8540 	cv_destroy(&vhbc->vhbc_cv);
8541 	kmem_free(vhbc, sizeof (*vhbc));
8542 }
8543 
8544 /*
8545  * Single threaded version of bus_config_all_phcis()
8546  */
8547 static void
8548 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8549     ddi_bus_config_op_t op, major_t maj)
8550 {
8551 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8552 
8553 	single_threaded_vhconfig_enter(vhc);
8554 	bus_config_all_phcis(vhcache, flags, op, maj);
8555 	single_threaded_vhconfig_exit(vhc);
8556 }
8557 
8558 /*
8559  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8560  * The path includes the child component in addition to the phci path.
8561  */
8562 static int
8563 bus_config_one_phci_child(char *path)
8564 {
8565 	dev_info_t *ph_dip, *child;
8566 	char *devnm;
8567 	int rv = MDI_FAILURE;
8568 
8569 	/* extract the child component of the phci */
8570 	devnm = strrchr(path, '/');
8571 	*devnm++ = '\0';
8572 
8573 	/*
8574 	 * first configure all path components upto phci and then
8575 	 * configure the phci child.
8576 	 */
8577 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8578 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8579 		    NDI_SUCCESS) {
8580 			/*
8581 			 * release the hold that ndi_devi_config_one() placed
8582 			 */
8583 			ndi_rele_devi(child);
8584 			rv = MDI_SUCCESS;
8585 		}
8586 
8587 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8588 		ndi_rele_devi(ph_dip);
8589 	}
8590 
8591 	devnm--;
8592 	*devnm = '/';
8593 	return (rv);
8594 }
8595 
8596 /*
8597  * Build a list of phci client paths for the specified vhci client.
8598  * The list includes only those phci client paths which aren't configured yet.
8599  */
8600 static mdi_phys_path_t *
8601 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8602 {
8603 	mdi_vhcache_pathinfo_t *cpi;
8604 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8605 	int config_path, len;
8606 
8607 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8608 		/*
8609 		 * include only those paths that aren't configured.
8610 		 */
8611 		config_path = 0;
8612 		if (cpi->cpi_pip == NULL)
8613 			config_path = 1;
8614 		else {
8615 			MDI_PI_LOCK(cpi->cpi_pip);
8616 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
8617 				config_path = 1;
8618 			MDI_PI_UNLOCK(cpi->cpi_pip);
8619 		}
8620 
8621 		if (config_path) {
8622 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8623 			len = strlen(cpi->cpi_cphci->cphci_path) +
8624 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8625 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
8626 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
8627 			    cpi->cpi_cphci->cphci_path, ct_name,
8628 			    cpi->cpi_addr);
8629 			pp->phys_path_next = NULL;
8630 
8631 			if (pp_head == NULL)
8632 				pp_head = pp;
8633 			else
8634 				pp_tail->phys_path_next = pp;
8635 			pp_tail = pp;
8636 		}
8637 	}
8638 
8639 	return (pp_head);
8640 }
8641 
8642 /*
8643  * Free the memory allocated for phci client path list.
8644  */
8645 static void
8646 free_phclient_path_list(mdi_phys_path_t *pp_head)
8647 {
8648 	mdi_phys_path_t *pp, *pp_next;
8649 
8650 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8651 		pp_next = pp->phys_path_next;
8652 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8653 		kmem_free(pp, sizeof (*pp));
8654 	}
8655 }
8656 
8657 /*
8658  * Allocated async client structure and initialize with the specified values.
8659  */
8660 static mdi_async_client_config_t *
8661 alloc_async_client_config(char *ct_name, char *ct_addr,
8662     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8663 {
8664 	mdi_async_client_config_t *acc;
8665 
8666 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8667 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8668 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8669 	acc->acc_phclient_path_list_head = pp_head;
8670 	init_vhcache_lookup_token(&acc->acc_token, tok);
8671 	acc->acc_next = NULL;
8672 	return (acc);
8673 }
8674 
8675 /*
8676  * Free the memory allocated for the async client structure and their members.
8677  */
8678 static void
8679 free_async_client_config(mdi_async_client_config_t *acc)
8680 {
8681 	if (acc->acc_phclient_path_list_head)
8682 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8683 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8684 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8685 	kmem_free(acc, sizeof (*acc));
8686 }
8687 
8688 /*
8689  * Sort vhcache pathinfos (cpis) of the specified client.
8690  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8691  * flag set come at the beginning of the list. All cpis which have this
8692  * flag set come at the end of the list.
8693  */
8694 static void
8695 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8696 {
8697 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8698 
8699 	cpi_head = cct->cct_cpi_head;
8700 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8701 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8702 		cpi_next = cpi->cpi_next;
8703 		enqueue_vhcache_pathinfo(cct, cpi);
8704 	}
8705 }
8706 
8707 /*
8708  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8709  * every vhcache pathinfo of the specified client. If not adjust the flag
8710  * setting appropriately.
8711  *
8712  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8713  * on-disk vhci cache. So every time this flag is updated the cache must be
8714  * flushed.
8715  */
8716 static void
8717 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8718     mdi_vhcache_lookup_token_t *tok)
8719 {
8720 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8721 	mdi_vhcache_client_t *cct;
8722 	mdi_vhcache_pathinfo_t *cpi;
8723 
8724 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8725 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8726 	    == NULL) {
8727 		rw_exit(&vhcache->vhcache_lock);
8728 		return;
8729 	}
8730 
8731 	/*
8732 	 * to avoid unnecessary on-disk cache updates, first check if an
8733 	 * update is really needed. If no update is needed simply return.
8734 	 */
8735 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8736 		if ((cpi->cpi_pip != NULL &&
8737 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8738 		    (cpi->cpi_pip == NULL &&
8739 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8740 			break;
8741 		}
8742 	}
8743 	if (cpi == NULL) {
8744 		rw_exit(&vhcache->vhcache_lock);
8745 		return;
8746 	}
8747 
8748 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8749 		rw_exit(&vhcache->vhcache_lock);
8750 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8751 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8752 		    tok)) == NULL) {
8753 			rw_exit(&vhcache->vhcache_lock);
8754 			return;
8755 		}
8756 	}
8757 
8758 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8759 		if (cpi->cpi_pip != NULL)
8760 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8761 		else
8762 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8763 	}
8764 	sort_vhcache_paths(cct);
8765 
8766 	rw_exit(&vhcache->vhcache_lock);
8767 	vhcache_dirty(vhc);
8768 }
8769 
8770 /*
8771  * Configure all specified paths of the client.
8772  */
8773 static void
8774 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8775     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8776 {
8777 	mdi_phys_path_t *pp;
8778 
8779 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8780 		(void) bus_config_one_phci_child(pp->phys_path);
8781 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8782 }
8783 
8784 /*
8785  * Dequeue elements from vhci async client config list and bus configure
8786  * their corresponding phci clients.
8787  */
8788 static void
8789 config_client_paths_thread(void *arg)
8790 {
8791 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8792 	mdi_async_client_config_t *acc;
8793 	clock_t quit_at_ticks;
8794 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8795 	callb_cpr_t cprinfo;
8796 
8797 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8798 	    "mdi_config_client_paths");
8799 
8800 	for (; ; ) {
8801 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8802 
8803 		mutex_enter(&vhc->vhc_lock);
8804 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8805 		    vhc->vhc_acc_list_head == NULL &&
8806 		    ddi_get_lbolt() < quit_at_ticks) {
8807 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8808 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8809 			    quit_at_ticks);
8810 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8811 		}
8812 
8813 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8814 		    vhc->vhc_acc_list_head == NULL)
8815 			goto out;
8816 
8817 		acc = vhc->vhc_acc_list_head;
8818 		vhc->vhc_acc_list_head = acc->acc_next;
8819 		if (vhc->vhc_acc_list_head == NULL)
8820 			vhc->vhc_acc_list_tail = NULL;
8821 		vhc->vhc_acc_count--;
8822 		mutex_exit(&vhc->vhc_lock);
8823 
8824 		config_client_paths_sync(vhc, acc->acc_ct_name,
8825 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8826 		    &acc->acc_token);
8827 
8828 		free_async_client_config(acc);
8829 	}
8830 
8831 out:
8832 	vhc->vhc_acc_thrcount--;
8833 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8834 	CALLB_CPR_EXIT(&cprinfo);
8835 }
8836 
8837 /*
8838  * Arrange for all the phci client paths (pp_head) for the specified client
8839  * to be bus configured asynchronously by a thread.
8840  */
8841 static void
8842 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8843     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8844 {
8845 	mdi_async_client_config_t *acc, *newacc;
8846 	int create_thread;
8847 
8848 	if (pp_head == NULL)
8849 		return;
8850 
8851 	if (mdi_mtc_off) {
8852 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8853 		free_phclient_path_list(pp_head);
8854 		return;
8855 	}
8856 
8857 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8858 	ASSERT(newacc);
8859 
8860 	mutex_enter(&vhc->vhc_lock);
8861 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8862 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8863 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8864 			free_async_client_config(newacc);
8865 			mutex_exit(&vhc->vhc_lock);
8866 			return;
8867 		}
8868 	}
8869 
8870 	if (vhc->vhc_acc_list_head == NULL)
8871 		vhc->vhc_acc_list_head = newacc;
8872 	else
8873 		vhc->vhc_acc_list_tail->acc_next = newacc;
8874 	vhc->vhc_acc_list_tail = newacc;
8875 	vhc->vhc_acc_count++;
8876 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8877 		cv_broadcast(&vhc->vhc_cv);
8878 		create_thread = 0;
8879 	} else {
8880 		vhc->vhc_acc_thrcount++;
8881 		create_thread = 1;
8882 	}
8883 	mutex_exit(&vhc->vhc_lock);
8884 
8885 	if (create_thread)
8886 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8887 		    0, &p0, TS_RUN, minclsyspri);
8888 }
8889 
8890 /*
8891  * Return number of online paths for the specified client.
8892  */
8893 static int
8894 nonline_paths(mdi_vhcache_client_t *cct)
8895 {
8896 	mdi_vhcache_pathinfo_t *cpi;
8897 	int online_count = 0;
8898 
8899 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8900 		if (cpi->cpi_pip != NULL) {
8901 			MDI_PI_LOCK(cpi->cpi_pip);
8902 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8903 				online_count++;
8904 			MDI_PI_UNLOCK(cpi->cpi_pip);
8905 		}
8906 	}
8907 
8908 	return (online_count);
8909 }
8910 
8911 /*
8912  * Bus configure all paths for the specified vhci client.
8913  * If at least one path for the client is already online, the remaining paths
8914  * will be configured asynchronously. Otherwise, it synchronously configures
8915  * the paths until at least one path is online and then rest of the paths
8916  * will be configured asynchronously.
8917  */
8918 static void
8919 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8920 {
8921 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8922 	mdi_phys_path_t *pp_head, *pp;
8923 	mdi_vhcache_client_t *cct;
8924 	mdi_vhcache_lookup_token_t tok;
8925 
8926 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8927 
8928 	init_vhcache_lookup_token(&tok, NULL);
8929 
8930 	if (ct_name == NULL || ct_addr == NULL ||
8931 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8932 	    == NULL ||
8933 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8934 		rw_exit(&vhcache->vhcache_lock);
8935 		return;
8936 	}
8937 
8938 	/* if at least one path is online, configure the rest asynchronously */
8939 	if (nonline_paths(cct) > 0) {
8940 		rw_exit(&vhcache->vhcache_lock);
8941 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8942 		return;
8943 	}
8944 
8945 	rw_exit(&vhcache->vhcache_lock);
8946 
8947 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8948 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8949 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8950 
8951 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8952 			    ct_addr, &tok)) == NULL) {
8953 				rw_exit(&vhcache->vhcache_lock);
8954 				goto out;
8955 			}
8956 
8957 			if (nonline_paths(cct) > 0 &&
8958 			    pp->phys_path_next != NULL) {
8959 				rw_exit(&vhcache->vhcache_lock);
8960 				config_client_paths_async(vhc, ct_name, ct_addr,
8961 				    pp->phys_path_next, &tok);
8962 				pp->phys_path_next = NULL;
8963 				goto out;
8964 			}
8965 
8966 			rw_exit(&vhcache->vhcache_lock);
8967 		}
8968 	}
8969 
8970 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8971 out:
8972 	free_phclient_path_list(pp_head);
8973 }
8974 
8975 static void
8976 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8977 {
8978 	mutex_enter(&vhc->vhc_lock);
8979 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8980 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8981 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8982 	mutex_exit(&vhc->vhc_lock);
8983 }
8984 
8985 static void
8986 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8987 {
8988 	mutex_enter(&vhc->vhc_lock);
8989 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8990 	cv_broadcast(&vhc->vhc_cv);
8991 	mutex_exit(&vhc->vhc_lock);
8992 }
8993 
/* One entry of a built-in phci driver table. */
typedef struct mdi_phci_driver_info {
	/* name of the phci driver */
	char	*phdriver_name;

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp",		1 },
	{ "iscsi",	0 },
	{ "ibsrp",	1 },
};

static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor",	1 },
};
9016 
9017 static void *
9018 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9019 {
9020 	void *new_ptr;
9021 
9022 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9023 	if (old_ptr) {
9024 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9025 		kmem_free(old_ptr, old_size);
9026 	}
9027 	return (new_ptr);
9028 }
9029 
9030 static void
9031 add_to_phci_list(char ***driver_list, int **root_support_list,
9032     int *cur_elements, int *max_elements, char *driver_name, int root_support)
9033 {
9034 	ASSERT(*cur_elements <= *max_elements);
9035 	if (*cur_elements == *max_elements) {
9036 		*max_elements += 10;
9037 		*driver_list = mdi_realloc(*driver_list,
9038 		    sizeof (char *) * (*cur_elements),
9039 		    sizeof (char *) * (*max_elements));
9040 		*root_support_list = mdi_realloc(*root_support_list,
9041 		    sizeof (int) * (*cur_elements),
9042 		    sizeof (int) * (*max_elements));
9043 	}
9044 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9045 	(*root_support_list)[*cur_elements] = root_support;
9046 	(*cur_elements)++;
9047 }
9048 
9049 static void
9050 get_phci_driver_list(char *vhci_class, char ***driver_list,
9051     int **root_support_list, int *cur_elements, int *max_elements)
9052 {
9053 	mdi_phci_driver_info_t	*st_driver_list, *p;
9054 	int		st_ndrivers, root_support, i, j, driver_conf_count;
9055 	major_t		m;
9056 	struct devnames	*dnp;
9057 	ddi_prop_t	*propp;
9058 
9059 	*driver_list = NULL;
9060 	*root_support_list = NULL;
9061 	*cur_elements = 0;
9062 	*max_elements = 0;
9063 
9064 	/* add the phci drivers derived from the phci driver.conf files */
9065 	for (m = 0; m < devcnt; m++) {
9066 		dnp = &devnamesp[m];
9067 
9068 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
9069 			LOCK_DEV_OPS(&dnp->dn_lock);
9070 			if (dnp->dn_global_prop_ptr != NULL &&
9071 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9072 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9073 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9074 			    strcmp(propp->prop_val, vhci_class) == 0) {
9075 
9076 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9077 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9078 				    &dnp->dn_global_prop_ptr->prop_list)
9079 				    == NULL) ? 1 : 0;
9080 
9081 				add_to_phci_list(driver_list, root_support_list,
9082 				    cur_elements, max_elements, dnp->dn_name,
9083 				    root_support);
9084 
9085 				UNLOCK_DEV_OPS(&dnp->dn_lock);
9086 			} else
9087 				UNLOCK_DEV_OPS(&dnp->dn_lock);
9088 		}
9089 	}
9090 
9091 	driver_conf_count = *cur_elements;
9092 
9093 	/* add the phci drivers specified in the built-in tables */
9094 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9095 		st_driver_list = scsi_phci_driver_list;
9096 		st_ndrivers = sizeof (scsi_phci_driver_list) /
9097 		    sizeof (mdi_phci_driver_info_t);
9098 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9099 		st_driver_list = ib_phci_driver_list;
9100 		st_ndrivers = sizeof (ib_phci_driver_list) /
9101 		    sizeof (mdi_phci_driver_info_t);
9102 	} else {
9103 		st_driver_list = NULL;
9104 		st_ndrivers = 0;
9105 	}
9106 
9107 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9108 		/* add this phci driver if not already added before */
9109 		for (j = 0; j < driver_conf_count; j++) {
9110 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9111 				break;
9112 		}
9113 		if (j == driver_conf_count) {
9114 			add_to_phci_list(driver_list, root_support_list,
9115 			    cur_elements, max_elements, p->phdriver_name,
9116 			    p->phdriver_root_support);
9117 		}
9118 	}
9119 }
9120 
9121 /*
9122  * Attach the phci driver instances associated with the specified vhci class.
9123  * If root is mounted attach all phci driver instances.
9124  * If root is not mounted, attach the instances of only those phci
9125  * drivers that have the root support.
9126  */
9127 static void
9128 attach_phci_drivers(char *vhci_class)
9129 {
9130 	char	**driver_list, **p;
9131 	int	*root_support_list;
9132 	int	cur_elements, max_elements, i;
9133 	major_t	m;
9134 
9135 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9136 	    &cur_elements, &max_elements);
9137 
9138 	for (i = 0; i < cur_elements; i++) {
9139 		if (modrootloaded || root_support_list[i]) {
9140 			m = ddi_name_to_major(driver_list[i]);
9141 			if (m != DDI_MAJOR_T_NONE &&
9142 			    ddi_hold_installed_driver(m))
9143 				ddi_rele_driver(m);
9144 		}
9145 	}
9146 
9147 	if (driver_list) {
9148 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9149 			kmem_free(*p, strlen(*p) + 1);
9150 		kmem_free(driver_list, sizeof (char *) * max_elements);
9151 		kmem_free(root_support_list, sizeof (int) * max_elements);
9152 	}
9153 }
9154 
9155 /*
9156  * Build vhci cache:
9157  *
9158  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9159  * the phci driver instances. During this process the cache gets built.
9160  *
9161  * Cache is built fully if the root is mounted.
9162  * If the root is not mounted, phci drivers that do not have root support
9163  * are not attached. As a result the cache is built partially. The entries
9164  * in the cache reflect only those phci drivers that have root support.
9165  */
9166 static int
9167 build_vhci_cache(mdi_vhci_t *vh)
9168 {
9169 	mdi_vhci_config_t *vhc = vh->vh_config;
9170 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9171 
9172 	single_threaded_vhconfig_enter(vhc);
9173 
9174 	rw_enter(&vhcache->vhcache_lock, RW_READER);
9175 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9176 		rw_exit(&vhcache->vhcache_lock);
9177 		single_threaded_vhconfig_exit(vhc);
9178 		return (0);
9179 	}
9180 	rw_exit(&vhcache->vhcache_lock);
9181 
9182 	attach_phci_drivers(vh->vh_class);
9183 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9184 	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9185 
9186 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9187 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9188 	rw_exit(&vhcache->vhcache_lock);
9189 
9190 	single_threaded_vhconfig_exit(vhc);
9191 	vhcache_dirty(vhc);
9192 	return (1);
9193 }
9194 
9195 /*
9196  * Determine if discovery of paths is needed.
9197  */
9198 static int
9199 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9200 {
9201 	int rv = 1;
9202 
9203 	mutex_enter(&vhc->vhc_lock);
9204 	if (i_ddi_io_initialized() == 0) {
9205 		if (vhc->vhc_path_discovery_boot > 0) {
9206 			vhc->vhc_path_discovery_boot--;
9207 			goto out;
9208 		}
9209 	} else {
9210 		if (vhc->vhc_path_discovery_postboot > 0) {
9211 			vhc->vhc_path_discovery_postboot--;
9212 			goto out;
9213 		}
9214 	}
9215 
9216 	/*
9217 	 * Do full path discovery at most once per mdi_path_discovery_interval.
9218 	 * This is to avoid a series of full path discoveries when opening
9219 	 * stale /dev/[r]dsk links.
9220 	 */
9221 	if (mdi_path_discovery_interval != -1 &&
9222 	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9223 		goto out;
9224 
9225 	rv = 0;
9226 out:
9227 	mutex_exit(&vhc->vhc_lock);
9228 	return (rv);
9229 }
9230 
9231 /*
9232  * Discover all paths:
9233  *
9234  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9235  * driver instances. During this process all paths will be discovered.
9236  */
9237 static int
9238 vhcache_discover_paths(mdi_vhci_t *vh)
9239 {
9240 	mdi_vhci_config_t *vhc = vh->vh_config;
9241 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9242 	int rv = 0;
9243 
9244 	single_threaded_vhconfig_enter(vhc);
9245 
9246 	if (vhcache_do_discovery(vhc)) {
9247 		attach_phci_drivers(vh->vh_class);
9248 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9249 		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9250 
9251 		mutex_enter(&vhc->vhc_lock);
9252 		vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9253 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
9254 		mutex_exit(&vhc->vhc_lock);
9255 		rv = 1;
9256 	}
9257 
9258 	single_threaded_vhconfig_exit(vhc);
9259 	return (rv);
9260 }
9261 
9262 /*
9263  * Generic vhci bus config implementation:
9264  *
9265  * Parameters
9266  *	vdip	vhci dip
9267  *	flags	bus config flags
9268  *	op	bus config operation
9269  *	The remaining parameters are bus config operation specific
9270  *
9271  * for BUS_CONFIG_ONE
9272  *	arg	pointer to name@addr
9273  *	child	upon successful return from this function, *child will be
9274  *		set to the configured and held devinfo child node of vdip.
9275  *	ct_addr	pointer to client address (i.e. GUID)
9276  *
9277  * for BUS_CONFIG_DRIVER
9278  *	arg	major number of the driver
9279  *	child and ct_addr parameters are ignored
9280  *
9281  * for BUS_CONFIG_ALL
9282  *	arg, child, and ct_addr parameters are ignored
9283  *
9284  * Note that for the rest of the bus config operations, this function simply
9285  * calls the framework provided default bus config routine.
9286  */
9287 int
9288 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
9289     void *arg, dev_info_t **child, char *ct_addr)
9290 {
9291 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9292 	mdi_vhci_config_t *vhc = vh->vh_config;
9293 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9294 	int rv = 0;
9295 	int params_valid = 0;
9296 	char *cp;
9297 
9298 	/*
9299 	 * To bus config vhcis we relay operation, possibly using another
9300 	 * thread, to phcis. The phci driver then interacts with MDI to cause
9301 	 * vhci child nodes to be enumerated under the vhci node.  Adding a
9302 	 * vhci child requires an ndi_devi_enter of the vhci. Since another
9303 	 * thread may be adding the child, to avoid deadlock we can't wait
9304 	 * for the relayed operations to complete if we have already entered
9305 	 * the vhci node.
9306 	 */
9307 	if (DEVI_BUSY_OWNED(vdip)) {
9308 		MDI_DEBUG(2, (MDI_NOTE, vdip,
9309 		    "vhci dip is busy owned %p", (void *)vdip));
9310 		goto default_bus_config;
9311 	}
9312 
9313 	rw_enter(&vhcache->vhcache_lock, RW_READER);
9314 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
9315 		rw_exit(&vhcache->vhcache_lock);
9316 		rv = build_vhci_cache(vh);
9317 		rw_enter(&vhcache->vhcache_lock, RW_READER);
9318 	}
9319 
9320 	switch (op) {
9321 	case BUS_CONFIG_ONE:
9322 		if (arg != NULL && ct_addr != NULL) {
9323 			/* extract node name */
9324 			cp = (char *)arg;
9325 			while (*cp != '\0' && *cp != '@')
9326 				cp++;
9327 			if (*cp == '@') {
9328 				params_valid = 1;
9329 				*cp = '\0';
9330 				config_client_paths(vhc, (char *)arg, ct_addr);
9331 				/* config_client_paths() releases cache_lock */
9332 				*cp = '@';
9333 				break;
9334 			}
9335 		}
9336 
9337 		rw_exit(&vhcache->vhcache_lock);
9338 		break;
9339 
9340 	case BUS_CONFIG_DRIVER:
9341 		rw_exit(&vhcache->vhcache_lock);
9342 		if (rv == 0)
9343 			st_bus_config_all_phcis(vhc, flags, op,
9344 			    (major_t)(uintptr_t)arg);
9345 		break;
9346 
9347 	case BUS_CONFIG_ALL:
9348 		rw_exit(&vhcache->vhcache_lock);
9349 		if (rv == 0)
9350 			st_bus_config_all_phcis(vhc, flags, op, -1);
9351 		break;
9352 
9353 	default:
9354 		rw_exit(&vhcache->vhcache_lock);
9355 		break;
9356 	}
9357 
9358 
9359 default_bus_config:
9360 	/*
9361 	 * All requested child nodes are enumerated under the vhci.
9362 	 * Now configure them.
9363 	 */
9364 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9365 	    NDI_SUCCESS) {
9366 		return (MDI_SUCCESS);
9367 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9368 		/* discover all paths and try configuring again */
9369 		if (vhcache_discover_paths(vh) &&
9370 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9371 		    NDI_SUCCESS)
9372 			return (MDI_SUCCESS);
9373 	}
9374 
9375 	return (MDI_FAILURE);
9376 }
9377 
9378 /*
9379  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9380  */
9381 static nvlist_t *
9382 read_on_disk_vhci_cache(char *vhci_class)
9383 {
9384 	nvlist_t *nvl;
9385 	int err;
9386 	char *filename;
9387 
9388 	filename = vhclass2vhcache_filename(vhci_class);
9389 
9390 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
9391 		kmem_free(filename, strlen(filename) + 1);
9392 		return (nvl);
9393 	} else if (err == EIO)
9394 		cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9395 	else if (err == EINVAL)
9396 		cmn_err(CE_WARN,
9397 		    "%s: data file corrupted, will recreate", filename);
9398 
9399 	kmem_free(filename, strlen(filename) + 1);
9400 	return (NULL);
9401 }
9402 
9403 /*
9404  * Read on-disk vhci cache into nvlists for all vhci classes.
9405  * Called during booting by i_ddi_read_devices_files().
9406  */
9407 void
9408 mdi_read_devices_files(void)
9409 {
9410 	int i;
9411 
9412 	for (i = 0; i < N_VHCI_CLASSES; i++)
9413 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9414 }
9415 
9416 /*
9417  * Remove all stale entries from vhci cache.
9418  */
9419 static void
9420 clean_vhcache(mdi_vhci_config_t *vhc)
9421 {
9422 	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
9423 	mdi_vhcache_phci_t	*phci, *nxt_phci;
9424 	mdi_vhcache_client_t	*client, *nxt_client;
9425 	mdi_vhcache_pathinfo_t	*path, *nxt_path;
9426 
9427 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9428 
9429 	client = vhcache->vhcache_client_head;
9430 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9431 	for ( ; client != NULL; client = nxt_client) {
9432 		nxt_client = client->cct_next;
9433 
9434 		path = client->cct_cpi_head;
9435 		client->cct_cpi_head = client->cct_cpi_tail = NULL;
9436 		for ( ; path != NULL; path = nxt_path) {
9437 			nxt_path = path->cpi_next;
9438 			if ((path->cpi_cphci->cphci_phci != NULL) &&
9439 			    (path->cpi_pip != NULL)) {
9440 				enqueue_tail_vhcache_pathinfo(client, path);
9441 			} else if (path->cpi_pip != NULL) {
9442 				/* Not valid to have a path without a phci. */
9443 				free_vhcache_pathinfo(path);
9444 			}
9445 		}
9446 
9447 		if (client->cct_cpi_head != NULL)
9448 			enqueue_vhcache_client(vhcache, client);
9449 		else {
9450 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
9451 			    (mod_hash_key_t)client->cct_name_addr);
9452 			free_vhcache_client(client);
9453 		}
9454 	}
9455 
9456 	phci = vhcache->vhcache_phci_head;
9457 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9458 	for ( ; phci != NULL; phci = nxt_phci) {
9459 
9460 		nxt_phci = phci->cphci_next;
9461 		if (phci->cphci_phci != NULL)
9462 			enqueue_vhcache_phci(vhcache, phci);
9463 		else
9464 			free_vhcache_phci(phci);
9465 	}
9466 
9467 	vhcache->vhcache_clean_time = ddi_get_lbolt64();
9468 	rw_exit(&vhcache->vhcache_lock);
9469 	vhcache_dirty(vhc);
9470 }
9471 
9472 /*
9473  * Remove all stale entries from vhci cache.
9474  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9475  */
9476 void
9477 mdi_clean_vhcache(void)
9478 {
9479 	mdi_vhci_t *vh;
9480 
9481 	mutex_enter(&mdi_mutex);
9482 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9483 		vh->vh_refcnt++;
9484 		mutex_exit(&mdi_mutex);
9485 		clean_vhcache(vh->vh_config);
9486 		mutex_enter(&mdi_mutex);
9487 		vh->vh_refcnt--;
9488 	}
9489 	mutex_exit(&mdi_mutex);
9490 }
9491 
9492 /*
9493  * mdi_vhci_walk_clients():
9494  *		Walker routine to traverse client dev_info nodes
9495  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9496  * below the client, including nexus devices, which we dont want.
9497  * So we just traverse the immediate siblings, starting from 1st client.
9498  */
9499 void
9500 mdi_vhci_walk_clients(dev_info_t *vdip,
9501     int (*f)(dev_info_t *, void *), void *arg)
9502 {
9503 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9504 	dev_info_t	*cdip;
9505 	mdi_client_t	*ct;
9506 
9507 	MDI_VHCI_CLIENT_LOCK(vh);
9508 	cdip = ddi_get_child(vdip);
9509 	while (cdip) {
9510 		ct = i_devi_get_client(cdip);
9511 		MDI_CLIENT_LOCK(ct);
9512 
9513 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9514 			cdip = ddi_get_next_sibling(cdip);
9515 		else
9516 			cdip = NULL;
9517 
9518 		MDI_CLIENT_UNLOCK(ct);
9519 	}
9520 	MDI_VHCI_CLIENT_UNLOCK(vh);
9521 }
9522 
9523 /*
9524  * mdi_vhci_walk_phcis():
9525  *		Walker routine to traverse phci dev_info nodes
9526  */
9527 void
9528 mdi_vhci_walk_phcis(dev_info_t *vdip,
9529     int (*f)(dev_info_t *, void *), void *arg)
9530 {
9531 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9532 	mdi_phci_t	*ph, *next;
9533 
9534 	MDI_VHCI_PHCI_LOCK(vh);
9535 	ph = vh->vh_phci_head;
9536 	while (ph) {
9537 		MDI_PHCI_LOCK(ph);
9538 
9539 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9540 			next = ph->ph_next;
9541 		else
9542 			next = NULL;
9543 
9544 		MDI_PHCI_UNLOCK(ph);
9545 		ph = next;
9546 	}
9547 	MDI_VHCI_PHCI_UNLOCK(vh);
9548 }
9549 
9550 
9551 /*
9552  * mdi_walk_vhcis():
9553  *		Walker routine to traverse vhci dev_info nodes
9554  */
9555 void
9556 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9557 {
9558 	mdi_vhci_t	*vh = NULL;
9559 
9560 	mutex_enter(&mdi_mutex);
9561 	/*
9562 	 * Scan for already registered vhci
9563 	 */
9564 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9565 		vh->vh_refcnt++;
9566 		mutex_exit(&mdi_mutex);
9567 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9568 			mutex_enter(&mdi_mutex);
9569 			vh->vh_refcnt--;
9570 			break;
9571 		} else {
9572 			mutex_enter(&mdi_mutex);
9573 			vh->vh_refcnt--;
9574 		}
9575 	}
9576 
9577 	mutex_exit(&mdi_mutex);
9578 }
9579 
9580 /*
9581  * i_mdi_log_sysevent():
9582  *		Logs events for pickup by syseventd
9583  */
9584 static void
9585 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9586 {
9587 	char		*path_name;
9588 	nvlist_t	*attr_list;
9589 
9590 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9591 	    KM_SLEEP) != DDI_SUCCESS) {
9592 		goto alloc_failed;
9593 	}
9594 
9595 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9596 	(void) ddi_pathname(dip, path_name);
9597 
9598 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9599 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
9600 		goto error;
9601 	}
9602 
9603 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9604 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9605 		goto error;
9606 	}
9607 
9608 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9609 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9610 		goto error;
9611 	}
9612 
9613 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
9614 	    path_name) != DDI_SUCCESS) {
9615 		goto error;
9616 	}
9617 
9618 	if (nvlist_add_string(attr_list, DDI_CLASS,
9619 	    ph_vh_class) != DDI_SUCCESS) {
9620 		goto error;
9621 	}
9622 
9623 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9624 	    attr_list, NULL, DDI_SLEEP);
9625 
9626 error:
9627 	kmem_free(path_name, MAXPATHLEN);
9628 	nvlist_free(attr_list);
9629 	return;
9630 
9631 alloc_failed:
9632 	MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9633 }
9634 
9635 char **
9636 mdi_get_phci_driver_list(char *vhci_class, int	*ndrivers)
9637 {
9638 	char	**driver_list, **ret_driver_list = NULL;
9639 	int	*root_support_list;
9640 	int	cur_elements, max_elements;
9641 
9642 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9643 	    &cur_elements, &max_elements);
9644 
9645 
9646 	if (driver_list) {
9647 		kmem_free(root_support_list, sizeof (int) * max_elements);
9648 		ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9649 		    * max_elements, sizeof (char *) * cur_elements);
9650 	}
9651 	*ndrivers = cur_elements;
9652 
9653 	return (ret_driver_list);
9654 
9655 }
9656 
9657 void
9658 mdi_free_phci_driver_list(char **driver_list, int ndrivers)
9659 {
9660 	char	**p;
9661 	int	i;
9662 
9663 	if (driver_list) {
9664 		for (i = 0, p = driver_list; i < ndrivers; i++, p++)
9665 			kmem_free(*p, strlen(*p) + 1);
9666 		kmem_free(driver_list, sizeof (char *) * ndrivers);
9667 	}
9668 }
9669 
9670 /*
9671  * mdi_is_dev_supported():
9672  *		function called by pHCI bus config operation to determine if a
9673  *		device should be represented as a child of the vHCI or the
9674  *		pHCI.  This decision is made by the vHCI, using cinfo idenity
9675  *		information passed by the pHCI - specifics of the cinfo
9676  *		representation are by agreement between the pHCI and vHCI.
9677  * Return Values:
9678  *		MDI_SUCCESS
9679  *		MDI_FAILURE
9680  */
9681 int
9682 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9683 {
9684 	mdi_vhci_t	*vh;
9685 
9686 	ASSERT(class && pdip);
9687 
9688 	/*
9689 	 * For dev_supported, mdi_phci_register() must have established pdip as
9690 	 * a pHCI.
9691 	 *
9692 	 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9693 	 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9694 	 */
9695 	if (!MDI_PHCI(pdip))
9696 		return (MDI_FAILURE);
9697 
9698 	/* Return MDI_FAILURE if vHCI does not support asking the question. */
9699 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9700 	if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9701 		return (MDI_FAILURE);
9702 	}
9703 
9704 	/* Return vHCI answer */
9705 	return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9706 }
9707 
9708 int
9709 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9710 {
9711 	uint_t devstate = 0;
9712 	dev_info_t *cdip;
9713 
9714 	if ((pip == NULL) || (dcp == NULL))
9715 		return (MDI_FAILURE);
9716 
9717 	cdip = mdi_pi_get_client(pip);
9718 
9719 	switch (mdi_pi_get_state(pip)) {
9720 	case MDI_PATHINFO_STATE_INIT:
9721 		devstate = DEVICE_DOWN;
9722 		break;
9723 	case MDI_PATHINFO_STATE_ONLINE:
9724 		devstate = DEVICE_ONLINE;
9725 		if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9726 			devstate |= DEVICE_BUSY;
9727 		break;
9728 	case MDI_PATHINFO_STATE_STANDBY:
9729 		devstate = DEVICE_ONLINE;
9730 		break;
9731 	case MDI_PATHINFO_STATE_FAULT:
9732 		devstate = DEVICE_DOWN;
9733 		break;
9734 	case MDI_PATHINFO_STATE_OFFLINE:
9735 		devstate = DEVICE_OFFLINE;
9736 		break;
9737 	default:
9738 		ASSERT(MDI_PI(pip)->pi_state);
9739 	}
9740 
9741 	if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9742 		return (MDI_FAILURE);
9743 
9744 	return (MDI_SUCCESS);
9745 }
9746