xref: /illumos-gate/usr/src/uts/common/os/sunmdi.c (revision dd72704bd9e794056c558153663c739e2012d721)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
24  * Copyright (c) 2018, Joyent, Inc.
25  */
26 
27 /*
28  * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
29  * more detailed discussion of the overall mpxio architecture.
30  *
31  * Default locking order:
32  *
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
38  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
39  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
40  */
41 
42 #include <sys/note.h>
43 #include <sys/types.h>
44 #include <sys/varargs.h>
45 #include <sys/param.h>
46 #include <sys/errno.h>
47 #include <sys/uio.h>
48 #include <sys/buf.h>
49 #include <sys/modctl.h>
50 #include <sys/open.h>
51 #include <sys/kmem.h>
52 #include <sys/poll.h>
53 #include <sys/conf.h>
54 #include <sys/bootconf.h>
55 #include <sys/cmn_err.h>
56 #include <sys/stat.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/ddipropdefs.h>
60 #include <sys/sunndi.h>
61 #include <sys/ndi_impldefs.h>
62 #include <sys/promif.h>
63 #include <sys/sunmdi.h>
64 #include <sys/mdi_impldefs.h>
65 #include <sys/taskq.h>
66 #include <sys/epm.h>
67 #include <sys/sunpm.h>
68 #include <sys/modhash.h>
69 #include <sys/disp.h>
70 #include <sys/autoconf.h>
71 #include <sys/sysmacros.h>
72 
#ifdef	DEBUG
#include <sys/debug.h>
/*
 * Debug tracing (DEBUG kernels only).
 *
 * MDI_DEBUG(dbglevel, pargs) passes the parenthesized argument list 'pargs'
 * to i_mdi_log() when mdi_debug >= dbglevel; in non-DEBUG kernels the macro
 * expands to nothing.  MDI_WARN, MDI_NOTE and MDI_CONT are shorthands that
 * supply the first two i_mdi_log() arguments: a cmn_err level and the name
 * of the calling function.
 */
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
/*
 * The body is wrapped in do { } while (0) so that an invocation is always
 * exactly one statement: a bare 'if' here would silently capture an 'else'
 * written after "if (cond) MDI_DEBUG(...);".
 */
#define	MDI_DEBUG(dbglevel, pargs)			\
	do {						\
		if (mdi_debug >= (dbglevel))		\
			i_mdi_log pargs;		\
	_NOTE(CONSTCOND) } while (0)
#define	MDI_WARN	CE_WARN, __func__
#define	MDI_NOTE	CE_NOTE, __func__
#define	MDI_CONT	CE_CONT, __func__
static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
#else	/* !DEBUG */
#define	MDI_DEBUG(dbglevel, pargs)
#endif	/* DEBUG */
85 int	mdi_debug_consoleonly = 0;
86 int	mdi_delay = 3;
87 
88 extern pri_t	minclsyspri;
89 extern int	modrootloaded;
90 
91 /*
92  * Global mutex:
93  * Protects vHCI list and structure members.
94  */
95 kmutex_t	mdi_mutex;
96 
97 /*
98  * Registered vHCI class driver lists
99  */
100 int		mdi_vhci_count;
101 mdi_vhci_t	*mdi_vhci_head;
102 mdi_vhci_t	*mdi_vhci_tail;
103 
104 /*
105  * Client Hash Table size
106  */
107 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
108 
109 /*
110  * taskq interface definitions
111  */
112 #define	MDI_TASKQ_N_THREADS	8
113 #define	MDI_TASKQ_PRI		minclsyspri
114 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
115 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
116 
117 taskq_t				*mdi_taskq;
118 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
119 
120 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
121 
122 /*
123  * The data should be "quiet" for this interval (in seconds) before the
124  * vhci cached data is flushed to the disk.
125  */
126 static int mdi_vhcache_flush_delay = 10;
127 
128 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
129 static int mdi_vhcache_flush_daemon_idle_time = 60;
130 
131 /*
132  * MDI falls back to discovery of all paths when a bus_config_one fails.
133  * The following parameters can be used to tune this operation.
134  *
135  * mdi_path_discovery_boot
136  *	Number of times path discovery will be attempted during early boot.
137  *	Probably there is no reason to ever set this value to greater than one.
138  *
139  * mdi_path_discovery_postboot
140  *	Number of times path discovery will be attempted after early boot.
141  *	Set it to a minimum of two to allow for discovery of iscsi paths which
142  *	may happen very late during booting.
143  *
144  * mdi_path_discovery_interval
145  *	Minimum number of seconds MDI will wait between successive discovery
146  *	of all paths. Set it to -1 to disable discovery of all paths.
147  */
148 static int mdi_path_discovery_boot = 1;
149 static int mdi_path_discovery_postboot = 2;
150 static int mdi_path_discovery_interval = 10;
151 
152 /*
153  * number of seconds the asynchronous configuration thread will sleep idle
154  * before exiting.
155  */
156 static int mdi_async_config_idle_time = 600;
157 
158 static int mdi_bus_config_cache_hash_size = 256;
159 
160 /* turns off multithreaded configuration for certain operations */
161 static int mdi_mtc_off = 0;
162 
163 /*
164  * The "path" to a pathinfo node is identical to the /devices path to a
165  * devinfo node had the device been enumerated under a pHCI instead of
166  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
167  * This association persists across create/delete of the pathinfo nodes,
168  * but not across reboot.
169  */
170 static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
171 static int		mdi_pathmap_hash_size = 256;
172 static kmutex_t		mdi_pathmap_mutex;
173 static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
174 static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
175 static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */
176 
177 /*
178  * MDI component property name/value string definitions
179  */
180 const char 		*mdi_component_prop = "mpxio-component";
181 const char		*mdi_component_prop_vhci = "vhci";
182 const char		*mdi_component_prop_phci = "phci";
183 const char		*mdi_component_prop_client = "client";
184 
185 /*
186  * MDI client global unique identifier property name
187  */
188 const char		*mdi_client_guid_prop = "client-guid";
189 
190 /*
191  * MDI client load balancing property name/value string definitions
192  */
193 const char		*mdi_load_balance = "load-balance";
194 const char		*mdi_load_balance_none = "none";
195 const char		*mdi_load_balance_rr = "round-robin";
196 const char		*mdi_load_balance_lba = "logical-block";
197 
198 /*
199  * Obsolete vHCI class definition; to be removed after Leadville update
200  */
201 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
202 
203 static char vhci_greeting[] =
204 	"\tThere already exists one vHCI driver for class %s\n"
205 	"\tOnly one vHCI driver for each class is allowed\n";
206 
207 /*
208  * Static function prototypes
209  */
210 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
211 static int		i_mdi_client_offline(dev_info_t *, uint_t);
212 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
213 static void		i_mdi_phci_post_detach(dev_info_t *,
214 			    ddi_detach_cmd_t, int);
215 static int		i_mdi_client_pre_detach(dev_info_t *,
216 			    ddi_detach_cmd_t);
217 static void		i_mdi_client_post_detach(dev_info_t *,
218 			    ddi_detach_cmd_t, int);
219 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
220 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
221 static int 		i_mdi_lba_lb(mdi_client_t *ct,
222 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
223 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
224 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
225 static void		i_mdi_pm_reset_client(mdi_client_t *);
226 static int		i_mdi_power_all_phci(mdi_client_t *);
227 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
228 
229 
230 /*
231  * Internal mdi_pathinfo node functions
232  */
233 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
234 
235 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
236 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
237 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
238 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
239 static void		i_mdi_phci_unlock(mdi_phci_t *);
240 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
241 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
242 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
243 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
244 			    mdi_client_t *);
245 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
246 static void		i_mdi_client_remove_path(mdi_client_t *,
247 			    mdi_pathinfo_t *);
248 
249 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
250 			    mdi_pathinfo_state_t, int);
251 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
252 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
253 			    char **, int);
254 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
255 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
256 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
257 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
258 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
259 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
260 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
261 static void		i_mdi_client_update_state(mdi_client_t *);
262 static int		i_mdi_client_compute_state(mdi_client_t *,
263 			    mdi_phci_t *);
264 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
265 static void		i_mdi_client_unlock(mdi_client_t *);
266 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
267 static mdi_client_t	*i_devi_get_client(dev_info_t *);
268 /*
269  * NOTE: this will be removed once the NWS files are changed to use the new
270  * mdi_{enable,disable}_path interfaces
271  */
272 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
273 				int, int);
274 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
275 				mdi_vhci_t *vh, int flags, int op);
276 /*
277  * Failover related function prototypes
278  */
279 static int		i_mdi_failover(void *);
280 
281 /*
282  * misc internal functions
283  */
284 static int		i_mdi_get_hash_key(char *);
285 static int		i_map_nvlist_error_to_mdi(int);
286 static void		i_mdi_report_path_state(mdi_client_t *,
287 			    mdi_pathinfo_t *);
288 
289 static void		setup_vhci_cache(mdi_vhci_t *);
290 static int		destroy_vhci_cache(mdi_vhci_t *);
291 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
292 static boolean_t	stop_vhcache_flush_thread(void *, int);
293 static void		free_string_array(char **, int);
294 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
295 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
296 static void		free_vhcache_client(mdi_vhcache_client_t *);
297 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
298 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
299 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
300 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
301 static void		vhcache_pi_add(mdi_vhci_config_t *,
302 			    struct mdi_pathinfo *);
303 static void		vhcache_pi_remove(mdi_vhci_config_t *,
304 			    struct mdi_pathinfo *);
305 static void		free_phclient_path_list(mdi_phys_path_t *);
306 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
307 static int		flush_vhcache(mdi_vhci_config_t *, int);
308 static void		vhcache_dirty(mdi_vhci_config_t *);
309 static void		free_async_client_config(mdi_async_client_config_t *);
310 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
311 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
312 static nvlist_t		*read_on_disk_vhci_cache(char *);
313 extern int		fread_nvlist(char *, nvlist_t **);
314 extern int		fwrite_nvlist(char *, nvlist_t *);
315 
316 /* called once when first vhci registers with mdi */
317 static void
318 i_mdi_init()
319 {
320 	static int initialized = 0;
321 
322 	if (initialized)
323 		return;
324 	initialized = 1;
325 
326 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
327 
328 	/* Create our taskq resources */
329 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
330 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
331 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
332 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
333 
334 	/* Allocate ['path_instance' <-> "path"] maps */
335 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
336 	mdi_pathmap_bypath = mod_hash_create_strhash(
337 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
338 	    mod_hash_null_valdtor);
339 	mdi_pathmap_byinstance = mod_hash_create_idhash(
340 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
341 	    mod_hash_null_valdtor);
342 	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
343 	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
344 	    mod_hash_null_valdtor);
345 }
346 
347 /*
348  * mdi_get_component_type():
349  *		Return mpxio component type
350  * Return Values:
351  *		MDI_COMPONENT_NONE
352  *		MDI_COMPONENT_VHCI
353  *		MDI_COMPONENT_PHCI
354  *		MDI_COMPONENT_CLIENT
355  * XXX This doesn't work under multi-level MPxIO and should be
356  *	removed when clients migrate mdi_component_is_*() interfaces.
357  */
358 int
359 mdi_get_component_type(dev_info_t *dip)
360 {
361 	return (DEVI(dip)->devi_mdi_component);
362 }
363 
364 /*
365  * mdi_vhci_register():
366  *		Register a vHCI module with the mpxio framework
367  *		mdi_vhci_register() is called by vHCI drivers to register the
368  *		'class_driver' vHCI driver and its MDI entrypoints with the
369  *		mpxio framework.  The vHCI driver must call this interface as
370  *		part of its attach(9e) handler.
371  *		Competing threads may try to attach mdi_vhci_register() as
372  *		the vHCI drivers are loaded and attached as a result of pHCI
373  *		driver instance registration (mdi_phci_register()) with the
374  *		framework.
375  * Return Values:
376  *		MDI_SUCCESS
377  *		MDI_FAILURE
378  */
379 /*ARGSUSED*/
380 int
381 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
382     int flags)
383 {
384 	mdi_vhci_t		*vh = NULL;
385 
386 	/* Registrant can't be older */
387 	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
388 
389 #ifdef DEBUG
390 	/*
391 	 * IB nexus driver is loaded only when IB hardware is present.
392 	 * In order to be able to do this there is a need to drive the loading
393 	 * and attaching of the IB nexus driver (especially when an IB hardware
394 	 * is dynamically plugged in) when an IB HCA driver (PHCI)
395 	 * is being attached. Unfortunately this gets into the limitations
396 	 * of devfs as there seems to be no clean way to drive configuration
397 	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
398 	 * for IB.
399 	 */
400 	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
401 		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
402 #endif
403 
404 	i_mdi_init();
405 
406 	mutex_enter(&mdi_mutex);
407 	/*
408 	 * Scan for already registered vhci
409 	 */
410 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
411 		if (strcmp(vh->vh_class, class) == 0) {
412 			/*
413 			 * vHCI has already been created.  Check for valid
414 			 * vHCI ops registration.  We only support one vHCI
415 			 * module per class
416 			 */
417 			if (vh->vh_ops != NULL) {
418 				mutex_exit(&mdi_mutex);
419 				cmn_err(CE_NOTE, vhci_greeting, class);
420 				return (MDI_FAILURE);
421 			}
422 			break;
423 		}
424 	}
425 
426 	/*
427 	 * if not yet created, create the vHCI component
428 	 */
429 	if (vh == NULL) {
430 		struct client_hash	*hash = NULL;
431 		char			*load_balance;
432 
433 		/*
434 		 * Allocate and initialize the mdi extensions
435 		 */
436 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
437 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
438 		    KM_SLEEP);
439 		vh->vh_client_table = hash;
440 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
441 		(void) strcpy(vh->vh_class, class);
442 		vh->vh_lb = LOAD_BALANCE_RR;
443 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
444 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
445 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
446 				vh->vh_lb = LOAD_BALANCE_NONE;
447 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
448 				    == 0) {
449 				vh->vh_lb = LOAD_BALANCE_LBA;
450 			}
451 			ddi_prop_free(load_balance);
452 		}
453 
454 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
455 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
456 
457 		/*
458 		 * Store the vHCI ops vectors
459 		 */
460 		vh->vh_dip = vdip;
461 		vh->vh_ops = vops;
462 
463 		setup_vhci_cache(vh);
464 
465 		if (mdi_vhci_head == NULL) {
466 			mdi_vhci_head = vh;
467 		}
468 		if (mdi_vhci_tail) {
469 			mdi_vhci_tail->vh_next = vh;
470 		}
471 		mdi_vhci_tail = vh;
472 		mdi_vhci_count++;
473 	}
474 
475 	/*
476 	 * Claim the devfs node as a vhci component
477 	 */
478 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
479 
480 	/*
481 	 * Initialize our back reference from dev_info node
482 	 */
483 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
484 	mutex_exit(&mdi_mutex);
485 	return (MDI_SUCCESS);
486 }
487 
488 /*
489  * mdi_vhci_unregister():
490  *		Unregister a vHCI module from mpxio framework
491  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
492  * 		of a vhci to unregister it from the framework.
493  * Return Values:
494  *		MDI_SUCCESS
495  *		MDI_FAILURE
496  */
497 /*ARGSUSED*/
498 int
499 mdi_vhci_unregister(dev_info_t *vdip, int flags)
500 {
501 	mdi_vhci_t	*found, *vh, *prev = NULL;
502 
503 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
504 
505 	/*
506 	 * Check for invalid VHCI
507 	 */
508 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
509 		return (MDI_FAILURE);
510 
511 	/*
512 	 * Scan the list of registered vHCIs for a match
513 	 */
514 	mutex_enter(&mdi_mutex);
515 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
516 		if (found == vh)
517 			break;
518 		prev = found;
519 	}
520 
521 	if (found == NULL) {
522 		mutex_exit(&mdi_mutex);
523 		return (MDI_FAILURE);
524 	}
525 
526 	/*
527 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
528 	 * should have been unregistered, before a vHCI can be
529 	 * unregistered.
530 	 */
531 	MDI_VHCI_PHCI_LOCK(vh);
532 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
533 		MDI_VHCI_PHCI_UNLOCK(vh);
534 		mutex_exit(&mdi_mutex);
535 		return (MDI_FAILURE);
536 	}
537 	MDI_VHCI_PHCI_UNLOCK(vh);
538 
539 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
540 		mutex_exit(&mdi_mutex);
541 		return (MDI_FAILURE);
542 	}
543 
544 	/*
545 	 * Remove the vHCI from the global list
546 	 */
547 	if (vh == mdi_vhci_head) {
548 		mdi_vhci_head = vh->vh_next;
549 	} else {
550 		prev->vh_next = vh->vh_next;
551 	}
552 	if (vh == mdi_vhci_tail) {
553 		mdi_vhci_tail = prev;
554 	}
555 	mdi_vhci_count--;
556 	mutex_exit(&mdi_mutex);
557 
558 	vh->vh_ops = NULL;
559 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
560 	DEVI(vdip)->devi_mdi_xhci = NULL;
561 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
562 	kmem_free(vh->vh_client_table,
563 	    mdi_client_table_size * sizeof (struct client_hash));
564 	mutex_destroy(&vh->vh_phci_mutex);
565 	mutex_destroy(&vh->vh_client_mutex);
566 
567 	kmem_free(vh, sizeof (mdi_vhci_t));
568 	return (MDI_SUCCESS);
569 }
570 
571 /*
572  * i_mdi_vhci_class2vhci():
573  *		Look for a matching vHCI module given a vHCI class name
574  * Return Values:
575  *		Handle to a vHCI component
576  *		NULL
577  */
578 static mdi_vhci_t *
579 i_mdi_vhci_class2vhci(char *class)
580 {
581 	mdi_vhci_t	*vh = NULL;
582 
583 	ASSERT(!MUTEX_HELD(&mdi_mutex));
584 
585 	mutex_enter(&mdi_mutex);
586 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
587 		if (strcmp(vh->vh_class, class) == 0) {
588 			break;
589 		}
590 	}
591 	mutex_exit(&mdi_mutex);
592 	return (vh);
593 }
594 
595 /*
596  * i_devi_get_vhci():
597  *		Utility function to get the handle to a vHCI component
598  * Return Values:
599  *		Handle to a vHCI component
600  *		NULL
601  */
602 mdi_vhci_t *
603 i_devi_get_vhci(dev_info_t *vdip)
604 {
605 	mdi_vhci_t	*vh = NULL;
606 	if (MDI_VHCI(vdip)) {
607 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
608 	}
609 	return (vh);
610 }
611 
612 /*
613  * mdi_phci_register():
614  *		Register a pHCI module with mpxio framework
615  *		mdi_phci_register() is called by pHCI drivers to register with
616  *		the mpxio framework and a specific 'class_driver' vHCI.  The
617  *		pHCI driver must call this interface as part of its attach(9e)
618  *		handler.
619  * Return Values:
620  *		MDI_SUCCESS
621  *		MDI_FAILURE
622  */
623 /*ARGSUSED*/
624 int
625 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
626 {
627 	mdi_phci_t		*ph;
628 	mdi_vhci_t		*vh;
629 	char			*data;
630 
631 	/*
632 	 * Some subsystems, like fcp, perform pHCI registration from a
633 	 * different thread than the one doing the pHCI attach(9E) - the
634 	 * driver attach code is waiting for this other thread to complete.
635 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
636 	 * (indicating that some thread has done an ndi_devi_enter of parent)
637 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
638 	 */
639 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
640 
641 	/*
642 	 * Check for mpxio-disable property. Enable mpxio if the property is
643 	 * missing or not set to "yes".
644 	 * If the property is set to "yes" then emit a brief message.
645 	 */
646 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
647 	    &data) == DDI_SUCCESS)) {
648 		if (strcmp(data, "yes") == 0) {
649 			MDI_DEBUG(1, (MDI_CONT, pdip,
650 			    "?multipath capabilities disabled via %s.conf.",
651 			    ddi_driver_name(pdip)));
652 			ddi_prop_free(data);
653 			return (MDI_FAILURE);
654 		}
655 		ddi_prop_free(data);
656 	}
657 
658 	/*
659 	 * Search for a matching vHCI
660 	 */
661 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
662 	if (vh == NULL) {
663 		return (MDI_FAILURE);
664 	}
665 
666 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
667 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
668 	ph->ph_dip = pdip;
669 	ph->ph_vhci = vh;
670 	ph->ph_next = NULL;
671 	ph->ph_unstable = 0;
672 	ph->ph_vprivate = 0;
673 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
674 
675 	MDI_PHCI_LOCK(ph);
676 	MDI_PHCI_SET_POWER_UP(ph);
677 	MDI_PHCI_UNLOCK(ph);
678 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
679 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
680 
681 	vhcache_phci_add(vh->vh_config, ph);
682 
683 	MDI_VHCI_PHCI_LOCK(vh);
684 	if (vh->vh_phci_head == NULL) {
685 		vh->vh_phci_head = ph;
686 	}
687 	if (vh->vh_phci_tail) {
688 		vh->vh_phci_tail->ph_next = ph;
689 	}
690 	vh->vh_phci_tail = ph;
691 	vh->vh_phci_count++;
692 	MDI_VHCI_PHCI_UNLOCK(vh);
693 
694 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
695 	return (MDI_SUCCESS);
696 }
697 
698 /*
699  * mdi_phci_unregister():
700  *		Unregister a pHCI module from mpxio framework
701  *		mdi_phci_unregister() is called by the pHCI drivers from their
702  *		detach(9E) handler to unregister their instances from the
703  *		framework.
704  * Return Values:
705  *		MDI_SUCCESS
706  *		MDI_FAILURE
707  */
708 /*ARGSUSED*/
709 int
710 mdi_phci_unregister(dev_info_t *pdip, int flags)
711 {
712 	mdi_vhci_t		*vh;
713 	mdi_phci_t		*ph;
714 	mdi_phci_t		*tmp;
715 	mdi_phci_t		*prev = NULL;
716 	mdi_pathinfo_t		*pip;
717 
718 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
719 
720 	ph = i_devi_get_phci(pdip);
721 	if (ph == NULL) {
722 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
723 		return (MDI_FAILURE);
724 	}
725 
726 	vh = ph->ph_vhci;
727 	ASSERT(vh != NULL);
728 	if (vh == NULL) {
729 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
730 		return (MDI_FAILURE);
731 	}
732 
733 	MDI_VHCI_PHCI_LOCK(vh);
734 	tmp = vh->vh_phci_head;
735 	while (tmp) {
736 		if (tmp == ph) {
737 			break;
738 		}
739 		prev = tmp;
740 		tmp = tmp->ph_next;
741 	}
742 
743 	if (ph == vh->vh_phci_head) {
744 		vh->vh_phci_head = ph->ph_next;
745 	} else {
746 		prev->ph_next = ph->ph_next;
747 	}
748 
749 	if (ph == vh->vh_phci_tail) {
750 		vh->vh_phci_tail = prev;
751 	}
752 
753 	vh->vh_phci_count--;
754 	MDI_VHCI_PHCI_UNLOCK(vh);
755 
756 	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
757 	MDI_PHCI_LOCK(ph);
758 	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
759 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
760 		MDI_PI(pip)->pi_phci = NULL;
761 	MDI_PHCI_UNLOCK(ph);
762 
763 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
764 	    ESC_DDI_INITIATOR_UNREGISTER);
765 	vhcache_phci_remove(vh->vh_config, ph);
766 	cv_destroy(&ph->ph_unstable_cv);
767 	mutex_destroy(&ph->ph_mutex);
768 	kmem_free(ph, sizeof (mdi_phci_t));
769 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
770 	DEVI(pdip)->devi_mdi_xhci = NULL;
771 	return (MDI_SUCCESS);
772 }
773 
774 /*
775  * i_devi_get_phci():
776  * 		Utility function to return the phci extensions.
777  */
778 static mdi_phci_t *
779 i_devi_get_phci(dev_info_t *pdip)
780 {
781 	mdi_phci_t	*ph = NULL;
782 
783 	if (MDI_PHCI(pdip)) {
784 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
785 	}
786 	return (ph);
787 }
788 
789 /*
790  * Single thread mdi entry into devinfo node for modifying its children.
791  * If necessary we perform an ndi_devi_enter of the vHCI before doing
792  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
793  * for the vHCI and one for the pHCI.
794  */
795 void
796 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
797 {
798 	dev_info_t	*vdip;
799 	int		vcircular, pcircular;
800 
801 	/* Verify calling context */
802 	ASSERT(MDI_PHCI(phci_dip));
803 	vdip = mdi_devi_get_vdip(phci_dip);
804 	ASSERT(vdip);			/* A pHCI always has a vHCI */
805 
806 	/*
807 	 * If pHCI is detaching then the framework has already entered the
808 	 * vHCI on a threads that went down the code path leading to
809 	 * detach_node().  This framework enter of the vHCI during pHCI
810 	 * detach is done to avoid deadlock with vHCI power management
811 	 * operations which enter the vHCI and the enter down the path
812 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
813 	 * enter of the vHCI on frameworks vHCI enter that has already
814 	 * occurred - this is OK because we know that the framework thread
815 	 * doing detach is waiting for our completion.
816 	 *
817 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
818 	 * race with detach - but we can't do that because the framework has
819 	 * already entered the parent, so we have some complexity instead.
820 	 */
821 	for (;;) {
822 		if (ndi_devi_tryenter(vdip, &vcircular)) {
823 			ASSERT(vcircular != -1);
824 			if (DEVI_IS_DETACHING(phci_dip)) {
825 				ndi_devi_exit(vdip, vcircular);
826 				vcircular = -1;
827 			}
828 			break;
829 		} else if (DEVI_IS_DETACHING(phci_dip)) {
830 			vcircular = -1;
831 			break;
832 		} else if (servicing_interrupt()) {
833 			/*
834 			 * Don't delay an interrupt (and ensure adaptive
835 			 * mutex inversion support).
836 			 */
837 			ndi_devi_enter(vdip, &vcircular);
838 			break;
839 		} else {
840 			delay_random(mdi_delay);
841 		}
842 	}
843 
844 	ndi_devi_enter(phci_dip, &pcircular);
845 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
846 }
847 
848 /*
849  * Attempt to mdi_devi_enter.
850  */
851 int
852 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
853 {
854 	dev_info_t	*vdip;
855 	int		vcircular, pcircular;
856 
857 	/* Verify calling context */
858 	ASSERT(MDI_PHCI(phci_dip));
859 	vdip = mdi_devi_get_vdip(phci_dip);
860 	ASSERT(vdip);			/* A pHCI always has a vHCI */
861 
862 	if (ndi_devi_tryenter(vdip, &vcircular)) {
863 		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
864 			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
865 			return (1);	/* locked */
866 		}
867 		ndi_devi_exit(vdip, vcircular);
868 	}
869 	return (0);			/* busy */
870 }
871 
872 /*
873  * Release mdi_devi_enter or successful mdi_devi_tryenter.
874  */
875 void
876 mdi_devi_exit(dev_info_t *phci_dip, int circular)
877 {
878 	dev_info_t	*vdip;
879 	int		vcircular, pcircular;
880 
881 	/* Verify calling context */
882 	ASSERT(MDI_PHCI(phci_dip));
883 	vdip = mdi_devi_get_vdip(phci_dip);
884 	ASSERT(vdip);			/* A pHCI always has a vHCI */
885 
886 	/* extract two circular recursion values from single int */
887 	pcircular = (short)(circular & 0xFFFF);
888 	vcircular = (short)((circular >> 16) & 0xFFFF);
889 
890 	ndi_devi_exit(phci_dip, pcircular);
891 	if (vcircular != -1)
892 		ndi_devi_exit(vdip, vcircular);
893 }
894 
895 /*
896  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
897  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
898  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
899  * with vHCI power management code during path online/offline.  Each
900  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
901  * occur within the scope of an active mdi_devi_enter that establishes the
902  * circular value.
903  */
904 void
905 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
906 {
907 	int		pcircular;
908 
909 	/* Verify calling context */
910 	ASSERT(MDI_PHCI(phci_dip));
911 
912 	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
913 	ndi_hold_devi(phci_dip);
914 
915 	pcircular = (short)(circular & 0xFFFF);
916 	ndi_devi_exit(phci_dip, pcircular);
917 }
918 
919 void
920 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
921 {
922 	int		pcircular;
923 
924 	/* Verify calling context */
925 	ASSERT(MDI_PHCI(phci_dip));
926 
927 	ndi_devi_enter(phci_dip, &pcircular);
928 
929 	/* Drop hold from mdi_devi_exit_phci. */
930 	ndi_rele_devi(phci_dip);
931 
932 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
933 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
934 }
935 
936 /*
937  * mdi_devi_get_vdip():
938  *		given a pHCI dip return vHCI dip
939  */
940 dev_info_t *
941 mdi_devi_get_vdip(dev_info_t *pdip)
942 {
943 	mdi_phci_t	*ph;
944 
945 	ph = i_devi_get_phci(pdip);
946 	if (ph && ph->ph_vhci)
947 		return (ph->ph_vhci->vh_dip);
948 	return (NULL);
949 }
950 
951 /*
952  * mdi_devi_pdip_entered():
953  *		Return 1 if we are vHCI and have done an ndi_devi_enter
954  *		of a pHCI
955  */
956 int
957 mdi_devi_pdip_entered(dev_info_t *vdip)
958 {
959 	mdi_vhci_t	*vh;
960 	mdi_phci_t	*ph;
961 
962 	vh = i_devi_get_vhci(vdip);
963 	if (vh == NULL)
964 		return (0);
965 
966 	MDI_VHCI_PHCI_LOCK(vh);
967 	ph = vh->vh_phci_head;
968 	while (ph) {
969 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
970 			MDI_VHCI_PHCI_UNLOCK(vh);
971 			return (1);
972 		}
973 		ph = ph->ph_next;
974 	}
975 	MDI_VHCI_PHCI_UNLOCK(vh);
976 	return (0);
977 }
978 
979 /*
980  * mdi_phci_path2devinfo():
981  * 		Utility function to search for a valid phci device given
982  *		the devfs pathname.
983  */
984 dev_info_t *
985 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
986 {
987 	char		*temp_pathname;
988 	mdi_vhci_t	*vh;
989 	mdi_phci_t	*ph;
990 	dev_info_t 	*pdip = NULL;
991 
992 	vh = i_devi_get_vhci(vdip);
993 	ASSERT(vh != NULL);
994 
995 	if (vh == NULL) {
996 		/*
997 		 * Invalid vHCI component, return failure
998 		 */
999 		return (NULL);
1000 	}
1001 
1002 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1003 	MDI_VHCI_PHCI_LOCK(vh);
1004 	ph = vh->vh_phci_head;
1005 	while (ph != NULL) {
1006 		pdip = ph->ph_dip;
1007 		ASSERT(pdip != NULL);
1008 		*temp_pathname = '\0';
1009 		(void) ddi_pathname(pdip, temp_pathname);
1010 		if (strcmp(temp_pathname, pathname) == 0) {
1011 			break;
1012 		}
1013 		ph = ph->ph_next;
1014 	}
1015 	if (ph == NULL) {
1016 		pdip = NULL;
1017 	}
1018 	MDI_VHCI_PHCI_UNLOCK(vh);
1019 	kmem_free(temp_pathname, MAXPATHLEN);
1020 	return (pdip);
1021 }
1022 
1023 /*
1024  * mdi_phci_get_path_count():
1025  * 		get number of path information nodes associated with a given
1026  *		pHCI device.
1027  */
1028 int
1029 mdi_phci_get_path_count(dev_info_t *pdip)
1030 {
1031 	mdi_phci_t	*ph;
1032 	int		count = 0;
1033 
1034 	ph = i_devi_get_phci(pdip);
1035 	if (ph != NULL) {
1036 		count = ph->ph_path_count;
1037 	}
1038 	return (count);
1039 }
1040 
/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Arguments:
 *		ph	pHCI whose lock is to be acquired
 *		pip	NULL for a plain blocking acquire of the pHCI lock;
 *			non-NULL to request reverse locking.  The reverse
 *			path drops and retakes pip's lock, so the caller is
 *			presumably expected to hold it on entry (this is not
 *			asserted here).
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are a number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements a try and
 *		lock mechanism when a pathinfo node is supplied.
 *
 *		On the reverse path the pathinfo lock may be dropped and
 *		retaken, so anything it protects can change across this call.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Interrupt context does not go through the
				 * delay-and-retry loop: drop the pathinfo
				 * lock, block on the pHCI lock, then retake
				 * the pathinfo lock (i.e. fall back to the
				 * default lock order).  The hold is
				 * presumably what keeps pip valid while its
				 * lock is dropped.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_PHCI_LOCK(ph);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay.  The pathinfo lock is
				 * released for the duration of the delay.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		/* Default order: simply block on the pHCI lock. */
		MDI_PHCI_LOCK(ph);
	}
}
1082 
/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component.  Thin wrapper around
 *		MDI_PHCI_UNLOCK(); the counterpart of i_mdi_phci_lock().
 * Return Values:
 *		None
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}
1092 
1093 /*
1094  * i_mdi_devinfo_create():
1095  *		create client device's devinfo node
1096  * Return Values:
1097  *		dev_info
1098  *		NULL
1099  * Notes:
1100  */
1101 static dev_info_t *
1102 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1103 	char **compatible, int ncompatible)
1104 {
1105 	dev_info_t *cdip = NULL;
1106 
1107 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1108 
1109 	/* Verify for duplicate entry */
1110 	cdip = i_mdi_devinfo_find(vh, name, guid);
1111 	ASSERT(cdip == NULL);
1112 	if (cdip) {
1113 		cmn_err(CE_WARN,
1114 		    "i_mdi_devinfo_create: client %s@%s already exists",
1115 			name ? name : "", guid ? guid : "");
1116 	}
1117 
1118 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1119 	if (cdip == NULL)
1120 		goto fail;
1121 
1122 	/*
1123 	 * Create component type and Global unique identifier
1124 	 * properties
1125 	 */
1126 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1127 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1128 		goto fail;
1129 	}
1130 
1131 	/* Decorate the node with compatible property */
1132 	if (compatible &&
1133 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1134 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1135 		goto fail;
1136 	}
1137 
1138 	return (cdip);
1139 
1140 fail:
1141 	if (cdip) {
1142 		(void) ndi_prop_remove_all(cdip);
1143 		(void) ndi_devi_free(cdip);
1144 	}
1145 	return (NULL);
1146 }
1147 
1148 /*
1149  * i_mdi_devinfo_find():
1150  *		Find a matching devinfo node for given client node name
1151  *		and its guid.
1152  * Return Values:
1153  *		Handle to a dev_info node or NULL
1154  */
1155 static dev_info_t *
1156 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1157 {
1158 	char			*data;
1159 	dev_info_t 		*cdip = NULL;
1160 	dev_info_t 		*ndip = NULL;
1161 	int			circular;
1162 
1163 	ndi_devi_enter(vh->vh_dip, &circular);
1164 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1165 	while ((cdip = ndip) != NULL) {
1166 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1167 
1168 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1169 			continue;
1170 		}
1171 
1172 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1173 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1174 		    &data) != DDI_PROP_SUCCESS) {
1175 			continue;
1176 		}
1177 
1178 		if (strcmp(data, guid) != 0) {
1179 			ddi_prop_free(data);
1180 			continue;
1181 		}
1182 		ddi_prop_free(data);
1183 		break;
1184 	}
1185 	ndi_devi_exit(vh->vh_dip, circular);
1186 	return (cdip);
1187 }
1188 
1189 /*
1190  * i_mdi_devinfo_remove():
1191  *		Remove a client device node
1192  */
1193 static int
1194 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1195 {
1196 	int	rv = MDI_SUCCESS;
1197 
1198 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1199 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1200 		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1201 		if (rv != NDI_SUCCESS) {
1202 			MDI_DEBUG(1, (MDI_NOTE, cdip,
1203 			    "!failed: cdip %p", (void *)cdip));
1204 		}
1205 		/*
1206 		 * Convert to MDI error code
1207 		 */
1208 		switch (rv) {
1209 		case NDI_SUCCESS:
1210 			rv = MDI_SUCCESS;
1211 			break;
1212 		case NDI_BUSY:
1213 			rv = MDI_BUSY;
1214 			break;
1215 		default:
1216 			rv = MDI_FAILURE;
1217 			break;
1218 		}
1219 	}
1220 	return (rv);
1221 }
1222 
1223 /*
1224  * i_devi_get_client()
1225  *		Utility function to get mpxio component extensions
1226  */
1227 static mdi_client_t *
1228 i_devi_get_client(dev_info_t *cdip)
1229 {
1230 	mdi_client_t	*ct = NULL;
1231 
1232 	if (MDI_CLIENT(cdip)) {
1233 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1234 	}
1235 	return (ct);
1236 }
1237 
1238 /*
1239  * i_mdi_is_child_present():
1240  *		Search for the presence of client device dev_info node
1241  */
1242 static int
1243 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1244 {
1245 	int		rv = MDI_FAILURE;
1246 	struct dev_info	*dip;
1247 	int		circular;
1248 
1249 	ndi_devi_enter(vdip, &circular);
1250 	dip = DEVI(vdip)->devi_child;
1251 	while (dip) {
1252 		if (dip == DEVI(cdip)) {
1253 			rv = MDI_SUCCESS;
1254 			break;
1255 		}
1256 		dip = dip->devi_sibling;
1257 	}
1258 	ndi_devi_exit(vdip, circular);
1259 	return (rv);
1260 }
1261 
1262 
/*
 * i_mdi_client_lock():
 *		Grab client component lock
 * Arguments:
 *		ct	client whose lock is to be acquired
 *		pip	NULL for a plain blocking acquire of the client lock;
 *			non-NULL to request reverse locking.  The reverse
 *			path drops and retakes pip's lock, so the caller is
 *			presumably expected to hold it on entry (this is not
 *			asserted here).
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *		But there are a number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements a try and
 *		lock mechanism when a pathinfo node is supplied.
 *
 *		On the reverse path the pathinfo lock may be dropped and
 *		retaken, so anything it protects can change across this call.
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Interrupt context does not go through the
				 * delay-and-retry loop: drop the pathinfo
				 * lock, block on the client lock, then retake
				 * the pathinfo lock (i.e. fall back to the
				 * default lock order).  The hold is
				 * presumably what keeps pip valid while its
				 * lock is dropped.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_CLIENT_LOCK(ct);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay.  The pathinfo lock is
				 * released for the duration of the delay.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		/* Default order: simply block on the client lock. */
		MDI_CLIENT_LOCK(ct);
	}
}
1306 
/*
 * i_mdi_client_unlock():
 *		Unlock a client component.  Thin wrapper around
 *		MDI_CLIENT_UNLOCK(); the counterpart of i_mdi_client_lock().
 * Return Values:
 *		None
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}
1316 
1317 /*
1318  * i_mdi_client_alloc():
1319  * 		Allocate and initialize a client structure.  Caller should
1320  *		hold the vhci client lock.
1321  * Return Values:
1322  *		Handle to a client component
1323  */
1324 /*ARGSUSED*/
1325 static mdi_client_t *
1326 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1327 {
1328 	mdi_client_t	*ct;
1329 
1330 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1331 
1332 	/*
1333 	 * Allocate and initialize a component structure.
1334 	 */
1335 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1336 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1337 	ct->ct_hnext = NULL;
1338 	ct->ct_hprev = NULL;
1339 	ct->ct_dip = NULL;
1340 	ct->ct_vhci = vh;
1341 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1342 	(void) strcpy(ct->ct_drvname, name);
1343 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1344 	(void) strcpy(ct->ct_guid, lguid);
1345 	ct->ct_cprivate = NULL;
1346 	ct->ct_vprivate = NULL;
1347 	ct->ct_flags = 0;
1348 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1349 	MDI_CLIENT_LOCK(ct);
1350 	MDI_CLIENT_SET_OFFLINE(ct);
1351 	MDI_CLIENT_SET_DETACH(ct);
1352 	MDI_CLIENT_SET_POWER_UP(ct);
1353 	MDI_CLIENT_UNLOCK(ct);
1354 	ct->ct_failover_flags = 0;
1355 	ct->ct_failover_status = 0;
1356 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1357 	ct->ct_unstable = 0;
1358 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1359 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1360 	ct->ct_lb = vh->vh_lb;
1361 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1362 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1363 	ct->ct_path_count = 0;
1364 	ct->ct_path_head = NULL;
1365 	ct->ct_path_tail = NULL;
1366 	ct->ct_path_last = NULL;
1367 
1368 	/*
1369 	 * Add this client component to our client hash queue
1370 	 */
1371 	i_mdi_client_enlist_table(vh, ct);
1372 	return (ct);
1373 }
1374 
1375 /*
1376  * i_mdi_client_enlist_table():
1377  *		Attach the client device to the client hash table. Caller
1378  *		should hold the vhci client lock.
1379  */
1380 static void
1381 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1382 {
1383 	int 			index;
1384 	struct client_hash	*head;
1385 
1386 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1387 
1388 	index = i_mdi_get_hash_key(ct->ct_guid);
1389 	head = &vh->vh_client_table[index];
1390 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1391 	head->ct_hash_head = ct;
1392 	head->ct_hash_count++;
1393 	vh->vh_client_count++;
1394 }
1395 
1396 /*
1397  * i_mdi_client_delist_table():
1398  *		Attach the client device to the client hash table.
1399  *		Caller should hold the vhci client lock.
1400  */
1401 static void
1402 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1403 {
1404 	int			index;
1405 	char			*guid;
1406 	struct client_hash 	*head;
1407 	mdi_client_t		*next;
1408 	mdi_client_t		*last;
1409 
1410 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1411 
1412 	guid = ct->ct_guid;
1413 	index = i_mdi_get_hash_key(guid);
1414 	head = &vh->vh_client_table[index];
1415 
1416 	last = NULL;
1417 	next = (mdi_client_t *)head->ct_hash_head;
1418 	while (next != NULL) {
1419 		if (next == ct) {
1420 			break;
1421 		}
1422 		last = next;
1423 		next = next->ct_hnext;
1424 	}
1425 
1426 	if (next) {
1427 		head->ct_hash_count--;
1428 		if (last == NULL) {
1429 			head->ct_hash_head = ct->ct_hnext;
1430 		} else {
1431 			last->ct_hnext = ct->ct_hnext;
1432 		}
1433 		ct->ct_hnext = NULL;
1434 		vh->vh_client_count--;
1435 	}
1436 }
1437 
1438 
1439 /*
1440  * i_mdi_client_free():
1441  *		Free a client component
1442  */
1443 static int
1444 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1445 {
1446 	int		rv = MDI_SUCCESS;
1447 	int		flags = ct->ct_flags;
1448 	dev_info_t	*cdip;
1449 	dev_info_t	*vdip;
1450 
1451 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1452 
1453 	vdip = vh->vh_dip;
1454 	cdip = ct->ct_dip;
1455 
1456 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1457 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1458 	DEVI(cdip)->devi_mdi_client = NULL;
1459 
1460 	/*
1461 	 * Clear out back ref. to dev_info_t node
1462 	 */
1463 	ct->ct_dip = NULL;
1464 
1465 	/*
1466 	 * Remove this client from our hash queue
1467 	 */
1468 	i_mdi_client_delist_table(vh, ct);
1469 
1470 	/*
1471 	 * Uninitialize and free the component
1472 	 */
1473 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1474 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1475 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1476 	cv_destroy(&ct->ct_failover_cv);
1477 	cv_destroy(&ct->ct_unstable_cv);
1478 	cv_destroy(&ct->ct_powerchange_cv);
1479 	mutex_destroy(&ct->ct_mutex);
1480 	kmem_free(ct, sizeof (*ct));
1481 
1482 	MDI_VHCI_CLIENT_UNLOCK(vh);
1483 	(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1484 	MDI_VHCI_CLIENT_LOCK(vh);
1485 
1486 	return (rv);
1487 }
1488 
1489 /*
1490  * i_mdi_client_find():
1491  * 		Find the client structure corresponding to a given guid
1492  *		Caller should hold the vhci client lock.
1493  */
1494 static mdi_client_t *
1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1496 {
1497 	int			index;
1498 	struct client_hash	*head;
1499 	mdi_client_t		*ct;
1500 
1501 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1502 
1503 	index = i_mdi_get_hash_key(guid);
1504 	head = &vh->vh_client_table[index];
1505 
1506 	ct = head->ct_hash_head;
1507 	while (ct != NULL) {
1508 		if (strcmp(ct->ct_guid, guid) == 0 &&
1509 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1510 			break;
1511 		}
1512 		ct = ct->ct_hnext;
1513 	}
1514 	return (ct);
1515 }
1516 
1517 /*
1518  * i_mdi_client_update_state():
1519  *		Compute and update client device state
1520  * Notes:
1521  *		A client device can be in any of three possible states:
1522  *
1523  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1524  *		one online/standby paths. Can tolerate failures.
1525  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1526  *		no alternate paths available as standby. A failure on the online
1527  *		would result in loss of access to device data.
1528  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1529  *		no paths available to access the device.
1530  */
1531 static void
1532 i_mdi_client_update_state(mdi_client_t *ct)
1533 {
1534 	int state;
1535 
1536 	ASSERT(MDI_CLIENT_LOCKED(ct));
1537 	state = i_mdi_client_compute_state(ct, NULL);
1538 	MDI_CLIENT_SET_STATE(ct, state);
1539 }
1540 
1541 /*
1542  * i_mdi_client_compute_state():
1543  *		Compute client device state
1544  *
1545  *		mdi_phci_t *	Pointer to pHCI structure which should
1546  *				while computing the new value.  Used by
1547  *				i_mdi_phci_offline() to find the new
1548  *				client state after DR of a pHCI.
1549  */
1550 static int
1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1552 {
1553 	int		state;
1554 	int		online_count = 0;
1555 	int		standby_count = 0;
1556 	mdi_pathinfo_t	*pip, *next;
1557 
1558 	ASSERT(MDI_CLIENT_LOCKED(ct));
1559 	pip = ct->ct_path_head;
1560 	while (pip != NULL) {
1561 		MDI_PI_LOCK(pip);
1562 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1563 		if (MDI_PI(pip)->pi_phci == ph) {
1564 			MDI_PI_UNLOCK(pip);
1565 			pip = next;
1566 			continue;
1567 		}
1568 
1569 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1570 				== MDI_PATHINFO_STATE_ONLINE)
1571 			online_count++;
1572 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1573 				== MDI_PATHINFO_STATE_STANDBY)
1574 			standby_count++;
1575 		MDI_PI_UNLOCK(pip);
1576 		pip = next;
1577 	}
1578 
1579 	if (online_count == 0) {
1580 		if (standby_count == 0) {
1581 			state = MDI_CLIENT_STATE_FAILED;
1582 			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1583 			    "client state failed: ct = %p", (void *)ct));
1584 		} else if (standby_count == 1) {
1585 			state = MDI_CLIENT_STATE_DEGRADED;
1586 		} else {
1587 			state = MDI_CLIENT_STATE_OPTIMAL;
1588 		}
1589 	} else if (online_count == 1) {
1590 		if (standby_count == 0) {
1591 			state = MDI_CLIENT_STATE_DEGRADED;
1592 		} else {
1593 			state = MDI_CLIENT_STATE_OPTIMAL;
1594 		}
1595 	} else {
1596 		state = MDI_CLIENT_STATE_OPTIMAL;
1597 	}
1598 	return (state);
1599 }
1600 
/*
 * i_mdi_client2devinfo():
 *		Utility function: return the devinfo node recorded in the
 *		client structure (ct_dip).  ct is dereferenced
 *		unconditionally and no lock or hold is taken here.
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	return (ct->ct_dip);
}
1610 
1611 /*
1612  * mdi_client_path2_devinfo():
1613  * 		Given the parent devinfo and child devfs pathname, search for
1614  *		a valid devfs node handle.
1615  */
1616 dev_info_t *
1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1618 {
1619 	dev_info_t 	*cdip = NULL;
1620 	dev_info_t 	*ndip = NULL;
1621 	char		*temp_pathname;
1622 	int		circular;
1623 
1624 	/*
1625 	 * Allocate temp buffer
1626 	 */
1627 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1628 
1629 	/*
1630 	 * Lock parent against changes
1631 	 */
1632 	ndi_devi_enter(vdip, &circular);
1633 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1634 	while ((cdip = ndip) != NULL) {
1635 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1636 
1637 		*temp_pathname = '\0';
1638 		(void) ddi_pathname(cdip, temp_pathname);
1639 		if (strcmp(temp_pathname, pathname) == 0) {
1640 			break;
1641 		}
1642 	}
1643 	/*
1644 	 * Release devinfo lock
1645 	 */
1646 	ndi_devi_exit(vdip, circular);
1647 
1648 	/*
1649 	 * Free the temp buffer
1650 	 */
1651 	kmem_free(temp_pathname, MAXPATHLEN);
1652 	return (cdip);
1653 }
1654 
1655 /*
1656  * mdi_client_get_path_count():
1657  * 		Utility function to get number of path information nodes
1658  *		associated with a given client device.
1659  */
1660 int
1661 mdi_client_get_path_count(dev_info_t *cdip)
1662 {
1663 	mdi_client_t	*ct;
1664 	int		count = 0;
1665 
1666 	ct = i_devi_get_client(cdip);
1667 	if (ct != NULL) {
1668 		count = ct->ct_path_count;
1669 	}
1670 	return (count);
1671 }
1672 
1673 
1674 /*
1675  * i_mdi_get_hash_key():
1676  * 		Create a hash using strings as keys
1677  *
1678  */
1679 static int
1680 i_mdi_get_hash_key(char *str)
1681 {
1682 	uint32_t	g, hash = 0;
1683 	char		*p;
1684 
1685 	for (p = str; *p != '\0'; p++) {
1686 		g = *p;
1687 		hash += g;
1688 	}
1689 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1690 }
1691 
1692 /*
1693  * mdi_get_lb_policy():
1694  * 		Get current load balancing policy for a given client device
1695  */
1696 client_lb_t
1697 mdi_get_lb_policy(dev_info_t *cdip)
1698 {
1699 	client_lb_t	lb = LOAD_BALANCE_NONE;
1700 	mdi_client_t	*ct;
1701 
1702 	ct = i_devi_get_client(cdip);
1703 	if (ct != NULL) {
1704 		lb = ct->ct_lb;
1705 	}
1706 	return (lb);
1707 }
1708 
1709 /*
1710  * mdi_set_lb_region_size():
1711  * 		Set current region size for the load-balance
1712  */
1713 int
1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1715 {
1716 	mdi_client_t	*ct;
1717 	int		rv = MDI_FAILURE;
1718 
1719 	ct = i_devi_get_client(cdip);
1720 	if (ct != NULL && ct->ct_lb_args != NULL) {
1721 		ct->ct_lb_args->region_size = region_size;
1722 		rv = MDI_SUCCESS;
1723 	}
1724 	return (rv);
1725 }
1726 
1727 /*
1728  * mdi_Set_lb_policy():
1729  * 		Set current load balancing policy for a given client device
1730  */
1731 int
1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1733 {
1734 	mdi_client_t	*ct;
1735 	int		rv = MDI_FAILURE;
1736 
1737 	ct = i_devi_get_client(cdip);
1738 	if (ct != NULL) {
1739 		ct->ct_lb = lb;
1740 		rv = MDI_SUCCESS;
1741 	}
1742 	return (rv);
1743 }
1744 
/*
 * mdi_failover_cb():
 *		void-returning adapter around i_mdi_failover(), which returns
 *		an int; the returned status is discarded here.
 *		arg is passed through unchanged (i_mdi_failover() treats it
 *		as the mdi_client_t to fail over).
 */
static void
mdi_failover_cb(void *arg)
{
	(void)i_mdi_failover(arg);
}
1750 
1751 /*
1752  * mdi_failover():
1753  *		failover function called by the vHCI drivers to initiate
1754  *		a failover operation.  This is typically due to non-availability
1755  *		of online paths to route I/O requests.  Failover can be
1756  *		triggered through user application also.
1757  *
1758  *		The vHCI driver calls mdi_failover() to initiate a failover
1759  *		operation. mdi_failover() calls back into the vHCI driver's
1760  *		vo_failover() entry point to perform the actual failover
1761  *		operation.  The reason for requiring the vHCI driver to
1762  *		initiate failover by calling mdi_failover(), instead of directly
1763  *		executing vo_failover() itself, is to ensure that the mdi
1764  *		framework can keep track of the client state properly.
1765  *		Additionally, mdi_failover() provides as a convenience the
1766  *		option of performing the failover operation synchronously or
1767  *		asynchronously
1768  *
1769  *		Upon successful completion of the failover operation, the
1770  *		paths that were previously ONLINE will be in the STANDBY state,
1771  *		and the newly activated paths will be in the ONLINE state.
1772  *
1773  *		The flags modifier determines whether the activation is done
1774  *		synchronously: MDI_FAILOVER_SYNC
1775  * Return Values:
1776  *		MDI_SUCCESS
1777  *		MDI_FAILURE
1778  *		MDI_BUSY
1779  */
1780 /*ARGSUSED*/
1781 int
1782 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1783 {
1784 	int			rv;
1785 	mdi_client_t		*ct;
1786 
1787 	ct = i_devi_get_client(cdip);
1788 	ASSERT(ct != NULL);
1789 	if (ct == NULL) {
1790 		/* cdip is not a valid client device. Nothing more to do. */
1791 		return (MDI_FAILURE);
1792 	}
1793 
1794 	MDI_CLIENT_LOCK(ct);
1795 
1796 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1797 		/* A path to the client is being freed */
1798 		MDI_CLIENT_UNLOCK(ct);
1799 		return (MDI_BUSY);
1800 	}
1801 
1802 
1803 	if (MDI_CLIENT_IS_FAILED(ct)) {
1804 		/*
1805 		 * Client is in failed state. Nothing more to do.
1806 		 */
1807 		MDI_CLIENT_UNLOCK(ct);
1808 		return (MDI_FAILURE);
1809 	}
1810 
1811 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1812 		/*
1813 		 * Failover is already in progress; return BUSY
1814 		 */
1815 		MDI_CLIENT_UNLOCK(ct);
1816 		return (MDI_BUSY);
1817 	}
1818 	/*
1819 	 * Make sure that mdi_pathinfo node state changes are processed.
1820 	 * We do not allow failovers to progress while client path state
1821 	 * changes are in progress
1822 	 */
1823 	if (ct->ct_unstable) {
1824 		if (flags == MDI_FAILOVER_ASYNC) {
1825 			MDI_CLIENT_UNLOCK(ct);
1826 			return (MDI_BUSY);
1827 		} else {
1828 			while (ct->ct_unstable)
1829 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1830 		}
1831 	}
1832 
1833 	/*
1834 	 * Client device is in stable state. Before proceeding, perform sanity
1835 	 * checks again.
1836 	 */
1837 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1838 	    (!i_ddi_devi_attached(cdip))) {
1839 		/*
1840 		 * Client is in failed state. Nothing more to do.
1841 		 */
1842 		MDI_CLIENT_UNLOCK(ct);
1843 		return (MDI_FAILURE);
1844 	}
1845 
1846 	/*
1847 	 * Set the client state as failover in progress.
1848 	 */
1849 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1850 	ct->ct_failover_flags = flags;
1851 	MDI_CLIENT_UNLOCK(ct);
1852 
1853 	if (flags == MDI_FAILOVER_ASYNC) {
1854 		/*
1855 		 * Submit the initiate failover request via CPR safe
1856 		 * taskq threads.
1857 		 */
1858 		(void) taskq_dispatch(mdi_taskq, mdi_failover_cb, ct, KM_SLEEP);
1859 		return (MDI_ACCEPT);
1860 	} else {
1861 		/*
1862 		 * Synchronous failover mode.  Typically invoked from the user
1863 		 * land.
1864 		 */
1865 		rv = i_mdi_failover(ct);
1866 	}
1867 	return (rv);
1868 }
1869 
1870 /*
1871  * i_mdi_failover():
1872  *		internal failover function. Invokes vHCI drivers failover
1873  *		callback function and process the failover status
1874  * Return Values:
1875  *		None
1876  *
1877  * Note: A client device in failover state can not be detached or freed.
1878  */
1879 static int
1880 i_mdi_failover(void *arg)
1881 {
1882 	int		rv = MDI_SUCCESS;
1883 	mdi_client_t	*ct = (mdi_client_t *)arg;
1884 	mdi_vhci_t	*vh = ct->ct_vhci;
1885 
1886 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1887 
1888 	if (vh->vh_ops->vo_failover != NULL) {
1889 		/*
1890 		 * Call vHCI drivers callback routine
1891 		 */
1892 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1893 		    ct->ct_failover_flags);
1894 	}
1895 
1896 	MDI_CLIENT_LOCK(ct);
1897 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1898 
1899 	/*
1900 	 * Save the failover return status
1901 	 */
1902 	ct->ct_failover_status = rv;
1903 
1904 	/*
1905 	 * As a result of failover, client status would have been changed.
1906 	 * Update the client state and wake up anyone waiting on this client
1907 	 * device.
1908 	 */
1909 	i_mdi_client_update_state(ct);
1910 
1911 	cv_broadcast(&ct->ct_failover_cv);
1912 	MDI_CLIENT_UNLOCK(ct);
1913 	return (rv);
1914 }
1915 
1916 /*
1917  * Load balancing is logical block.
1918  * IOs within the range described by region_size
1919  * would go on the same path. This would improve the
1920  * performance by cache-hit on some of the RAID devices.
1921  * Search only for online paths(At some point we
1922  * may want to balance across target ports).
1923  * If no paths are found then default to round-robin.
1924  */
1925 static int
1926 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1927 {
1928 	int		path_index = -1;
1929 	int		online_path_count = 0;
1930 	int		online_nonpref_path_count = 0;
1931 	int 		region_size = ct->ct_lb_args->region_size;
1932 	mdi_pathinfo_t	*pip;
1933 	mdi_pathinfo_t	*next;
1934 	int		preferred, path_cnt;
1935 
1936 	pip = ct->ct_path_head;
1937 	while (pip) {
1938 		MDI_PI_LOCK(pip);
1939 		if (MDI_PI(pip)->pi_state ==
1940 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1941 			online_path_count++;
1942 		} else if (MDI_PI(pip)->pi_state ==
1943 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1944 			online_nonpref_path_count++;
1945 		}
1946 		next = (mdi_pathinfo_t *)
1947 		    MDI_PI(pip)->pi_client_link;
1948 		MDI_PI_UNLOCK(pip);
1949 		pip = next;
1950 	}
1951 	/* if found any online/preferred then use this type */
1952 	if (online_path_count > 0) {
1953 		path_cnt = online_path_count;
1954 		preferred = 1;
1955 	} else if (online_nonpref_path_count > 0) {
1956 		path_cnt = online_nonpref_path_count;
1957 		preferred = 0;
1958 	} else {
1959 		path_cnt = 0;
1960 	}
1961 	if (path_cnt) {
1962 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1963 		pip = ct->ct_path_head;
1964 		while (pip && path_index != -1) {
1965 			MDI_PI_LOCK(pip);
1966 			if (path_index == 0 &&
1967 			    (MDI_PI(pip)->pi_state ==
1968 			    MDI_PATHINFO_STATE_ONLINE) &&
1969 				MDI_PI(pip)->pi_preferred == preferred) {
1970 				MDI_PI_HOLD(pip);
1971 				MDI_PI_UNLOCK(pip);
1972 				*ret_pip = pip;
1973 				return (MDI_SUCCESS);
1974 			}
1975 			path_index --;
1976 			next = (mdi_pathinfo_t *)
1977 			    MDI_PI(pip)->pi_client_link;
1978 			MDI_PI_UNLOCK(pip);
1979 			pip = next;
1980 		}
1981 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1982 		    "lba %llx: path %s %p",
1983 		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1984 	}
1985 	return (MDI_FAILURE);
1986 }
1987 
1988 /*
1989  * mdi_select_path():
1990  *		select a path to access a client device.
1991  *
1992  *		mdi_select_path() function is called by the vHCI drivers to
1993  *		select a path to route the I/O request to.  The caller passes
1994  *		the block I/O data transfer structure ("buf") as one of the
1995  *		parameters.  The mpxio framework uses the buf structure
1996  *		contents to maintain per path statistics (total I/O size /
1997  *		count pending).  If more than one online paths are available to
1998  *		select, the framework automatically selects a suitable path
1999  *		for routing I/O request. If a failover operation is active for
2000  *		this client device the call shall be failed with MDI_BUSY error
2001  *		code.
2002  *
2003  *		By default this function returns a suitable path in online
2004  *		state based on the current load balancing policy.  Currently
2005  *		we support LOAD_BALANCE_NONE (Previously selected online path
2006  *		will continue to be used till the path is usable) and
2007  *		LOAD_BALANCE_RR (Online paths will be selected in a round
2008  *		robin fashion), LOAD_BALANCE_LB(Online paths will be selected
2009  *		based on the logical block).  The load balancing policy is set
2010  *		through vHCI drivers configuration file (driver.conf).
2011  *
2012  *		vHCI drivers may override this default behavior by specifying
2013  *		appropriate flags.  The meaning of the "arg" argument depends
2014  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2015  *		then the argument is the "path instance" of the path to select.
2016  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2017  *		"start_pip". A non NULL "start_pip" is the starting point to
2018  *		walk and find the next appropriate path.  The following values
2019  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2020  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2021  *		STANDBY path).
2022  *
2023  *		The non-standard behavior is used by the scsi_vhci driver,
2024  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
2025  *		attach of client devices (to avoid an unnecessary failover
2026  *		when the STANDBY path comes up first), during failover
2027  *		(to activate a STANDBY path as ONLINE).
2028  *
2029  *		The selected path is returned in a mdi_hold_path() state
2030  *		(pi_ref_cnt). Caller should release the hold by calling
2031  *		mdi_rele_path().
2032  *
2033  * Return Values:
2034  *		MDI_SUCCESS	- Completed successfully
2035  *		MDI_BUSY 	- Client device is busy failing over
2036  *		MDI_NOPATH	- Client device is online, but no valid path are
2037  *				  available to access this client device
2038  *		MDI_FAILURE	- Invalid client device or state
2039  *		MDI_DEVI_ONLINING
2040  *				- Client device (struct dev_info state) is in
2041  *				  onlining state.
2042  */
2043 
/*
 * Implementation notes (mdi_select_path):
 *
 * flags == 0 selects the "standard behavior" (sb): the client state is
 * validated first and the client's own load-balancing policy (ct_lb) is
 * used.  Any non-zero flags value skips the client state checks and forces
 * a LOAD_BALANCE_RR style walk, unless MDI_SELECT_PATH_INSTANCE is set, in
 * which case 'arg' carries a path_instance number and only the path with
 * that instance is considered.  Without MDI_SELECT_PATH_INSTANCE, 'arg' is
 * treated as the pathinfo node after which a search should continue (it
 * may be NULL).
 *
 * The client's path list is walked through pi_client_link with the client
 * lock held; each node is examined under its own pathinfo lock.  When a
 * path is selected it is returned in *ret_pip with a hold (MDI_PI_HOLD)
 * placed on it.
 */
2044 /*ARGSUSED*/
2045 int
2046 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2047     void *arg, mdi_pathinfo_t **ret_pip)
2048 {
2049 	mdi_client_t	*ct;
2050 	mdi_pathinfo_t	*pip;
2051 	mdi_pathinfo_t	*next;
2052 	mdi_pathinfo_t	*head;
2053 	mdi_pathinfo_t	*start;
2054 	client_lb_t	lbp;	/* load balancing policy */
2055 	int		sb = 1;	/* standard behavior */
2056 	int		preferred = 1;	/* preferred path */
2057 	int		cond, cont = 1;
2058 	int		retry = 0;
2059 	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
2060 	int		path_instance;	/* request specific path instance */
2061 
2062 	/* determine type of arg based on flags */
2063 	if (flags & MDI_SELECT_PATH_INSTANCE) {
2064 		path_instance = (int)(intptr_t)arg;
2065 		start_pip = NULL;
2066 	} else {
2067 		path_instance = 0;
2068 		start_pip = (mdi_pathinfo_t *)arg;
2069 	}
2070 
2071 	if (flags != 0) {
2072 		/*
2073 		 * disable default behavior
2074 		 */
2075 		sb = 0;
2076 	}
2077 
	/* Start with no path selected */
2078 	*ret_pip = NULL;
2079 	ct = i_devi_get_client(cdip);
2080 	if (ct == NULL) {
2081 		/* mdi extensions are NULL, Nothing more to do */
2082 		return (MDI_FAILURE);
2083 	}
2084 
2085 	MDI_CLIENT_LOCK(ct);
2086 
	/*
	 * The client state checks below are applied only for the standard
	 * behavior (flags == 0).
	 */
2087 	if (sb) {
2088 		if (MDI_CLIENT_IS_FAILED(ct)) {
2089 			/*
2090 			 * Client is not ready to accept any I/O requests.
2091 			 * Fail this request.
2092 			 */
2093 			MDI_DEBUG(2, (MDI_NOTE, cdip,
2094 			    "client state offline ct = %p", (void *)ct));
2095 			MDI_CLIENT_UNLOCK(ct);
2096 			return (MDI_FAILURE);
2097 		}
2098 
2099 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2100 			/*
2101 			 * Check for Failover is in progress. If so tell the
2102 			 * caller that this device is busy.
2103 			 */
2104 			MDI_DEBUG(2, (MDI_NOTE, cdip,
2105 			    "client failover in progress ct = %p",
2106 			    (void *)ct));
2107 			MDI_CLIENT_UNLOCK(ct);
2108 			return (MDI_BUSY);
2109 		}
2110 
2111 		/*
2112 		 * Check to see whether the client device is attached.
2113 		 * If not so, let the vHCI driver manually select a path
2114 		 * (standby) and let the probe/attach process to continue.
2115 		 */
2116 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2117 			MDI_DEBUG(4, (MDI_NOTE, cdip,
2118 			    "devi is onlining ct = %p", (void *)ct));
2119 			MDI_CLIENT_UNLOCK(ct);
2120 			return (MDI_DEVI_ONLINING);
2121 		}
2122 	}
2123 
2124 	/*
2125 	 * Cache in the client list head.  If head of the list is NULL
2126 	 * return MDI_NOPATH
2127 	 */
2128 	head = ct->ct_path_head;
2129 	if (head == NULL) {
2130 		MDI_CLIENT_UNLOCK(ct);
2131 		return (MDI_NOPATH);
2132 	}
2133 
2134 	/* Caller is specifying a specific pathinfo path by path_instance */
2135 	if (path_instance) {
2136 		/* search for pathinfo with correct path_instance */
2137 		for (pip = head;
2138 		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
2139 		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2140 			;
2141 
2142 		/* If path can't be selected then MDI_NOPATH is returned. */
2143 		if (pip == NULL) {
2144 			MDI_CLIENT_UNLOCK(ct);
2145 			return (MDI_NOPATH);
2146 		}
2147 
2148 		/*
2149 		 * Verify state of path. When asked to select a specific
2150 		 * path_instance, we select the requested path in any
2151 		 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2152 		 * We don't however select paths where the pHCI has detached.
2153 		 * NOTE: last pathinfo node of an opened client device may
2154 		 * exist in an OFFLINE state after the pHCI associated with
2155 		 * that path has detached (but pi_phci will be NULL if that
2156 		 * has occurred).
2157 		 */
2158 		MDI_PI_LOCK(pip);
2159 		if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2160 		    (MDI_PI(pip)->pi_phci == NULL)) {
2161 			MDI_PI_UNLOCK(pip);
2162 			MDI_CLIENT_UNLOCK(ct);
2163 			return (MDI_FAILURE);
2164 		}
2165 
2166 		/* Return MDI_BUSY if we have a transient condition */
2167 		if (MDI_PI_IS_TRANSIENT(pip)) {
2168 			MDI_PI_UNLOCK(pip);
2169 			MDI_CLIENT_UNLOCK(ct);
2170 			return (MDI_BUSY);
2171 		}
2172 
2173 		/*
2174 		 * Return the path in hold state. Caller should release the
2175 		 * lock by calling mdi_rele_path()
2176 		 */
2177 		MDI_PI_HOLD(pip);
2178 		MDI_PI_UNLOCK(pip);
2179 		*ret_pip = pip;
2180 		MDI_CLIENT_UNLOCK(ct);
2181 		return (MDI_SUCCESS);
2182 	}
2183 
2184 	/*
2185 	 * for non default behavior, bypass current
2186 	 * load balancing policy and always use LOAD_BALANCE_RR
2187 	 * except that the start point will be adjusted based
2188 	 * on the provided start_pip
2189 	 */
2190 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2191 
2192 	switch (lbp) {
2193 	case LOAD_BALANCE_NONE:
2194 		/*
2195 		 * Load balancing is None  or Alternate path mode
2196 		 * Start looking for a online mdi_pathinfo node starting from
2197 		 * last known selected path
2198 		 */
2199 		preferred = 1;
2200 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
2201 		if (pip == NULL) {
2202 			pip = head;
2203 		}
2204 		start = pip;
2205 		do {
2206 			MDI_PI_LOCK(pip);
2207 			/*
2208 			 * No need to explicitly check if the path is disabled.
2209 			 * Since we are checking for state == ONLINE and the
2210 			 * same variable is used for DISABLE/ENABLE information.
2211 			 */
2212 			if ((MDI_PI(pip)->pi_state  ==
2213 				MDI_PATHINFO_STATE_ONLINE) &&
2214 				preferred == MDI_PI(pip)->pi_preferred) {
2215 				/*
2216 				 * Return the path in hold state. Caller should
2217 				 * release the lock by calling mdi_rele_path()
2218 				 */
2219 				MDI_PI_HOLD(pip);
2220 				MDI_PI_UNLOCK(pip);
2221 				ct->ct_path_last = pip;
2222 				*ret_pip = pip;
2223 				MDI_CLIENT_UNLOCK(ct);
2224 				return (MDI_SUCCESS);
2225 			}
2226 
2227 			/*
2228 			 * Path is busy.
2229 			 */
			/*
			 * Remember that a skipped path was in a transient
			 * state; if nothing is selected the function then
			 * returns MDI_BUSY rather than MDI_NOPATH.
			 */
2230 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2231 			    MDI_PI_IS_TRANSIENT(pip))
2232 				retry = 1;
2233 			/*
2234 			 * Keep looking for a next available online path
2235 			 */
2236 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2237 			if (next == NULL) {
2238 				next = head;
2239 			}
2240 			MDI_PI_UNLOCK(pip);
2241 			pip = next;
			/*
			 * Back at the starting node: the first wrap switches
			 * the search from preferred to non-preferred paths,
			 * the second wrap terminates the loop.
			 */
2242 			if (start == pip && preferred) {
2243 				preferred = 0;
2244 			} else if (start == pip && !preferred) {
2245 				cont = 0;
2246 			}
2247 		} while (cont);
2248 		break;
2249 
2250 	case LOAD_BALANCE_LBA:
2251 		/*
2252 		 * Make sure we are looking
2253 		 * for an online path. Otherwise, if it is for a STANDBY
2254 		 * path request, it will go through and fetch an ONLINE
2255 		 * path which is not desirable.
2256 		 */
2257 		if ((ct->ct_lb_args != NULL) &&
2258 			    (ct->ct_lb_args->region_size) && bp &&
2259 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2260 			if (i_mdi_lba_lb(ct, ret_pip, bp)
2261 				    == MDI_SUCCESS) {
2262 				MDI_CLIENT_UNLOCK(ct);
2263 				return (MDI_SUCCESS);
2264 			}
2265 		}
		/*
		 * LBA selection was either not attempted or did not return
		 * MDI_SUCCESS; continue with the round robin walk.
		 */
2266 		/* FALLTHROUGH */
2267 	case LOAD_BALANCE_RR:
2268 		/*
2269 		 * Load balancing is Round Robin. Start looking for a online
2270 		 * mdi_pathinfo node starting from last known selected path
2271 		 * as the start point.  If override flags are specified,
2272 		 * process accordingly.
2273 		 * If the search is already in effect(start_pip not null),
2274 		 * then lets just use the same path preference to continue the
2275 		 * traversal.
2276 		 */
2277 
2278 		if (start_pip != NULL) {
2279 			preferred = MDI_PI(start_pip)->pi_preferred;
2280 		} else {
2281 			preferred = 1;
2282 		}
2283 
		/*
		 * Standard behavior resumes after the last selected path
		 * (ct_path_last); otherwise the walk resumes after the
		 * caller-supplied start_pip.  With no such node the walk
		 * begins at the list head.
		 */
2284 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2285 		if (start == NULL) {
2286 			pip = head;
2287 		} else {
2288 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2289 			if (pip == NULL) {
2290 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2291 					/*
2292 					 * Return since we hit the end of list
2293 					 */
2294 					MDI_CLIENT_UNLOCK(ct);
2295 					return (MDI_NOPATH);
2296 				}
2297 
2298 				if (!sb) {
2299 					if (preferred == 0) {
2300 						/*
2301 						 * Looks like we have completed
2302 						 * the traversal as preferred
2303 						 * value is 0. Time to bail out.
2304 						 */
2305 						*ret_pip = NULL;
2306 						MDI_CLIENT_UNLOCK(ct);
2307 						return (MDI_NOPATH);
2308 					} else {
2309 						/*
2310 						 * Looks like we reached the
2311 						 * end of the list. Lets enable
2312 						 * traversal of non preferred
2313 						 * paths.
2314 						 */
2315 						preferred = 0;
2316 					}
2317 				}
2318 				pip = head;
2319 			}
2320 		}
		/*
		 * From here on 'start' is the first node examined by the
		 * loop; the standard-behavior exit test compares against it.
		 */
2321 		start = pip;
2322 		do {
2323 			MDI_PI_LOCK(pip);
			/*
			 * Compute 'cond': non-zero when this node satisfies
			 * the request.  Non-standard requests are matched on
			 * the exact value of 'flags'; any combination not
			 * listed below never matches.
			 */
2324 			if (sb) {
2325 				cond = ((MDI_PI(pip)->pi_state ==
2326 				    MDI_PATHINFO_STATE_ONLINE &&
2327 					MDI_PI(pip)->pi_preferred ==
2328 						preferred) ? 1 : 0);
2329 			} else {
2330 				if (flags == MDI_SELECT_ONLINE_PATH) {
					/* ONLINE with matching preference */
2331 					cond = ((MDI_PI(pip)->pi_state ==
2332 					    MDI_PATHINFO_STATE_ONLINE &&
2333 						MDI_PI(pip)->pi_preferred ==
2334 						preferred) ? 1 : 0);
2335 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					/* STANDBY with matching preference */
2336 					cond = ((MDI_PI(pip)->pi_state ==
2337 					    MDI_PATHINFO_STATE_STANDBY &&
2338 						MDI_PI(pip)->pi_preferred ==
2339 						preferred) ? 1 : 0);
2340 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2341 				    MDI_SELECT_STANDBY_PATH)) {
					/* ONLINE or STANDBY, matching preference */
2342 					cond = (((MDI_PI(pip)->pi_state ==
2343 					    MDI_PATHINFO_STATE_ONLINE ||
2344 					    (MDI_PI(pip)->pi_state ==
2345 					    MDI_PATHINFO_STATE_STANDBY)) &&
2346 						MDI_PI(pip)->pi_preferred ==
2347 						preferred) ? 1 : 0);
2348 				} else if (flags ==
2349 					(MDI_SELECT_STANDBY_PATH |
2350 					MDI_SELECT_ONLINE_PATH |
2351 					MDI_SELECT_USER_DISABLE_PATH)) {
					/*
					 * ONLINE or STANDBY, with or without
					 * the USER_DISABLE bit set in
					 * pi_state, matching preference.
					 */
2352 					cond = (((MDI_PI(pip)->pi_state ==
2353 					    MDI_PATHINFO_STATE_ONLINE ||
2354 					    (MDI_PI(pip)->pi_state ==
2355 					    MDI_PATHINFO_STATE_STANDBY) ||
2356 						(MDI_PI(pip)->pi_state ==
2357 					    (MDI_PATHINFO_STATE_ONLINE|
2358 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2359 						(MDI_PI(pip)->pi_state ==
2360 					    (MDI_PATHINFO_STATE_STANDBY |
2361 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2362 						MDI_PI(pip)->pi_preferred ==
2363 						preferred) ? 1 : 0);
2364 				} else if (flags ==
2365 				    (MDI_SELECT_STANDBY_PATH |
2366 				    MDI_SELECT_ONLINE_PATH |
2367 				    MDI_SELECT_NO_PREFERRED)) {
					/*
					 * ONLINE or STANDBY; pi_preferred is
					 * not consulted.
					 */
2368 					cond = (((MDI_PI(pip)->pi_state ==
2369 					    MDI_PATHINFO_STATE_ONLINE) ||
2370 					    (MDI_PI(pip)->pi_state ==
2371 					    MDI_PATHINFO_STATE_STANDBY))
2372 					    ? 1 : 0);
2373 				} else {
2374 					cond = 0;
2375 				}
2376 			}
2377 			/*
2378 			 * No need to explicitly check if the path is disabled.
2379 			 * Since we are checking for state == ONLINE and the
2380 			 * same variable is used for DISABLE/ENABLE information.
2381 			 */
2382 			if (cond) {
2383 				/*
2384 				 * Return the path in hold state. Caller should
2385 				 * release the lock by calling mdi_rele_path()
2386 				 */
2387 				MDI_PI_HOLD(pip);
2388 				MDI_PI_UNLOCK(pip);
				/*
				 * ct_path_last is only advanced for the
				 * standard behavior.
				 */
2389 				if (sb)
2390 					ct->ct_path_last = pip;
2391 				*ret_pip = pip;
2392 				MDI_CLIENT_UNLOCK(ct);
2393 				return (MDI_SUCCESS);
2394 			}
2395 			/*
2396 			 * Path is busy.
2397 			 */
2398 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2399 			    MDI_PI_IS_TRANSIENT(pip))
2400 				retry = 1;
2401 
2402 			/*
2403 			 * Keep looking for a next available online path
2404 			 */
			/*
			 * do_again is reached with pip locked: either by
			 * falling through from the code above, or through the
			 * goto further down, which re-locks pip first.
			 */
2405 do_again:
2406 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2407 			if (next == NULL) {
2408 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2409 					/*
2410 					 * Bail out since we hit the end of list
2411 					 */
2412 					MDI_PI_UNLOCK(pip);
2413 					break;
2414 				}
2415 
2416 				if (!sb) {
2417 					if (preferred == 1) {
2418 						/*
2419 						 * Looks like we reached the
2420 						 * end of the list. Lets enable
2421 						 * traversal of non preferred
2422 						 * paths.
2423 						 */
2424 						preferred = 0;
2425 						next = head;
2426 					} else {
2427 						/*
2428 						 * We have done both the passes
2429 						 * Preferred as well as for
2430 						 * Non-preferred. Bail out now.
2431 						 */
2432 						cont = 0;
2433 					}
2434 				} else {
2435 					/*
2436 					 * Standard behavior case.
2437 					 */
2438 					next = head;
2439 				}
2440 			}
2441 			MDI_PI_UNLOCK(pip);
2442 			if (cont == 0) {
2443 				break;
2444 			}
2445 			pip = next;
2446 
2447 			if (!sb) {
2448 				/*
2449 				 * We need to handle the selection of
2450 				 * non-preferred path in the following
2451 				 * case:
2452 				 *
2453 				 * +------+   +------+   +------+   +-----+
2454 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2455 				 * +------+   +------+   +------+   +-----+
2456 				 *
2457 				 * If we start the search with B, we need to
2458 				 * skip beyond B to pick C which is non -
2459 				 * preferred in the second pass. The following
2460 				 * test, if true, will allow us to skip over
2461 				 * the 'start'(B in the example) to select
2462 				 * other non preferred elements.
2463 				 */
2464 				if ((start_pip != NULL) && (start_pip == pip) &&
2465 				    (MDI_PI(start_pip)->pi_preferred
2466 				    != preferred)) {
2467 					/*
2468 					 * try again after going past the start
2469 					 * pip
2470 					 */
2471 					MDI_PI_LOCK(pip);
2472 					goto do_again;
2473 				}
2474 			} else {
2475 				/*
2476 				 * Standard behavior case
2477 				 */
2478 				if (start == pip && preferred) {
2479 					/* look for nonpreferred paths */
2480 					preferred = 0;
2481 				} else if (start == pip && !preferred) {
2482 					/*
2483 					 * Exit condition
2484 					 */
2485 					cont = 0;
2486 				}
2487 			}
2488 		} while (cont);
2489 		break;
2490 	}
2491 
	/*
	 * No path was selected.  'retry' is set when a path in a transient
	 * state was seen during the walk, in which case MDI_BUSY is returned
	 * instead of MDI_NOPATH.
	 */
2492 	MDI_CLIENT_UNLOCK(ct);
2493 	if (retry == 1) {
2494 		return (MDI_BUSY);
2495 	} else {
2496 		return (MDI_NOPATH);
2497 	}
2498 }
2499 
2500 /*
2501  * For a client, return the next available path to any phci
2502  *
2503  * Note:
2504  *		Caller should hold the branch's devinfo node to get a consistent
2505  *		snap shot of the mdi_pathinfo nodes.
2506  *
2507  *		Please note that even the list is stable the mdi_pathinfo
2508  *		node state and properties are volatile.  The caller should lock
2509  *		and unlock the nodes by calling mdi_pi_lock() and
2510  *		mdi_pi_unlock() functions to get a stable properties.
2511  *
2512  *		If there is a need to use the nodes beyond the hold of the
2513  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2514  *		need to be held against unexpected removal by calling
2515  *		mdi_hold_path() and should be released by calling
2516  *		mdi_rele_path() on completion.
2517  */
2518 mdi_pathinfo_t *
2519 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2520 {
2521 	mdi_client_t *ct;
2522 
2523 	if (!MDI_CLIENT(ct_dip))
2524 		return (NULL);
2525 
2526 	/*
2527 	 * Walk through client link
2528 	 */
2529 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2530 	ASSERT(ct != NULL);
2531 
2532 	if (pip == NULL)
2533 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2534 
2535 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2536 }
2537 
2538 /*
2539  * For a phci, return the next available path to any client
2540  * Note: ditto mdi_get_next_phci_path()
2541  */
2542 mdi_pathinfo_t *
2543 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2544 {
2545 	mdi_phci_t *ph;
2546 
2547 	if (!MDI_PHCI(ph_dip))
2548 		return (NULL);
2549 
2550 	/*
2551 	 * Walk through pHCI link
2552 	 */
2553 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2554 	ASSERT(ph != NULL);
2555 
2556 	if (pip == NULL)
2557 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2558 
2559 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2560 }
2561 
2562 /*
2563  * mdi_hold_path():
2564  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2565  * Return Values:
2566  *		None
2567  */
2568 void
2569 mdi_hold_path(mdi_pathinfo_t *pip)
2570 {
2571 	if (pip) {
2572 		MDI_PI_LOCK(pip);
2573 		MDI_PI_HOLD(pip);
2574 		MDI_PI_UNLOCK(pip);
2575 	}
2576 }
2577 
2578 
2579 /*
2580  * mdi_rele_path():
2581  *		Release the mdi_pathinfo node which was selected
2582  *		through mdi_select_path() mechanism or manually held by
2583  *		calling mdi_hold_path().
2584  * Return Values:
2585  *		None
2586  */
2587 void
2588 mdi_rele_path(mdi_pathinfo_t *pip)
2589 {
2590 	if (pip) {
2591 		MDI_PI_LOCK(pip);
2592 		MDI_PI_RELE(pip);
2593 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2594 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2595 		}
2596 		MDI_PI_UNLOCK(pip);
2597 	}
2598 }
2599 
2600 /*
2601  * mdi_pi_lock():
2602  * 		Lock the mdi_pathinfo node.
2603  * Note:
2604  *		The caller should release the lock by calling mdi_pi_unlock()
2605  */
2606 void
2607 mdi_pi_lock(mdi_pathinfo_t *pip)
2608 {
2609 	ASSERT(pip != NULL);
2610 	if (pip) {
2611 		MDI_PI_LOCK(pip);
2612 	}
2613 }
2614 
2615 
2616 /*
2617  * mdi_pi_unlock():
2618  * 		Unlock the mdi_pathinfo node.
2619  * Note:
2620  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2621  */
2622 void
2623 mdi_pi_unlock(mdi_pathinfo_t *pip)
2624 {
2625 	ASSERT(pip != NULL);
2626 	if (pip) {
2627 		MDI_PI_UNLOCK(pip);
2628 	}
2629 }
2630 
2631 /*
2632  * mdi_pi_find():
2633  *		Search the list of mdi_pathinfo nodes attached to the
2634  *		pHCI/Client device node whose path address matches "paddr".
2635  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2636  *		found.
2637  * Return Values:
2638  *		mdi_pathinfo node handle
2639  *		NULL
2640  * Notes:
2641  *		Caller need not hold any locks to call this function.
2642  */
2643 mdi_pathinfo_t *
2644 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2645 {
2646 	mdi_phci_t		*ph;
2647 	mdi_vhci_t		*vh;
2648 	mdi_client_t		*ct;
2649 	mdi_pathinfo_t		*pip = NULL;
2650 
2651 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2652 	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2653 	if ((pdip == NULL) || (paddr == NULL)) {
2654 		return (NULL);
2655 	}
2656 	ph = i_devi_get_phci(pdip);
2657 	if (ph == NULL) {
2658 		/*
2659 		 * Invalid pHCI device, Nothing more to do.
2660 		 */
2661 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2662 		return (NULL);
2663 	}
2664 
2665 	vh = ph->ph_vhci;
2666 	if (vh == NULL) {
2667 		/*
2668 		 * Invalid vHCI device, Nothing more to do.
2669 		 */
2670 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2671 		return (NULL);
2672 	}
2673 
2674 	/*
2675 	 * Look for pathinfo node identified by paddr.
2676 	 */
2677 	if (caddr == NULL) {
2678 		/*
2679 		 * Find a mdi_pathinfo node under pHCI list for a matching
2680 		 * unit address.
2681 		 */
2682 		MDI_PHCI_LOCK(ph);
2683 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2684 			MDI_DEBUG(2, (MDI_WARN, pdip,
2685 			    "offline phci %p", (void *)ph));
2686 			MDI_PHCI_UNLOCK(ph);
2687 			return (NULL);
2688 		}
2689 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2690 
2691 		while (pip != NULL) {
2692 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2693 				break;
2694 			}
2695 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2696 		}
2697 		MDI_PHCI_UNLOCK(ph);
2698 		MDI_DEBUG(2, (MDI_NOTE, pdip,
2699 		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2700 		return (pip);
2701 	}
2702 
2703 	/*
2704 	 * XXX - Is the rest of the code in this function really necessary?
2705 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2706 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2707 	 * whether the search is based on the pathinfo nodes attached to
2708 	 * the pHCI or the client node, the result will be the same.
2709 	 */
2710 
2711 	/*
2712 	 * Find the client device corresponding to 'caddr'
2713 	 */
2714 	MDI_VHCI_CLIENT_LOCK(vh);
2715 
2716 	/*
2717 	 * XXX - Passing NULL to the following function works as long as the
2718 	 * the client addresses (caddr) are unique per vhci basis.
2719 	 */
2720 	ct = i_mdi_client_find(vh, NULL, caddr);
2721 	if (ct == NULL) {
2722 		/*
2723 		 * Client not found, Obviously mdi_pathinfo node has not been
2724 		 * created yet.
2725 		 */
2726 		MDI_VHCI_CLIENT_UNLOCK(vh);
2727 		MDI_DEBUG(2, (MDI_NOTE, pdip,
2728 		    "client not found for caddr @%s", caddr ? caddr : ""));
2729 		return (NULL);
2730 	}
2731 
2732 	/*
2733 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2734 	 * pHCI and paddr
2735 	 */
2736 	MDI_CLIENT_LOCK(ct);
2737 
2738 	/*
2739 	 * Release the global mutex as it is no more needed. Note: We always
2740 	 * respect the locking order while acquiring.
2741 	 */
2742 	MDI_VHCI_CLIENT_UNLOCK(vh);
2743 
2744 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2745 	while (pip != NULL) {
2746 		/*
2747 		 * Compare the unit address
2748 		 */
2749 		if ((MDI_PI(pip)->pi_phci == ph) &&
2750 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2751 			break;
2752 		}
2753 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2754 	}
2755 	MDI_CLIENT_UNLOCK(ct);
2756 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2757 	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2758 	return (pip);
2759 }
2760 
2761 /*
2762  * mdi_pi_alloc():
2763  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2764  *		The mdi_pathinfo node returned by this function identifies a
2765  *		unique device path is capable of having properties attached
2766  *		and passed to mdi_pi_online() to fully attach and online the
2767  *		path and client device node.
2768  *		The mdi_pathinfo node returned by this function must be
2769  *		destroyed using mdi_pi_free() if the path is no longer
2770  *		operational or if the caller fails to attach a client device
2771  *		node when calling mdi_pi_online(). The framework will not free
2772  *		the resources allocated.
2773  *		This function can be called from both interrupt and kernel
2774  *		contexts.  DDI_NOSLEEP flag should be used while calling
2775  *		from interrupt contexts.
2776  * Return Values:
2777  *		MDI_SUCCESS
2778  *		MDI_FAILURE
2779  *		MDI_NOMEM
2780  */
2781 /*ARGSUSED*/
2782 int
2783 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2784     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2785 {
2786 	mdi_vhci_t	*vh;
2787 	mdi_phci_t	*ph;
2788 	mdi_client_t	*ct;
2789 	mdi_pathinfo_t	*pip = NULL;
2790 	dev_info_t	*cdip;
2791 	int		rv = MDI_NOMEM;
2792 	int		path_allocated = 0;
2793 
2794 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2795 	    "cname %s: caddr@%s paddr@%s",
2796 	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2797 
2798 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2799 	    ret_pip == NULL) {
2800 		/* Nothing more to do */
2801 		return (MDI_FAILURE);
2802 	}
2803 
2804 	*ret_pip = NULL;
2805 
2806 	/* No allocations on detaching pHCI */
2807 	if (DEVI_IS_DETACHING(pdip)) {
2808 		/* Invalid pHCI device, return failure */
2809 		MDI_DEBUG(1, (MDI_WARN, pdip,
2810 		    "!detaching pHCI=%p", (void *)pdip));
2811 		return (MDI_FAILURE);
2812 	}
2813 
2814 	ph = i_devi_get_phci(pdip);
2815 	ASSERT(ph != NULL);
2816 	if (ph == NULL) {
2817 		/* Invalid pHCI device, return failure */
2818 		MDI_DEBUG(1, (MDI_WARN, pdip,
2819 		    "!invalid pHCI=%p", (void *)pdip));
2820 		return (MDI_FAILURE);
2821 	}
2822 
2823 	MDI_PHCI_LOCK(ph);
2824 	vh = ph->ph_vhci;
2825 	if (vh == NULL) {
2826 		/* Invalid vHCI device, return failure */
2827 		MDI_DEBUG(1, (MDI_WARN, pdip,
2828 		    "!invalid vHCI=%p", (void *)pdip));
2829 		MDI_PHCI_UNLOCK(ph);
2830 		return (MDI_FAILURE);
2831 	}
2832 
2833 	if (MDI_PHCI_IS_READY(ph) == 0) {
2834 		/*
2835 		 * Do not allow new node creation when pHCI is in
2836 		 * offline/suspended states
2837 		 */
2838 		MDI_DEBUG(1, (MDI_WARN, pdip,
2839 		    "pHCI=%p is not ready", (void *)ph));
2840 		MDI_PHCI_UNLOCK(ph);
2841 		return (MDI_BUSY);
2842 	}
2843 	MDI_PHCI_UNSTABLE(ph);
2844 	MDI_PHCI_UNLOCK(ph);
2845 
2846 	/* look for a matching client, create one if not found */
2847 	MDI_VHCI_CLIENT_LOCK(vh);
2848 	ct = i_mdi_client_find(vh, cname, caddr);
2849 	if (ct == NULL) {
2850 		ct = i_mdi_client_alloc(vh, cname, caddr);
2851 		ASSERT(ct != NULL);
2852 	}
2853 
2854 	if (ct->ct_dip == NULL) {
2855 		/*
2856 		 * Allocate a devinfo node
2857 		 */
2858 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2859 		    compatible, ncompatible);
2860 		if (ct->ct_dip == NULL) {
2861 			(void) i_mdi_client_free(vh, ct);
2862 			goto fail;
2863 		}
2864 	}
2865 	cdip = ct->ct_dip;
2866 
2867 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2868 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2869 
2870 	MDI_CLIENT_LOCK(ct);
2871 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2872 	while (pip != NULL) {
2873 		/*
2874 		 * Compare the unit address
2875 		 */
2876 		if ((MDI_PI(pip)->pi_phci == ph) &&
2877 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2878 			break;
2879 		}
2880 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2881 	}
2882 	MDI_CLIENT_UNLOCK(ct);
2883 
2884 	if (pip == NULL) {
2885 		/*
2886 		 * This is a new path for this client device.  Allocate and
2887 		 * initialize a new pathinfo node
2888 		 */
2889 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2890 		ASSERT(pip != NULL);
2891 		path_allocated = 1;
2892 	}
2893 	rv = MDI_SUCCESS;
2894 
2895 fail:
2896 	/*
2897 	 * Release the global mutex.
2898 	 */
2899 	MDI_VHCI_CLIENT_UNLOCK(vh);
2900 
2901 	/*
2902 	 * Mark the pHCI as stable
2903 	 */
2904 	MDI_PHCI_LOCK(ph);
2905 	MDI_PHCI_STABLE(ph);
2906 	MDI_PHCI_UNLOCK(ph);
2907 	*ret_pip = pip;
2908 
2909 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2910 	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2911 
2912 	if (path_allocated)
2913 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2914 
2915 	return (rv);
2916 }
2917 
2918 /*ARGSUSED*/
2919 int
2920 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2921     int flags, mdi_pathinfo_t **ret_pip)
2922 {
2923 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2924 	    flags, ret_pip));
2925 }
2926 
2927 /*
2928  * i_mdi_pi_alloc():
2929  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2930  * Return Values:
2931  *		mdi_pathinfo
2932  */
2933 /*ARGSUSED*/
2934 static mdi_pathinfo_t *
2935 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2936 {
2937 	mdi_pathinfo_t	*pip;
2938 	int		ct_circular;
2939 	int		ph_circular;
2940 	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
2941 	char		*path_persistent;
2942 	int		path_instance;
2943 	mod_hash_val_t	hv;
2944 
2945 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2946 
2947 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2948 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2949 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2950 	    MDI_PATHINFO_STATE_TRANSIENT;
2951 
2952 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2953 		MDI_PI_SET_USER_DISABLE(pip);
2954 
2955 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2956 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2957 
2958 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2959 		MDI_PI_SET_DRV_DISABLE(pip);
2960 
2961 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2962 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2963 	MDI_PI(pip)->pi_client = ct;
2964 	MDI_PI(pip)->pi_phci = ph;
2965 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2966 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2967 
2968         /*
2969 	 * We form the "path" to the pathinfo node, and see if we have
2970 	 * already allocated a 'path_instance' for that "path".  If so,
2971 	 * we use the already allocated 'path_instance'.  If not, we
2972 	 * allocate a new 'path_instance' and associate it with a copy of
2973 	 * the "path" string (which is never freed). The association
2974 	 * between a 'path_instance' this "path" string persists until
2975 	 * reboot.
2976 	 */
2977         mutex_enter(&mdi_pathmap_mutex);
2978 	(void) ddi_pathname(ph->ph_dip, path);
2979 	(void) sprintf(path + strlen(path), "/%s@%s",
2980 	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2981         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2982                 path_instance = (uint_t)(intptr_t)hv;
2983         } else {
2984 		/* allocate a new 'path_instance' and persistent "path" */
2985 		path_instance = mdi_pathmap_instance++;
2986 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2987                 (void) mod_hash_insert(mdi_pathmap_bypath,
2988                     (mod_hash_key_t)path_persistent,
2989                     (mod_hash_val_t)(intptr_t)path_instance);
2990 		(void) mod_hash_insert(mdi_pathmap_byinstance,
2991 		    (mod_hash_key_t)(intptr_t)path_instance,
2992 		    (mod_hash_val_t)path_persistent);
2993 
2994 		/* create shortpath name */
2995 		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2996 		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2997 		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2998 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2999 		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
3000 		    (mod_hash_key_t)(intptr_t)path_instance,
3001 		    (mod_hash_val_t)path_persistent);
3002         }
3003         mutex_exit(&mdi_pathmap_mutex);
3004 	MDI_PI(pip)->pi_path_instance = path_instance;
3005 
3006 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
3007 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
3008 	MDI_PI(pip)->pi_pprivate = NULL;
3009 	MDI_PI(pip)->pi_cprivate = NULL;
3010 	MDI_PI(pip)->pi_vprivate = NULL;
3011 	MDI_PI(pip)->pi_client_link = NULL;
3012 	MDI_PI(pip)->pi_phci_link = NULL;
3013 	MDI_PI(pip)->pi_ref_cnt = 0;
3014 	MDI_PI(pip)->pi_kstats = NULL;
3015 	MDI_PI(pip)->pi_preferred = 1;
3016 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3017 
3018 	/*
3019 	 * Lock both dev_info nodes against changes in parallel.
3020 	 *
3021 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3022 	 * This atypical operation is done to synchronize pathinfo nodes
3023 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
3024 	 * the pathinfo nodes are children of the Client.
3025 	 */
3026 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3027 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3028 
3029 	i_mdi_phci_add_path(ph, pip);
3030 	i_mdi_client_add_path(ct, pip);
3031 
3032 	ndi_devi_exit(ph->ph_dip, ph_circular);
3033 	ndi_devi_exit(ct->ct_dip, ct_circular);
3034 
3035 	return (pip);
3036 }
3037 
3038 /*
3039  * mdi_pi_pathname_by_instance():
3040  *	Lookup of "path" by 'path_instance'. Return "path".
3041  *	NOTE: returned "path" remains valid forever (until reboot).
3042  */
3043 char *
3044 mdi_pi_pathname_by_instance(int path_instance)
3045 {
3046 	char		*path;
3047 	mod_hash_val_t	hv;
3048 
3049 	/* mdi_pathmap lookup of "path" by 'path_instance' */
3050 	mutex_enter(&mdi_pathmap_mutex);
3051 	if (mod_hash_find(mdi_pathmap_byinstance,
3052 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3053 		path = (char *)hv;
3054 	else
3055 		path = NULL;
3056 	mutex_exit(&mdi_pathmap_mutex);
3057 	return (path);
3058 }
3059 
3060 /*
3061  * mdi_pi_spathname_by_instance():
3062  *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3063  *	NOTE: returned "shortpath" remains valid forever (until reboot).
3064  */
3065 char *
3066 mdi_pi_spathname_by_instance(int path_instance)
3067 {
3068 	char		*path;
3069 	mod_hash_val_t	hv;
3070 
3071 	/* mdi_pathmap lookup of "path" by 'path_instance' */
3072 	mutex_enter(&mdi_pathmap_mutex);
3073 	if (mod_hash_find(mdi_pathmap_sbyinstance,
3074 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3075 		path = (char *)hv;
3076 	else
3077 		path = NULL;
3078 	mutex_exit(&mdi_pathmap_mutex);
3079 	return (path);
3080 }
3081 
3082 
3083 /*
3084  * i_mdi_phci_add_path():
3085  * 		Add a mdi_pathinfo node to pHCI list.
3086  * Notes:
3087  *		Caller should per-pHCI mutex
3088  */
3089 static void
3090 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3091 {
3092 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3093 
3094 	MDI_PHCI_LOCK(ph);
3095 	if (ph->ph_path_head == NULL) {
3096 		ph->ph_path_head = pip;
3097 	} else {
3098 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3099 	}
3100 	ph->ph_path_tail = pip;
3101 	ph->ph_path_count++;
3102 	MDI_PHCI_UNLOCK(ph);
3103 }
3104 
3105 /*
3106  * i_mdi_client_add_path():
3107  *		Add mdi_pathinfo node to client list
3108  */
3109 static void
3110 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3111 {
3112 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3113 
3114 	MDI_CLIENT_LOCK(ct);
3115 	if (ct->ct_path_head == NULL) {
3116 		ct->ct_path_head = pip;
3117 	} else {
3118 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3119 	}
3120 	ct->ct_path_tail = pip;
3121 	ct->ct_path_count++;
3122 	MDI_CLIENT_UNLOCK(ct);
3123 }
3124 
/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device.
 *
 *		The node must be in the offline, init or initing state.  Any
 *		outstanding references (pi_ref_cnt) are waited for, up to the
 *		cv_reltimedwait() timeout, before the node is torn down.  The
 *		vHCI's vo_pi_uninit entry point (if any) is called unless the
 *		node is still in the initing state.
 * Arguments:
 *		pip	- pathinfo node to free
 *		flags	- not referenced by this function
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	- pip has no pHCI, vHCI or client
 *		MDI_BUSY	- node is in a state that cannot be freed, or
 *				  the wait for pi_ref_cnt to drain timed out
 *		Any non-success value returned by vo_pi_uninit is passed back
 *		to the caller unchanged.
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid pHCI: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vHCI: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline, init or initing and the
	 * reference count is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	/*
	 * Wait (with pi_mutex dropped inside cv_reltimedwait) for the
	 * reference count to reach zero.  A single timed-out wait aborts the
	 * free with MDI_BUSY; any other wakeup re-checks the count.
	 */
	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
		    "!%d cmds still pending on path: %s %p",
		    MDI_PI(pip)->pi_ref_cnt,
		    mdi_pi_spathname(pip), (void *)pip));
		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
		    TR_CLOCK_TICK) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!Timeout reached on path %s %p without the cond",
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!%d cmds still pending on path %s %p",
			    MDI_PI(pip)->pi_ref_cnt,
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	/*
	 * Remember, while pi_mutex is still held, whether this path holds a
	 * PM reference so that it can be released on the client below.
	 */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	/*
	 * Drop the path from the vHCI cache up front; it is added back at the
	 * end of this function if rv turns out to be MDI_FAILURE.
	 */
	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock and re-take it after the vHCI client lock so
	 * that the two are acquired in vHCI-then-client order.
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * A node still in the initing state skips the vHCI uninit callback
	 * and is treated as successfully uninitialized.
	 */
	rv = MDI_SUCCESS;
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}

	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip is freed here and must not be touched afterwards */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * The node was not freed; put it back into the vHCI cache.
	 * NOTE(review): only rv == MDI_FAILURE restores the cache entry; any
	 * other non-success value from vo_pi_uninit leaves the node out of
	 * the cache - confirm that is intended.
	 */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}
3285 
3286 /*
3287  * i_mdi_pi_free():
3288  *		Free the mdi_pathinfo node
3289  */
3290 static void
3291 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3292 {
3293 	int	ct_circular;
3294 	int	ph_circular;
3295 
3296 	ASSERT(MDI_CLIENT_LOCKED(ct));
3297 
3298 	/*
3299 	 * remove any per-path kstats
3300 	 */
3301 	i_mdi_pi_kstat_destroy(pip);
3302 
3303 	/* See comments in i_mdi_pi_alloc() */
3304 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3305 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3306 
3307 	i_mdi_client_remove_path(ct, pip);
3308 	i_mdi_phci_remove_path(ph, pip);
3309 
3310 	ndi_devi_exit(ph->ph_dip, ph_circular);
3311 	ndi_devi_exit(ct->ct_dip, ct_circular);
3312 
3313 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3314 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3315 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3316 	if (MDI_PI(pip)->pi_addr) {
3317 		kmem_free(MDI_PI(pip)->pi_addr,
3318 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3319 		MDI_PI(pip)->pi_addr = NULL;
3320 	}
3321 
3322 	if (MDI_PI(pip)->pi_prop) {
3323 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3324 		MDI_PI(pip)->pi_prop = NULL;
3325 	}
3326 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3327 }
3328 
3329 
3330 /*
3331  * i_mdi_phci_remove_path():
3332  * 		Remove a mdi_pathinfo node from pHCI list.
3333  * Notes:
3334  *		Caller should hold per-pHCI mutex
3335  */
3336 static void
3337 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3338 {
3339 	mdi_pathinfo_t	*prev = NULL;
3340 	mdi_pathinfo_t	*path = NULL;
3341 
3342 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3343 
3344 	MDI_PHCI_LOCK(ph);
3345 	path = ph->ph_path_head;
3346 	while (path != NULL) {
3347 		if (path == pip) {
3348 			break;
3349 		}
3350 		prev = path;
3351 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3352 	}
3353 
3354 	if (path) {
3355 		ph->ph_path_count--;
3356 		if (prev) {
3357 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3358 		} else {
3359 			ph->ph_path_head =
3360 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3361 		}
3362 		if (ph->ph_path_tail == path) {
3363 			ph->ph_path_tail = prev;
3364 		}
3365 	}
3366 
3367 	/*
3368 	 * Clear the pHCI link
3369 	 */
3370 	MDI_PI(pip)->pi_phci_link = NULL;
3371 	MDI_PI(pip)->pi_phci = NULL;
3372 	MDI_PHCI_UNLOCK(ph);
3373 }
3374 
3375 /*
3376  * i_mdi_client_remove_path():
3377  * 		Remove a mdi_pathinfo node from client path list.
3378  */
3379 static void
3380 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3381 {
3382 	mdi_pathinfo_t	*prev = NULL;
3383 	mdi_pathinfo_t	*path;
3384 
3385 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3386 
3387 	ASSERT(MDI_CLIENT_LOCKED(ct));
3388 	path = ct->ct_path_head;
3389 	while (path != NULL) {
3390 		if (path == pip) {
3391 			break;
3392 		}
3393 		prev = path;
3394 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3395 	}
3396 
3397 	if (path) {
3398 		ct->ct_path_count--;
3399 		if (prev) {
3400 			MDI_PI(prev)->pi_client_link =
3401 			    MDI_PI(path)->pi_client_link;
3402 		} else {
3403 			ct->ct_path_head =
3404 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3405 		}
3406 		if (ct->ct_path_tail == path) {
3407 			ct->ct_path_tail = prev;
3408 		}
3409 		if (ct->ct_path_last == path) {
3410 			ct->ct_path_last = ct->ct_path_head;
3411 		}
3412 	}
3413 	MDI_PI(pip)->pi_client_link = NULL;
3414 	MDI_PI(pip)->pi_client = NULL;
3415 }
3416 
3417 /*
3418  * i_mdi_pi_state_change():
3419  *		online a mdi_pathinfo node
3420  *
3421  * Return Values:
3422  *		MDI_SUCCESS
3423  *		MDI_FAILURE
3424  */
3425 /*ARGSUSED*/
3426 static int
3427 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3428 {
3429 	int		rv = MDI_SUCCESS;
3430 	mdi_vhci_t	*vh;
3431 	mdi_phci_t	*ph;
3432 	mdi_client_t	*ct;
3433 	int		(*f)();
3434 	dev_info_t	*cdip;
3435 
3436 	MDI_PI_LOCK(pip);
3437 
3438 	ph = MDI_PI(pip)->pi_phci;
3439 	ASSERT(ph);
3440 	if (ph == NULL) {
3441 		/*
3442 		 * Invalid pHCI device, fail the request
3443 		 */
3444 		MDI_PI_UNLOCK(pip);
3445 		MDI_DEBUG(1, (MDI_WARN, NULL,
3446 		    "!invalid phci: pip %s %p",
3447 		    mdi_pi_spathname(pip), (void *)pip));
3448 		return (MDI_FAILURE);
3449 	}
3450 
3451 	vh = ph->ph_vhci;
3452 	ASSERT(vh);
3453 	if (vh == NULL) {
3454 		/*
3455 		 * Invalid vHCI device, fail the request
3456 		 */
3457 		MDI_PI_UNLOCK(pip);
3458 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3459 		    "!invalid vhci: pip %s %p",
3460 		    mdi_pi_spathname(pip), (void *)pip));
3461 		return (MDI_FAILURE);
3462 	}
3463 
3464 	ct = MDI_PI(pip)->pi_client;
3465 	ASSERT(ct != NULL);
3466 	if (ct == NULL) {
3467 		/*
3468 		 * Invalid client device, fail the request
3469 		 */
3470 		MDI_PI_UNLOCK(pip);
3471 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3472 		    "!invalid client: pip %s %p",
3473 		    mdi_pi_spathname(pip), (void *)pip));
3474 		return (MDI_FAILURE);
3475 	}
3476 
3477 	/*
3478 	 * If this path has not been initialized yet, Callback vHCI driver's
3479 	 * pathinfo node initialize entry point
3480 	 */
3481 
3482 	if (MDI_PI_IS_INITING(pip)) {
3483 		MDI_PI_UNLOCK(pip);
3484 		f = vh->vh_ops->vo_pi_init;
3485 		if (f != NULL) {
3486 			rv = (*f)(vh->vh_dip, pip, 0);
3487 			if (rv != MDI_SUCCESS) {
3488 				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3489 				    "!vo_pi_init failed: vHCI %p, pip %s %p",
3490 				    (void *)vh, mdi_pi_spathname(pip),
3491 				    (void *)pip));
3492 				return (MDI_FAILURE);
3493 			}
3494 		}
3495 		MDI_PI_LOCK(pip);
3496 		MDI_PI_CLEAR_TRANSIENT(pip);
3497 	}
3498 
3499 	/*
3500 	 * Do not allow state transition when pHCI is in offline/suspended
3501 	 * states
3502 	 */
3503 	i_mdi_phci_lock(ph, pip);
3504 	if (MDI_PHCI_IS_READY(ph) == 0) {
3505 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3506 		    "!pHCI not ready, pHCI=%p", (void *)ph));
3507 		MDI_PI_UNLOCK(pip);
3508 		i_mdi_phci_unlock(ph);
3509 		return (MDI_BUSY);
3510 	}
3511 	MDI_PHCI_UNSTABLE(ph);
3512 	i_mdi_phci_unlock(ph);
3513 
3514 	/*
3515 	 * Check if mdi_pathinfo state is in transient state.
3516 	 * If yes, offlining is in progress and wait till transient state is
3517 	 * cleared.
3518 	 */
3519 	if (MDI_PI_IS_TRANSIENT(pip)) {
3520 		while (MDI_PI_IS_TRANSIENT(pip)) {
3521 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3522 			    &MDI_PI(pip)->pi_mutex);
3523 		}
3524 	}
3525 
3526 	/*
3527 	 * Grab the client lock in reverse order sequence and release the
3528 	 * mdi_pathinfo mutex.
3529 	 */
3530 	i_mdi_client_lock(ct, pip);
3531 	MDI_PI_UNLOCK(pip);
3532 
3533 	/*
3534 	 * Wait till failover state is cleared
3535 	 */
3536 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3537 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3538 
3539 	/*
3540 	 * Mark the mdi_pathinfo node state as transient
3541 	 */
3542 	MDI_PI_LOCK(pip);
3543 	switch (state) {
3544 	case MDI_PATHINFO_STATE_ONLINE:
3545 		MDI_PI_SET_ONLINING(pip);
3546 		break;
3547 
3548 	case MDI_PATHINFO_STATE_STANDBY:
3549 		MDI_PI_SET_STANDBYING(pip);
3550 		break;
3551 
3552 	case MDI_PATHINFO_STATE_FAULT:
3553 		/*
3554 		 * Mark the pathinfo state as FAULTED
3555 		 */
3556 		MDI_PI_SET_FAULTING(pip);
3557 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3558 		break;
3559 
3560 	case MDI_PATHINFO_STATE_OFFLINE:
3561 		/*
3562 		 * ndi_devi_offline() cannot hold pip or ct locks.
3563 		 */
3564 		MDI_PI_UNLOCK(pip);
3565 
3566 		/*
3567 		 * If this is a user initiated path online->offline operation
3568 		 * who's success would transition a client from DEGRADED to
3569 		 * FAILED then only proceed if we can offline the client first.
3570 		 */
3571 		cdip = ct->ct_dip;
3572 		if ((flag & NDI_USER_REQ) &&
3573 		    MDI_PI_IS_ONLINE(pip) &&
3574 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3575 			i_mdi_client_unlock(ct);
3576 			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
3577 			if (rv != NDI_SUCCESS) {
3578 				/*
3579 				 * Convert to MDI error code
3580 				 */
3581 				switch (rv) {
3582 				case NDI_BUSY:
3583 					rv = MDI_BUSY;
3584 					break;
3585 				default:
3586 					rv = MDI_FAILURE;
3587 					break;
3588 				}
3589 				goto state_change_exit;
3590 			} else {
3591 				i_mdi_client_lock(ct, NULL);
3592 			}
3593 		}
3594 		/*
3595 		 * Mark the mdi_pathinfo node state as transient
3596 		 */
3597 		MDI_PI_LOCK(pip);
3598 		MDI_PI_SET_OFFLINING(pip);
3599 		break;
3600 
3601 	case MDI_PATHINFO_STATE_INIT:
3602 		/*
3603 		 * Callers are not allowed to ask us to change the state to the
3604 		 * initial state.
3605 		 */
3606 		rv = MDI_FAILURE;
3607 		MDI_PI_UNLOCK(pip);
3608 		goto state_change_exit;
3609 
3610 	}
3611 	MDI_PI_UNLOCK(pip);
3612 	MDI_CLIENT_UNSTABLE(ct);
3613 	i_mdi_client_unlock(ct);
3614 
3615 	f = vh->vh_ops->vo_pi_state_change;
3616 	if (f != NULL)
3617 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3618 
3619 	MDI_CLIENT_LOCK(ct);
3620 	MDI_PI_LOCK(pip);
3621 	if (rv == MDI_NOT_SUPPORTED) {
3622 		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3623 	}
3624 	if (rv != MDI_SUCCESS) {
3625 		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
3626 		    "vo_pi_state_change failed: rv %x", rv));
3627 	}
3628 	if (MDI_PI_IS_TRANSIENT(pip)) {
3629 		if (rv == MDI_SUCCESS) {
3630 			MDI_PI_CLEAR_TRANSIENT(pip);
3631 		} else {
3632 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3633 		}
3634 	}
3635 
3636 	/*
3637 	 * Wake anyone waiting for this mdi_pathinfo node
3638 	 */
3639 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3640 	MDI_PI_UNLOCK(pip);
3641 
3642 	/*
3643 	 * Mark the client device as stable
3644 	 */
3645 	MDI_CLIENT_STABLE(ct);
3646 	if (rv == MDI_SUCCESS) {
3647 		if (ct->ct_unstable == 0) {
3648 			cdip = ct->ct_dip;
3649 
3650 			/*
3651 			 * Onlining the mdi_pathinfo node will impact the
3652 			 * client state Update the client and dev_info node
3653 			 * state accordingly
3654 			 */
3655 			rv = NDI_SUCCESS;
3656 			i_mdi_client_update_state(ct);
3657 			switch (MDI_CLIENT_STATE(ct)) {
3658 			case MDI_CLIENT_STATE_OPTIMAL:
3659 			case MDI_CLIENT_STATE_DEGRADED:
3660 				if (cdip && !i_ddi_devi_attached(cdip) &&
3661 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3662 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3663 
3664 					/*
3665 					 * Must do ndi_devi_online() through
3666 					 * hotplug thread for deferred
3667 					 * attach mechanism to work
3668 					 */
3669 					MDI_CLIENT_UNLOCK(ct);
3670 					rv = ndi_devi_online(cdip, 0);
3671 					MDI_CLIENT_LOCK(ct);
3672 					if ((rv != NDI_SUCCESS) &&
3673 					    (MDI_CLIENT_STATE(ct) ==
3674 					    MDI_CLIENT_STATE_DEGRADED)) {
3675 						MDI_DEBUG(1, (MDI_WARN, cdip,
3676 						    "!ndi_devi_online failed "
3677 						    "error %x", rv));
3678 					}
3679 					rv = NDI_SUCCESS;
3680 				}
3681 				break;
3682 
3683 			case MDI_CLIENT_STATE_FAILED:
3684 				/*
3685 				 * This is the last path case for
3686 				 * non-user initiated events.
3687 				 */
3688 				if (((flag & NDI_USER_REQ) == 0) &&
3689 				    cdip && (i_ddi_node_state(cdip) >=
3690 				    DS_INITIALIZED)) {
3691 					MDI_CLIENT_UNLOCK(ct);
3692 					rv = ndi_devi_offline(cdip,
3693 					    NDI_DEVFS_CLEAN);
3694 					MDI_CLIENT_LOCK(ct);
3695 
3696 					if (rv != NDI_SUCCESS) {
3697 						/*
3698 						 * ndi_devi_offline failed.
3699 						 * Reset client flags to
3700 						 * online as the path could not
3701 						 * be offlined.
3702 						 */
3703 						MDI_DEBUG(1, (MDI_WARN, cdip,
3704 						    "!ndi_devi_offline failed: "
3705 						    "error %x", rv));
3706 						MDI_CLIENT_SET_ONLINE(ct);
3707 					}
3708 				}
3709 				break;
3710 			}
3711 			/*
3712 			 * Convert to MDI error code
3713 			 */
3714 			switch (rv) {
3715 			case NDI_SUCCESS:
3716 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3717 				i_mdi_report_path_state(ct, pip);
3718 				rv = MDI_SUCCESS;
3719 				break;
3720 			case NDI_BUSY:
3721 				rv = MDI_BUSY;
3722 				break;
3723 			default:
3724 				rv = MDI_FAILURE;
3725 				break;
3726 			}
3727 		}
3728 	}
3729 	MDI_CLIENT_UNLOCK(ct);
3730 
3731 state_change_exit:
3732 	/*
3733 	 * Mark the pHCI as stable again.
3734 	 */
3735 	MDI_PHCI_LOCK(ph);
3736 	MDI_PHCI_STABLE(ph);
3737 	MDI_PHCI_UNLOCK(ph);
3738 	return (rv);
3739 }
3740 
3741 /*
3742  * mdi_pi_online():
3743  *		Place the path_info node in the online state.  The path is
3744  *		now available to be selected by mdi_select_path() for
3745  *		transporting I/O requests to client devices.
3746  * Return Values:
3747  *		MDI_SUCCESS
3748  *		MDI_FAILURE
3749  */
3750 int
3751 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3752 {
3753 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3754 	int		client_held = 0;
3755 	int		rv;
3756 
3757 	ASSERT(ct != NULL);
3758 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3759 	if (rv != MDI_SUCCESS)
3760 		return (rv);
3761 
3762 	MDI_PI_LOCK(pip);
3763 	if (MDI_PI(pip)->pi_pm_held == 0) {
3764 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3765 		    "i_mdi_pm_hold_pip %p", (void *)pip));
3766 		i_mdi_pm_hold_pip(pip);
3767 		client_held = 1;
3768 	}
3769 	MDI_PI_UNLOCK(pip);
3770 
3771 	if (client_held) {
3772 		MDI_CLIENT_LOCK(ct);
3773 		if (ct->ct_power_cnt == 0) {
3774 			rv = i_mdi_power_all_phci(ct);
3775 		}
3776 
3777 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3778 		    "i_mdi_pm_hold_client %p", (void *)ct));
3779 		i_mdi_pm_hold_client(ct, 1);
3780 		MDI_CLIENT_UNLOCK(ct);
3781 	}
3782 
3783 	return (rv);
3784 }
3785 
3786 /*
3787  * mdi_pi_standby():
3788  *		Place the mdi_pathinfo node in standby state
3789  *
3790  * Return Values:
3791  *		MDI_SUCCESS
3792  *		MDI_FAILURE
3793  */
3794 int
3795 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3796 {
3797 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3798 }
3799 
3800 /*
3801  * mdi_pi_fault():
3802  *		Place the mdi_pathinfo node in fault'ed state
3803  * Return Values:
3804  *		MDI_SUCCESS
3805  *		MDI_FAILURE
3806  */
3807 int
3808 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3809 {
3810 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3811 }
3812 
3813 /*
3814  * mdi_pi_offline():
3815  *		Offline a mdi_pathinfo node.
3816  * Return Values:
3817  *		MDI_SUCCESS
3818  *		MDI_FAILURE
3819  */
3820 int
3821 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3822 {
3823 	int	ret, client_held = 0;
3824 	mdi_client_t	*ct;
3825 
3826 	/*
3827 	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3828 	 * used it to mean "user initiated operation" (i.e. devctl). Callers
3829 	 * should now just use NDI_USER_REQ.
3830 	 */
3831 	if (flags & NDI_DEVI_REMOVE) {
3832 		flags &= ~NDI_DEVI_REMOVE;
3833 		flags |= NDI_USER_REQ;
3834 	}
3835 
3836 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3837 
3838 	if (ret == MDI_SUCCESS) {
3839 		MDI_PI_LOCK(pip);
3840 		if (MDI_PI(pip)->pi_pm_held) {
3841 			client_held = 1;
3842 		}
3843 		MDI_PI_UNLOCK(pip);
3844 
3845 		if (client_held) {
3846 			ct = MDI_PI(pip)->pi_client;
3847 			MDI_CLIENT_LOCK(ct);
3848 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3849 			    "i_mdi_pm_rele_client\n"));
3850 			i_mdi_pm_rele_client(ct, 1);
3851 			MDI_CLIENT_UNLOCK(ct);
3852 		}
3853 	}
3854 
3855 	return (ret);
3856 }
3857 
/*
 * i_mdi_pi_offline():
 *		Offline a mdi_pathinfo node and call the vHCI driver's callback
 *
 *		Waits for the path's reference count to drain, notifies the
 *		vHCI through vo_pi_state_change (if registered), marks the
 *		node offline and, when the callback succeeded, re-evaluates
 *		the client state, offlining the client devinfo node if the
 *		client ended up in the failed state.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_BUSY
 *		MDI_FAILURE
 *		A non-success value returned by vo_pi_state_change is passed
 *		back to the caller unchanged.
 */
static int
i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
{
	dev_info_t	*vdip = NULL;
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct = NULL;
	int		(*f)();
	int		rv;

	MDI_PI_LOCK(pip);
	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);

	/*
	 * Unlike mdi_pi_free(), a timed-out wait is only logged here; the
	 * loop keeps waiting until the reference count reaches zero.
	 */
	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
		    "!%d cmds still pending on path %s %p",
		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
		    (void *)pip));
		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
		    TR_CLOCK_TICK) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!Timeout reached on path %s %p without the cond",
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!%d cmds still pending on path %s %p",
			    MDI_PI(pip)->pi_ref_cnt,
			    mdi_pi_spathname(pip), (void *)pip));
		}
	}
	vh = ct->ct_vhci;
	vdip = vh->vh_dip;

	/*
	 * Notify vHCI that has registered this event
	 */
	ASSERT(vh->vh_ops);
	f = vh->vh_ops->vo_pi_state_change;

	rv = MDI_SUCCESS;
	if (f != NULL) {
		/* The callback is made with the pathinfo mutex dropped */
		MDI_PI_UNLOCK(pip);
		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
		    flags)) != MDI_SUCCESS) {
			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
			    ddi_driver_name(vdip), ddi_get_instance(vdip),
			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
		}
		MDI_PI_LOCK(pip);
	}

	/*
	 * Set the mdi_pathinfo node state and clear the transient condition.
	 * This happens whether or not the vHCI callback succeeded.
	 */
	MDI_PI_SET_OFFLINE(pip);
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	MDI_CLIENT_LOCK(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			dev_info_t	*cdip = ct->ct_dip;

			/*
			 * Offlining the mdi_pathinfo node will impact the
			 * client state.  Update the client and dev_info node
			 * state accordingly.  From here on rv carries an NDI
			 * code until it is converted back below.
			 */
			i_mdi_client_update_state(ct);
			rv = NDI_SUCCESS;
			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
				if (cdip &&
				    (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					/*
					 * ndi_devi_offline() is called with
					 * the client lock dropped.
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);
					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online.
						 */
						MDI_DEBUG(4, (MDI_WARN, cdip,
						    "ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
		/*
		 * The path state is reported whenever the vHCI callback
		 * succeeded, even if offlining the client failed above.
		 */
		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
		i_mdi_report_path_state(ct, pip);
	}

	MDI_CLIENT_UNLOCK(ct);

	/*
	 * Change in the mdi_pathinfo node state will impact the client state
	 */
	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
	    "ct = %p pip = %p", (void *)ct, (void *)pip));
	return (rv);
}
3989 
/*
 * i_mdi_pi_online():
 *		Online a mdi_pathinfo node and call the vHCI driver's callback
 *
 *		Marks the node as onlining, notifies the vHCI through
 *		vo_pi_state_change (if registered) and, on success,
 *		re-evaluates the client state and onlines the client devinfo
 *		node if it is not yet attached.  On failure the path state is
 *		reset to its previous value.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_BUSY
 *		MDI_FAILURE
 *		A non-success value returned by vo_pi_state_change is passed
 *		back to the caller unchanged.
 */
static int
i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct = NULL;
	mdi_phci_t	*ph;
	int		(*f)();
	int		rv;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	vh = ph->ph_vhci;
	ct = MDI_PI(pip)->pi_client;
	/*
	 * NOTE(review): no trailing semicolon on the next line; presumably
	 * the macro expands to a complete statement or block - confirm.
	 */
	MDI_PI_SET_ONLINING(pip)
	MDI_PI_UNLOCK(pip);
	/* The vHCI callback is made with no pathinfo or client lock held */
	f = vh->vh_ops->vo_pi_state_change;
	rv = MDI_SUCCESS;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0, flags);
	MDI_CLIENT_LOCK(ct);
	/* Wake anyone waiting on this node's state */
	MDI_PI_LOCK(pip);
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);
	if (rv == MDI_SUCCESS) {
		dev_info_t	*cdip = ct->ct_dip;

		i_mdi_client_update_state(ct);
		if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
		    MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
			if (cdip && !i_ddi_devi_attached(cdip)) {
				/*
				 * ndi_devi_online() is called with the client
				 * lock dropped.
				 */
				MDI_CLIENT_UNLOCK(ct);
				rv = ndi_devi_online(cdip, 0);
				MDI_CLIENT_LOCK(ct);
				if ((rv != NDI_SUCCESS) &&
				    (MDI_CLIENT_STATE(ct) ==
				    MDI_CLIENT_STATE_DEGRADED)) {
					MDI_CLIENT_SET_OFFLINE(ct);
				}
				if (rv != NDI_SUCCESS) {
					/* Reset the path state */
					MDI_PI_LOCK(pip);
					MDI_PI(pip)->pi_state =
					    MDI_PI_OLD_STATE(pip);
					MDI_PI_UNLOCK(pip);
				}
			}
		}
		/*
		 * Convert to MDI error code.
		 * NOTE(review): when ndi_devi_online() was not called, rv
		 * still holds MDI_SUCCESS here and is matched against the
		 * NDI_SUCCESS case - confirm the two constants are equal.
		 */
		switch (rv) {
		case NDI_SUCCESS:
			MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
			i_mdi_report_path_state(ct, pip);
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	} else {
		/* Reset the path state */
		MDI_PI_LOCK(pip);
		MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		MDI_PI_UNLOCK(pip);
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}
4063 
4064 /*
4065  * mdi_pi_get_node_name():
4066  *              Get the name associated with a mdi_pathinfo node.
4067  *              Since pathinfo nodes are not directly named, we
4068  *              return the node_name of the client.
4069  *
4070  * Return Values:
4071  *              char *
4072  */
4073 char *
4074 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4075 {
4076 	mdi_client_t    *ct;
4077 
4078 	if (pip == NULL)
4079 		return (NULL);
4080 	ct = MDI_PI(pip)->pi_client;
4081 	if ((ct == NULL) || (ct->ct_dip == NULL))
4082 		return (NULL);
4083 	return (ddi_node_name(ct->ct_dip));
4084 }
4085 
4086 /*
4087  * mdi_pi_get_addr():
4088  *		Get the unit address associated with a mdi_pathinfo node
4089  *
4090  * Return Values:
4091  *		char *
4092  */
4093 char *
4094 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4095 {
4096 	if (pip == NULL)
4097 		return (NULL);
4098 
4099 	return (MDI_PI(pip)->pi_addr);
4100 }
4101 
4102 /*
4103  * mdi_pi_get_path_instance():
4104  *		Get the 'path_instance' of a mdi_pathinfo node
4105  *
4106  * Return Values:
4107  *		path_instance
4108  */
4109 int
4110 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4111 {
4112 	if (pip == NULL)
4113 		return (0);
4114 
4115 	return (MDI_PI(pip)->pi_path_instance);
4116 }
4117 
4118 /*
4119  * mdi_pi_pathname():
4120  *		Return pointer to path to pathinfo node.
4121  */
4122 char *
4123 mdi_pi_pathname(mdi_pathinfo_t *pip)
4124 {
4125 	if (pip == NULL)
4126 		return (NULL);
4127 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4128 }
4129 
4130 /*
4131  * mdi_pi_spathname():
4132  *		Return pointer to shortpath to pathinfo node. Used for debug
4133  *		messages, so return "" instead of NULL when unknown.
4134  */
4135 char *
4136 mdi_pi_spathname(mdi_pathinfo_t *pip)
4137 {
4138 	char	*spath = "";
4139 
4140 	if (pip) {
4141 		spath = mdi_pi_spathname_by_instance(
4142 		    mdi_pi_get_path_instance(pip));
4143 		if (spath == NULL)
4144 			spath = "";
4145 	}
4146 	return (spath);
4147 }
4148 
4149 char *
4150 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4151 {
4152 	char *obp_path = NULL;
4153 	if ((pip == NULL) || (path == NULL))
4154 		return (NULL);
4155 
4156 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4157 		(void) strcpy(path, obp_path);
4158 		(void) mdi_prop_free(obp_path);
4159 	} else {
4160 		path = NULL;
4161 	}
4162 	return (path);
4163 }
4164 
4165 int
4166 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4167 {
4168 	dev_info_t *pdip;
4169 	char *obp_path = NULL;
4170 	int rc = MDI_FAILURE;
4171 
4172 	if (pip == NULL)
4173 		return (MDI_FAILURE);
4174 
4175 	pdip = mdi_pi_get_phci(pip);
4176 	if (pdip == NULL)
4177 		return (MDI_FAILURE);
4178 
4179 	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4180 
4181 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4182 		(void) ddi_pathname(pdip, obp_path);
4183 	}
4184 
4185 	if (component) {
4186 		(void) strncat(obp_path, "/", MAXPATHLEN);
4187 		(void) strncat(obp_path, component, MAXPATHLEN);
4188 	}
4189 	rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4190 
4191 	if (obp_path)
4192 		kmem_free(obp_path, MAXPATHLEN);
4193 	return (rc);
4194 }
4195 
4196 /*
4197  * mdi_pi_get_client():
4198  *		Get the client devinfo associated with a mdi_pathinfo node
4199  *
4200  * Return Values:
4201  *		Handle to client device dev_info node
4202  */
4203 dev_info_t *
4204 mdi_pi_get_client(mdi_pathinfo_t *pip)
4205 {
4206 	dev_info_t	*dip = NULL;
4207 	if (pip) {
4208 		dip = MDI_PI(pip)->pi_client->ct_dip;
4209 	}
4210 	return (dip);
4211 }
4212 
4213 /*
4214  * mdi_pi_get_phci():
4215  *		Get the pHCI devinfo associated with the mdi_pathinfo node
4216  * Return Values:
4217  *		Handle to dev_info node
4218  */
4219 dev_info_t *
4220 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4221 {
4222 	dev_info_t	*dip = NULL;
4223 	mdi_phci_t	*ph;
4224 
4225 	if (pip) {
4226 		ph = MDI_PI(pip)->pi_phci;
4227 		if (ph)
4228 			dip = ph->ph_dip;
4229 	}
4230 	return (dip);
4231 }
4232 
4233 /*
4234  * mdi_pi_get_client_private():
4235  *		Get the client private information associated with the
4236  *		mdi_pathinfo node
4237  */
4238 void *
4239 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4240 {
4241 	void *cprivate = NULL;
4242 	if (pip) {
4243 		cprivate = MDI_PI(pip)->pi_cprivate;
4244 	}
4245 	return (cprivate);
4246 }
4247 
4248 /*
4249  * mdi_pi_set_client_private():
4250  *		Set the client private information in the mdi_pathinfo node
4251  */
4252 void
4253 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4254 {
4255 	if (pip) {
4256 		MDI_PI(pip)->pi_cprivate = priv;
4257 	}
4258 }
4259 
4260 /*
4261  * mdi_pi_get_phci_private():
4262  *		Get the pHCI private information associated with the
4263  *		mdi_pathinfo node
4264  */
4265 caddr_t
4266 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4267 {
4268 	caddr_t	pprivate = NULL;
4269 
4270 	if (pip) {
4271 		pprivate = MDI_PI(pip)->pi_pprivate;
4272 	}
4273 	return (pprivate);
4274 }
4275 
4276 /*
4277  * mdi_pi_set_phci_private():
4278  *		Set the pHCI private information in the mdi_pathinfo node
4279  */
4280 void
4281 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4282 {
4283 	if (pip) {
4284 		MDI_PI(pip)->pi_pprivate = priv;
4285 	}
4286 }
4287 
4288 /*
4289  * mdi_pi_get_state():
4290  *		Get the mdi_pathinfo node state. Transient states are internal
4291  *		and not provided to the users
4292  */
4293 mdi_pathinfo_state_t
4294 mdi_pi_get_state(mdi_pathinfo_t *pip)
4295 {
4296 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4297 
4298 	if (pip) {
4299 		if (MDI_PI_IS_TRANSIENT(pip)) {
4300 			/*
4301 			 * mdi_pathinfo is in state transition.  Return the
4302 			 * last good state.
4303 			 */
4304 			state = MDI_PI_OLD_STATE(pip);
4305 		} else {
4306 			state = MDI_PI_STATE(pip);
4307 		}
4308 	}
4309 	return (state);
4310 }
4311 
4312 /*
4313  * mdi_pi_get_flags():
4314  *		Get the mdi_pathinfo node flags.
4315  */
4316 uint_t
4317 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4318 {
4319 	return (pip ? MDI_PI(pip)->pi_flags : 0);
4320 }
4321 
4322 /*
4323  * Note that the following function needs to be the new interface for
4324  * mdi_pi_get_state when mpxio gets integrated to ON.
4325  */
4326 int
4327 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4328 		uint32_t *ext_state)
4329 {
4330 	*state = MDI_PATHINFO_STATE_INIT;
4331 
4332 	if (pip) {
4333 		if (MDI_PI_IS_TRANSIENT(pip)) {
4334 			/*
4335 			 * mdi_pathinfo is in state transition.  Return the
4336 			 * last good state.
4337 			 */
4338 			*state = MDI_PI_OLD_STATE(pip);
4339 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
4340 		} else {
4341 			*state = MDI_PI_STATE(pip);
4342 			*ext_state = MDI_PI_EXT_STATE(pip);
4343 		}
4344 	}
4345 	return (MDI_SUCCESS);
4346 }
4347 
4348 /*
4349  * mdi_pi_get_preferred:
4350  *	Get the preferred path flag
4351  */
4352 int
4353 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4354 {
4355 	if (pip) {
4356 		return (MDI_PI(pip)->pi_preferred);
4357 	}
4358 	return (0);
4359 }
4360 
4361 /*
4362  * mdi_pi_set_preferred:
4363  *	Set the preferred path flag
4364  */
4365 void
4366 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4367 {
4368 	if (pip) {
4369 		MDI_PI(pip)->pi_preferred = preferred;
4370 	}
4371 }
4372 
4373 /*
4374  * mdi_pi_set_state():
4375  *		Set the mdi_pathinfo node state
4376  */
4377 void
4378 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4379 {
4380 	uint32_t	ext_state;
4381 
4382 	if (pip) {
4383 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4384 		MDI_PI(pip)->pi_state = state;
4385 		MDI_PI(pip)->pi_state |= ext_state;
4386 
4387 		/* Path has changed state, invalidate DINFOCACHE snap shot. */
4388 		i_ddi_di_cache_invalidate();
4389 	}
4390 }
4391 
4392 /*
4393  * Property functions:
4394  */
4395 int
4396 i_map_nvlist_error_to_mdi(int val)
4397 {
4398 	int rv;
4399 
4400 	switch (val) {
4401 	case 0:
4402 		rv = DDI_PROP_SUCCESS;
4403 		break;
4404 	case EINVAL:
4405 	case ENOTSUP:
4406 		rv = DDI_PROP_INVAL_ARG;
4407 		break;
4408 	case ENOMEM:
4409 		rv = DDI_PROP_NO_MEMORY;
4410 		break;
4411 	default:
4412 		rv = DDI_PROP_NOT_FOUND;
4413 		break;
4414 	}
4415 	return (rv);
4416 }
4417 
4418 /*
4419  * mdi_pi_get_next_prop():
4420  * 		Property walk function.  The caller should hold mdi_pi_lock()
4421  *		and release by calling mdi_pi_unlock() at the end of walk to
4422  *		get a consistent value.
4423  */
4424 nvpair_t *
4425 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4426 {
4427 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4428 		return (NULL);
4429 	}
4430 	ASSERT(MDI_PI_LOCKED(pip));
4431 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4432 }
4433 
4434 /*
4435  * mdi_prop_remove():
4436  * 		Remove the named property from the named list.
4437  */
4438 int
4439 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4440 {
4441 	if (pip == NULL) {
4442 		return (DDI_PROP_NOT_FOUND);
4443 	}
4444 	ASSERT(!MDI_PI_LOCKED(pip));
4445 	MDI_PI_LOCK(pip);
4446 	if (MDI_PI(pip)->pi_prop == NULL) {
4447 		MDI_PI_UNLOCK(pip);
4448 		return (DDI_PROP_NOT_FOUND);
4449 	}
4450 	if (name) {
4451 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4452 	} else {
4453 		char		nvp_name[MAXNAMELEN];
4454 		nvpair_t	*nvp;
4455 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4456 		while (nvp) {
4457 			nvpair_t	*next;
4458 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4459 			(void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4460 			    nvpair_name(nvp));
4461 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4462 			    nvp_name);
4463 			nvp = next;
4464 		}
4465 	}
4466 	MDI_PI_UNLOCK(pip);
4467 	return (DDI_PROP_SUCCESS);
4468 }
4469 
4470 /*
4471  * mdi_prop_size():
4472  * 		Get buffer size needed to pack the property data.
4473  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
4474  *		buffer size.
4475  */
4476 int
4477 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4478 {
4479 	int	rv;
4480 	size_t	bufsize;
4481 
4482 	*buflenp = 0;
4483 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4484 		return (DDI_PROP_NOT_FOUND);
4485 	}
4486 	ASSERT(MDI_PI_LOCKED(pip));
4487 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4488 	    &bufsize, NV_ENCODE_NATIVE);
4489 	*buflenp = bufsize;
4490 	return (i_map_nvlist_error_to_mdi(rv));
4491 }
4492 
4493 /*
4494  * mdi_prop_pack():
4495  * 		pack the property list.  The caller should hold the
4496  *		mdi_pathinfo_t node to get a consistent data
4497  */
4498 int
4499 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4500 {
4501 	int	rv;
4502 	size_t	bufsize;
4503 
4504 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4505 		return (DDI_PROP_NOT_FOUND);
4506 	}
4507 
4508 	ASSERT(MDI_PI_LOCKED(pip));
4509 
4510 	bufsize = buflen;
4511 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4512 	    NV_ENCODE_NATIVE, KM_SLEEP);
4513 
4514 	return (i_map_nvlist_error_to_mdi(rv));
4515 }
4516 
4517 /*
4518  * mdi_prop_update_byte():
4519  *		Create/Update a byte property
4520  */
4521 int
4522 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4523 {
4524 	int rv;
4525 
4526 	if (pip == NULL) {
4527 		return (DDI_PROP_INVAL_ARG);
4528 	}
4529 	ASSERT(!MDI_PI_LOCKED(pip));
4530 	MDI_PI_LOCK(pip);
4531 	if (MDI_PI(pip)->pi_prop == NULL) {
4532 		MDI_PI_UNLOCK(pip);
4533 		return (DDI_PROP_NOT_FOUND);
4534 	}
4535 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4536 	MDI_PI_UNLOCK(pip);
4537 	return (i_map_nvlist_error_to_mdi(rv));
4538 }
4539 
4540 /*
4541  * mdi_prop_update_byte_array():
4542  *		Create/Update a byte array property
4543  */
4544 int
4545 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4546     uint_t nelements)
4547 {
4548 	int rv;
4549 
4550 	if (pip == NULL) {
4551 		return (DDI_PROP_INVAL_ARG);
4552 	}
4553 	ASSERT(!MDI_PI_LOCKED(pip));
4554 	MDI_PI_LOCK(pip);
4555 	if (MDI_PI(pip)->pi_prop == NULL) {
4556 		MDI_PI_UNLOCK(pip);
4557 		return (DDI_PROP_NOT_FOUND);
4558 	}
4559 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4560 	MDI_PI_UNLOCK(pip);
4561 	return (i_map_nvlist_error_to_mdi(rv));
4562 }
4563 
4564 /*
4565  * mdi_prop_update_int():
4566  *		Create/Update a 32 bit integer property
4567  */
4568 int
4569 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4570 {
4571 	int rv;
4572 
4573 	if (pip == NULL) {
4574 		return (DDI_PROP_INVAL_ARG);
4575 	}
4576 	ASSERT(!MDI_PI_LOCKED(pip));
4577 	MDI_PI_LOCK(pip);
4578 	if (MDI_PI(pip)->pi_prop == NULL) {
4579 		MDI_PI_UNLOCK(pip);
4580 		return (DDI_PROP_NOT_FOUND);
4581 	}
4582 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4583 	MDI_PI_UNLOCK(pip);
4584 	return (i_map_nvlist_error_to_mdi(rv));
4585 }
4586 
4587 /*
4588  * mdi_prop_update_int64():
4589  *		Create/Update a 64 bit integer property
4590  */
4591 int
4592 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4593 {
4594 	int rv;
4595 
4596 	if (pip == NULL) {
4597 		return (DDI_PROP_INVAL_ARG);
4598 	}
4599 	ASSERT(!MDI_PI_LOCKED(pip));
4600 	MDI_PI_LOCK(pip);
4601 	if (MDI_PI(pip)->pi_prop == NULL) {
4602 		MDI_PI_UNLOCK(pip);
4603 		return (DDI_PROP_NOT_FOUND);
4604 	}
4605 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4606 	MDI_PI_UNLOCK(pip);
4607 	return (i_map_nvlist_error_to_mdi(rv));
4608 }
4609 
4610 /*
4611  * mdi_prop_update_int_array():
4612  *		Create/Update a int array property
4613  */
4614 int
4615 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4616 	    uint_t nelements)
4617 {
4618 	int rv;
4619 
4620 	if (pip == NULL) {
4621 		return (DDI_PROP_INVAL_ARG);
4622 	}
4623 	ASSERT(!MDI_PI_LOCKED(pip));
4624 	MDI_PI_LOCK(pip);
4625 	if (MDI_PI(pip)->pi_prop == NULL) {
4626 		MDI_PI_UNLOCK(pip);
4627 		return (DDI_PROP_NOT_FOUND);
4628 	}
4629 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4630 	    nelements);
4631 	MDI_PI_UNLOCK(pip);
4632 	return (i_map_nvlist_error_to_mdi(rv));
4633 }
4634 
4635 /*
4636  * mdi_prop_update_string():
4637  *		Create/Update a string property
4638  */
4639 int
4640 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4641 {
4642 	int rv;
4643 
4644 	if (pip == NULL) {
4645 		return (DDI_PROP_INVAL_ARG);
4646 	}
4647 	ASSERT(!MDI_PI_LOCKED(pip));
4648 	MDI_PI_LOCK(pip);
4649 	if (MDI_PI(pip)->pi_prop == NULL) {
4650 		MDI_PI_UNLOCK(pip);
4651 		return (DDI_PROP_NOT_FOUND);
4652 	}
4653 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4654 	MDI_PI_UNLOCK(pip);
4655 	return (i_map_nvlist_error_to_mdi(rv));
4656 }
4657 
4658 /*
4659  * mdi_prop_update_string_array():
4660  *		Create/Update a string array property
4661  */
4662 int
4663 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4664     uint_t nelements)
4665 {
4666 	int rv;
4667 
4668 	if (pip == NULL) {
4669 		return (DDI_PROP_INVAL_ARG);
4670 	}
4671 	ASSERT(!MDI_PI_LOCKED(pip));
4672 	MDI_PI_LOCK(pip);
4673 	if (MDI_PI(pip)->pi_prop == NULL) {
4674 		MDI_PI_UNLOCK(pip);
4675 		return (DDI_PROP_NOT_FOUND);
4676 	}
4677 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4678 	    nelements);
4679 	MDI_PI_UNLOCK(pip);
4680 	return (i_map_nvlist_error_to_mdi(rv));
4681 }
4682 
4683 /*
4684  * mdi_prop_lookup_byte():
4685  * 		Look for byte property identified by name.  The data returned
4686  *		is the actual property and valid as long as mdi_pathinfo_t node
4687  *		is alive.
4688  */
4689 int
4690 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4691 {
4692 	int rv;
4693 
4694 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4695 		return (DDI_PROP_NOT_FOUND);
4696 	}
4697 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4698 	return (i_map_nvlist_error_to_mdi(rv));
4699 }
4700 
4701 
4702 /*
4703  * mdi_prop_lookup_byte_array():
4704  * 		Look for byte array property identified by name.  The data
4705  *		returned is the actual property and valid as long as
4706  *		mdi_pathinfo_t node is alive.
4707  */
4708 int
4709 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4710     uint_t *nelements)
4711 {
4712 	int rv;
4713 
4714 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4715 		return (DDI_PROP_NOT_FOUND);
4716 	}
4717 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4718 	    nelements);
4719 	return (i_map_nvlist_error_to_mdi(rv));
4720 }
4721 
4722 /*
4723  * mdi_prop_lookup_int():
4724  * 		Look for int property identified by name.  The data returned
4725  *		is the actual property and valid as long as mdi_pathinfo_t
4726  *		node is alive.
4727  */
4728 int
4729 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4730 {
4731 	int rv;
4732 
4733 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4734 		return (DDI_PROP_NOT_FOUND);
4735 	}
4736 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4737 	return (i_map_nvlist_error_to_mdi(rv));
4738 }
4739 
4740 /*
4741  * mdi_prop_lookup_int64():
4742  * 		Look for int64 property identified by name.  The data returned
4743  *		is the actual property and valid as long as mdi_pathinfo_t node
4744  *		is alive.
4745  */
4746 int
4747 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4748 {
4749 	int rv;
4750 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4751 		return (DDI_PROP_NOT_FOUND);
4752 	}
4753 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4754 	return (i_map_nvlist_error_to_mdi(rv));
4755 }
4756 
4757 /*
4758  * mdi_prop_lookup_int_array():
4759  * 		Look for int array property identified by name.  The data
4760  *		returned is the actual property and valid as long as
4761  *		mdi_pathinfo_t node is alive.
4762  */
4763 int
4764 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4765     uint_t *nelements)
4766 {
4767 	int rv;
4768 
4769 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4770 		return (DDI_PROP_NOT_FOUND);
4771 	}
4772 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4773 	    (int32_t **)data, nelements);
4774 	return (i_map_nvlist_error_to_mdi(rv));
4775 }
4776 
4777 /*
4778  * mdi_prop_lookup_string():
4779  * 		Look for string property identified by name.  The data
4780  *		returned is the actual property and valid as long as
4781  *		mdi_pathinfo_t node is alive.
4782  */
4783 int
4784 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4785 {
4786 	int rv;
4787 
4788 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4789 		return (DDI_PROP_NOT_FOUND);
4790 	}
4791 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4792 	return (i_map_nvlist_error_to_mdi(rv));
4793 }
4794 
4795 /*
4796  * mdi_prop_lookup_string_array():
4797  * 		Look for string array property identified by name.  The data
4798  *		returned is the actual property and valid as long as
4799  *		mdi_pathinfo_t node is alive.
4800  */
4801 int
4802 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4803     uint_t *nelements)
4804 {
4805 	int rv;
4806 
4807 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4808 		return (DDI_PROP_NOT_FOUND);
4809 	}
4810 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4811 	    nelements);
4812 	return (i_map_nvlist_error_to_mdi(rv));
4813 }
4814 
/*
 * mdi_prop_free():
 * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
 *		functions return the pointer to actual property data and not a
 *		copy of it.  So the data returned is valid as long as
 *		mdi_pathinfo_t node is valid.
 *
 *		Consequently this function is a no-op: the data argument is
 *		ignored and DDI_PROP_SUCCESS is always returned.
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	/* Nothing is released here; see the block comment above. */
	return (DDI_PROP_SUCCESS);
}
4828 
4829 /*ARGSUSED*/
4830 static void
4831 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4832 {
4833 	char		*ct_path;
4834 	char		*ct_status;
4835 	char		*status;
4836 	dev_info_t	*cdip = ct->ct_dip;
4837 	char		lb_buf[64];
4838 	int		report_lb_c = 0, report_lb_p = 0;
4839 
4840 	ASSERT(MDI_CLIENT_LOCKED(ct));
4841 	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4842 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4843 		return;
4844 	}
4845 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4846 		ct_status = "optimal";
4847 		report_lb_c = 1;
4848 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4849 		ct_status = "degraded";
4850 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4851 		ct_status = "failed";
4852 	} else {
4853 		ct_status = "unknown";
4854 	}
4855 
4856 	lb_buf[0] = 0;		/* not interested in load balancing config */
4857 
4858 	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4859 		status = "removed";
4860 	} else if (MDI_PI_IS_OFFLINE(pip)) {
4861 		status = "offline";
4862 	} else if (MDI_PI_IS_ONLINE(pip)) {
4863 		status = "online";
4864 		report_lb_p = 1;
4865 	} else if (MDI_PI_IS_STANDBY(pip)) {
4866 		status = "standby";
4867 	} else if (MDI_PI_IS_FAULT(pip)) {
4868 		status = "faulted";
4869 	} else {
4870 		status = "unknown";
4871 	}
4872 
4873 	if (cdip) {
4874 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4875 
4876 		/*
4877 		 * NOTE: Keeping "multipath status: %s" and
4878 		 * "Load balancing: %s" format unchanged in case someone
4879 		 * scrubs /var/adm/messages looking for these messages.
4880 		 */
4881 		if (report_lb_c && report_lb_p) {
4882 			if (ct->ct_lb == LOAD_BALANCE_LBA) {
4883 				(void) snprintf(lb_buf, sizeof (lb_buf),
4884 				    "%s, region-size: %d", mdi_load_balance_lba,
4885 				    ct->ct_lb_args->region_size);
4886 			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4887 				(void) snprintf(lb_buf, sizeof (lb_buf),
4888 				    "%s", mdi_load_balance_none);
4889 			} else {
4890 				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4891 				    mdi_load_balance_rr);
4892 			}
4893 
4894 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4895 			    "?%s (%s%d) multipath status: %s: "
4896 			    "path %d %s is %s: Load balancing: %s\n",
4897 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4898 			    ddi_get_instance(cdip), ct_status,
4899 			    mdi_pi_get_path_instance(pip),
4900 			    mdi_pi_spathname(pip), status, lb_buf);
4901 		} else {
4902 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4903 			    "?%s (%s%d) multipath status: %s: "
4904 			    "path %d %s is %s\n",
4905 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4906 			    ddi_get_instance(cdip), ct_status,
4907 			    mdi_pi_get_path_instance(pip),
4908 			    mdi_pi_spathname(pip), status);
4909 		}
4910 
4911 		kmem_free(ct_path, MAXPATHLEN);
4912 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4913 	}
4914 }
4915 
#ifdef	DEBUG
/*
 * i_mdi_log():
 *		Utility function for error message management
 *
 *		The caller's format and arguments are expanded into a local
 *		buffer first; the result is then passed to cmn_err() as a
 *		"%s" argument behind an "mdi: <driver><instance>: <func>: "
 *		prefix (the driver/instance part is omitted when dip is
 *		NULL).
 *
 *		NOTE: Implementation takes care of trailing \n for cmn_err,
 *		MDI_DEBUG should not terminate fmt strings with \n.
 *
 *		NOTE: If the level is >= 2, and there is no leading !?^
 *		then a leading ! is implied (but can be overridden via
 *		mdi_debug_consoleonly). If you are using kmdb on the console,
 *		consider setting mdi_debug_consoleonly to 1 as an aid.
 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[512];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	/* Build the "<driver><instance>: " prefix, empty without a dip. */
	if (dip) {
		(void) snprintf(name, sizeof(name), "%s%d: ",
		    ddi_driver_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/*
	 * A leading '!', '?' or '^' in the expanded message selects the
	 * destination and is stripped from the text (bp skips it).
	 */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		if (level >= 2)
			log_only = 1;		/* ! implied */
		bp = buf;
		break;
	}
	/* Global overrides; consoleonly is checked last and so wins. */
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}
	if (mdi_debug_consoleonly) {
		log_only = 0;
		boot_only = 0;
		console_only = 1;
		level = CE_NOTE;
		/* jump straight to the formats without a trailing newline */
		goto console;
	}

	/*
	 * The destination character has to be the first character of the
	 * format literal handed to cmn_err(), hence one call per variant.
	 */
	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
		/* these formats supply the trailing newline themselves */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
		}
		break;

	case CE_WARN:
	case CE_PANIC:
	console:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s", name, func, bp);
		}
		break;
	default:
		/* any other level: no destination prefix and no func name */
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */
5019 
5020 void
5021 i_mdi_client_online(dev_info_t *ct_dip)
5022 {
5023 	mdi_client_t	*ct;
5024 
5025 	/*
5026 	 * Client online notification. Mark client state as online
5027 	 * restore our binding with dev_info node
5028 	 */
5029 	ct = i_devi_get_client(ct_dip);
5030 	ASSERT(ct != NULL);
5031 	MDI_CLIENT_LOCK(ct);
5032 	MDI_CLIENT_SET_ONLINE(ct);
5033 	/* catch for any memory leaks */
5034 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5035 	ct->ct_dip = ct_dip;
5036 
5037 	if (ct->ct_power_cnt == 0)
5038 		(void) i_mdi_power_all_phci(ct);
5039 
5040 	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5041 	    "i_mdi_pm_hold_client %p", (void *)ct));
5042 	i_mdi_pm_hold_client(ct, 1);
5043 
5044 	MDI_CLIENT_UNLOCK(ct);
5045 }
5046 
5047 void
5048 i_mdi_phci_online(dev_info_t *ph_dip)
5049 {
5050 	mdi_phci_t	*ph;
5051 
5052 	/* pHCI online notification. Mark state accordingly */
5053 	ph = i_devi_get_phci(ph_dip);
5054 	ASSERT(ph != NULL);
5055 	MDI_PHCI_LOCK(ph);
5056 	MDI_PHCI_SET_ONLINE(ph);
5057 	MDI_PHCI_UNLOCK(ph);
5058 }
5059 
5060 /*
5061  * mdi_devi_online():
5062  * 		Online notification from NDI framework on pHCI/client
5063  *		device online.
5064  * Return Values:
5065  *		NDI_SUCCESS
5066  *		MDI_FAILURE
5067  */
5068 /*ARGSUSED*/
5069 int
5070 mdi_devi_online(dev_info_t *dip, uint_t flags)
5071 {
5072 	if (MDI_PHCI(dip)) {
5073 		i_mdi_phci_online(dip);
5074 	}
5075 
5076 	if (MDI_CLIENT(dip)) {
5077 		i_mdi_client_online(dip);
5078 	}
5079 	return (NDI_SUCCESS);
5080 }
5081 
5082 /*
5083  * mdi_devi_offline():
5084  * 		Offline notification from NDI framework on pHCI/Client device
5085  *		offline.
5086  *
5087  * Return Values:
5088  *		NDI_SUCCESS
5089  *		NDI_FAILURE
5090  */
5091 /*ARGSUSED*/
5092 int
5093 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5094 {
5095 	int		rv = NDI_SUCCESS;
5096 
5097 	if (MDI_CLIENT(dip)) {
5098 		rv = i_mdi_client_offline(dip, flags);
5099 		if (rv != NDI_SUCCESS)
5100 			return (rv);
5101 	}
5102 
5103 	if (MDI_PHCI(dip)) {
5104 		rv = i_mdi_phci_offline(dip, flags);
5105 
5106 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5107 			/* set client back online */
5108 			i_mdi_client_online(dip);
5109 		}
5110 	}
5111 
5112 	return (rv);
5113 }
5114 
/*
 * i_mdi_phci_offline():
 *		Offline a pHCI together with the mdi_pathinfo nodes on its
 *		path list (ph->ph_path_head, linked through pi_phci_link).
 *
 *		The function makes several passes over the path list:
 *		  1. Refuse if a client is in failover or unstable, and
 *		     ndi_devi_offline() every client whose computed state
 *		     would be MDI_CLIENT_STATE_FAILED.
 *		  2. If one of those client offlines failed, walk the paths
 *		     preceding the failing one and online/offline their
 *		     clients according to the current client state, then
 *		     give up.
 *		  3. Mark the pHCI offline and every path as offlining,
 *		     drop the pHCI lock and delay to let pending commands
 *		     run.
 *		  4. Offline each path; if any path does not end up offline
 *		     the pHCI is marked online again.
 *
 * Return Values:
 *		NDI_SUCCESS	pHCI offlined, was already offline, or dip
 *				has no pHCI attached
 *		NDI_BUSY	pHCI or a client is unstable, a failover is
 *				in progress, a client could not be
 *				offlined, or a path did not go offline
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!pHCI already offlined: %p", (void *)dip));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more target devices are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Pass 1: vet every path's client and offline the clients that
	 * would be left in the failed state.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* remember the successor before any lock is dropped */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Both the client and the pHCI lock are released
			 * around the ndi_devi_offline() call.
			 * NOTE(review): `next` was sampled before the pHCI
			 * lock was dropped; confirm the path list cannot
			 * change in that window.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip,
			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (MDI_WARN, dip,
				    "!pHCI device is busy. "
				    "This device can not be removed at this "
				    "moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Pass 2 (failure only): revisit the paths that precede the one
	 * whose client could not be offlined.  Each client with a dev_info
	 * node is onlined when its state is optimal/degraded, or offlined
	 * when its state is failed; results of those calls are ignored.
	 */
	if (failed_pip) {
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					/* all locks dropped around the call */
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					/* all locks dropped around the call */
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip,
						NDI_DEVFS_CLEAN);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* no dev_info node or other state: just move on */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	/*
	 * Final pass: offline every path.  The link to the next path is
	 * read before i_mdi_pi_offline() is called and before the path lock
	 * is taken.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not used below. */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* undo the offline marking made above */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
5313 
5314 void
5315 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5316 {
5317 	mdi_phci_t	*ph;
5318 	mdi_client_t	*ct;
5319 	mdi_pathinfo_t	*pip;
5320 	mdi_pathinfo_t	*next;
5321 	dev_info_t	*cdip;
5322 
5323 	if (!MDI_PHCI(dip))
5324 		return;
5325 
5326 	ph = i_devi_get_phci(dip);
5327 	if (ph == NULL) {
5328 		return;
5329 	}
5330 
5331 	MDI_PHCI_LOCK(ph);
5332 
5333 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5334 		/* has no last path */
5335 		MDI_PHCI_UNLOCK(ph);
5336 		return;
5337 	}
5338 
5339 	pip = ph->ph_path_head;
5340 	while (pip != NULL) {
5341 		MDI_PI_LOCK(pip);
5342 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5343 
5344 		ct = MDI_PI(pip)->pi_client;
5345 		i_mdi_client_lock(ct, pip);
5346 		MDI_PI_UNLOCK(pip);
5347 
5348 		cdip = ct->ct_dip;
5349 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5350 		    (i_mdi_client_compute_state(ct, ph) ==
5351 		    MDI_CLIENT_STATE_FAILED)) {
5352 			/* Last path. Mark client dip as retiring */
5353 			i_mdi_client_unlock(ct);
5354 			MDI_PHCI_UNLOCK(ph);
5355 			(void) e_ddi_mark_retiring(cdip, cons_array);
5356 			MDI_PHCI_LOCK(ph);
5357 			pip = next;
5358 		} else {
5359 			i_mdi_client_unlock(ct);
5360 			pip = next;
5361 		}
5362 	}
5363 
5364 	MDI_PHCI_UNLOCK(ph);
5365 
5366 	return;
5367 }
5368 
5369 void
5370 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5371 {
5372 	mdi_phci_t	*ph;
5373 	mdi_client_t	*ct;
5374 	mdi_pathinfo_t	*pip;
5375 	mdi_pathinfo_t	*next;
5376 	dev_info_t	*cdip;
5377 
5378 	if (!MDI_PHCI(dip))
5379 		return;
5380 
5381 	ph = i_devi_get_phci(dip);
5382 	if (ph == NULL)
5383 		return;
5384 
5385 	MDI_PHCI_LOCK(ph);
5386 
5387 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5388 		MDI_PHCI_UNLOCK(ph);
5389 		/* not last path */
5390 		return;
5391 	}
5392 
5393 	if (ph->ph_unstable) {
5394 		MDI_PHCI_UNLOCK(ph);
5395 		/* can't check for constraints */
5396 		*constraint = 0;
5397 		return;
5398 	}
5399 
5400 	pip = ph->ph_path_head;
5401 	while (pip != NULL) {
5402 		MDI_PI_LOCK(pip);
5403 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5404 
5405 		/*
5406 		 * The mdi_pathinfo state is OK. Check the client state.
5407 		 * If failover in progress fail the pHCI from offlining
5408 		 */
5409 		ct = MDI_PI(pip)->pi_client;
5410 		i_mdi_client_lock(ct, pip);
5411 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5412 		    (ct->ct_unstable)) {
5413 			/*
5414 			 * Failover is in progress, can't check for constraints
5415 			 */
5416 			MDI_PI_UNLOCK(pip);
5417 			i_mdi_client_unlock(ct);
5418 			MDI_PHCI_UNLOCK(ph);
5419 			*constraint = 0;
5420 			return;
5421 		}
5422 		MDI_PI_UNLOCK(pip);
5423 
5424 		/*
5425 		 * Check to see of we are retiring the last path of this
5426 		 * client device...
5427 		 */
5428 		cdip = ct->ct_dip;
5429 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5430 		    (i_mdi_client_compute_state(ct, ph) ==
5431 		    MDI_CLIENT_STATE_FAILED)) {
5432 			i_mdi_client_unlock(ct);
5433 			MDI_PHCI_UNLOCK(ph);
5434 			(void) e_ddi_retire_notify(cdip, constraint);
5435 			MDI_PHCI_LOCK(ph);
5436 			pip = next;
5437 		} else {
5438 			i_mdi_client_unlock(ct);
5439 			pip = next;
5440 		}
5441 	}
5442 
5443 	MDI_PHCI_UNLOCK(ph);
5444 
5445 	return;
5446 }
5447 
5448 /*
5449  * offline the path(s) hanging off the pHCI. If the
5450  * last path to any client, check that constraints
5451  * have been applied.
5452  *
5453  * If constraint is 0, we aren't going to retire the
5454  * pHCI. However we still need to go through the paths
5455  * calling e_ddi_retire_finalize() to clear their
5456  * contract barriers.
5457  */
5458 void
5459 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5460 {
5461 	mdi_phci_t	*ph;
5462 	mdi_client_t	*ct;
5463 	mdi_pathinfo_t	*pip;
5464 	mdi_pathinfo_t	*next;
5465 	dev_info_t	*cdip;
5466 	int		unstable = 0;
5467 	int		tmp_constraint;
5468 
5469 	if (!MDI_PHCI(dip))
5470 		return;
5471 
5472 	ph = i_devi_get_phci(dip);
5473 	if (ph == NULL) {
5474 		/* no last path and no pips */
5475 		return;
5476 	}
5477 
5478 	MDI_PHCI_LOCK(ph);
5479 
5480 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5481 		MDI_PHCI_UNLOCK(ph);
5482 		/* no last path and no pips */
5483 		return;
5484 	}
5485 
5486 	/*
5487 	 * Check to see if the pHCI can be offlined
5488 	 */
5489 	if (ph->ph_unstable) {
5490 		unstable = 1;
5491 	}
5492 
5493 	pip = ph->ph_path_head;
5494 	while (pip != NULL) {
5495 		MDI_PI_LOCK(pip);
5496 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5497 
5498 		/*
5499 		 * if failover in progress fail the pHCI from offlining
5500 		 */
5501 		ct = MDI_PI(pip)->pi_client;
5502 		i_mdi_client_lock(ct, pip);
5503 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5504 		    (ct->ct_unstable)) {
5505 			unstable = 1;
5506 		}
5507 		MDI_PI_UNLOCK(pip);
5508 
5509 		/*
5510 		 * Check to see of we are removing the last path of this
5511 		 * client device...
5512 		 */
5513 		cdip = ct->ct_dip;
5514 		if (!phci_only && cdip &&
5515 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5516 		    (i_mdi_client_compute_state(ct, ph) ==
5517 		    MDI_CLIENT_STATE_FAILED)) {
5518 			i_mdi_client_unlock(ct);
5519 			MDI_PHCI_UNLOCK(ph);
5520 			/*
5521 			 * This is the last path to this client.
5522 			 *
5523 			 * Constraint will only be set to 1 if this client can
5524 			 * be retired (as already determined by
5525 			 * mdi_phci_retire_notify). However we don't actually
5526 			 * need to retire the client (we just retire the last
5527 			 * path - MPXIO will then fail all I/Os to the client).
5528 			 * But we still need to call e_ddi_retire_finalize so
5529 			 * the contract barriers can be cleared. Therefore we
5530 			 * temporarily set constraint = 0 so that the client
5531 			 * dip is not retired.
5532 			 */
5533 			tmp_constraint = 0;
5534 			(void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5535 			MDI_PHCI_LOCK(ph);
5536 			pip = next;
5537 		} else {
5538 			i_mdi_client_unlock(ct);
5539 			pip = next;
5540 		}
5541 	}
5542 
5543 	if (!phci_only && *((int *)constraint) == 0) {
5544 		MDI_PHCI_UNLOCK(ph);
5545 		return;
5546 	}
5547 
5548 	/*
5549 	 * Cannot offline pip(s)
5550 	 */
5551 	if (unstable) {
5552 		cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5553 		    "pHCI in transient state, cannot retire",
5554 		    ddi_driver_name(dip), ddi_get_instance(dip));
5555 		MDI_PHCI_UNLOCK(ph);
5556 		return;
5557 	}
5558 
5559 	/*
5560 	 * Mark the pHCI as offline
5561 	 */
5562 	MDI_PHCI_SET_OFFLINE(ph);
5563 
5564 	/*
5565 	 * Mark the child mdi_pathinfo nodes as transient
5566 	 */
5567 	pip = ph->ph_path_head;
5568 	while (pip != NULL) {
5569 		MDI_PI_LOCK(pip);
5570 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5571 		MDI_PI_SET_OFFLINING(pip);
5572 		MDI_PI_UNLOCK(pip);
5573 		pip = next;
5574 	}
5575 	MDI_PHCI_UNLOCK(ph);
5576 	/*
5577 	 * Give a chance for any pending commands to execute
5578 	 */
5579 	delay_random(mdi_delay);
5580 	MDI_PHCI_LOCK(ph);
5581 	pip = ph->ph_path_head;
5582 	while (pip != NULL) {
5583 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5584 		(void) i_mdi_pi_offline(pip, 0);
5585 		MDI_PI_LOCK(pip);
5586 		ct = MDI_PI(pip)->pi_client;
5587 		if (!MDI_PI_IS_OFFLINE(pip)) {
5588 			cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5589 			    "path %d %s busy, cannot offline",
5590 			    mdi_pi_get_path_instance(pip),
5591 			    mdi_pi_spathname(pip));
5592 			MDI_PI_UNLOCK(pip);
5593 			MDI_PHCI_SET_ONLINE(ph);
5594 			MDI_PHCI_UNLOCK(ph);
5595 			return;
5596 		}
5597 		MDI_PI_UNLOCK(pip);
5598 		pip = next;
5599 	}
5600 	MDI_PHCI_UNLOCK(ph);
5601 
5602 	return;
5603 }
5604 
5605 void
5606 mdi_phci_unretire(dev_info_t *dip)
5607 {
5608 	mdi_phci_t	*ph;
5609 	mdi_pathinfo_t	*pip;
5610 	mdi_pathinfo_t	*next;
5611 
5612 	ASSERT(MDI_PHCI(dip));
5613 
5614 	/*
5615 	 * Online the phci
5616 	 */
5617 	i_mdi_phci_online(dip);
5618 
5619 	ph = i_devi_get_phci(dip);
5620 	MDI_PHCI_LOCK(ph);
5621 	pip = ph->ph_path_head;
5622 	while (pip != NULL) {
5623 		MDI_PI_LOCK(pip);
5624 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5625 		MDI_PI_UNLOCK(pip);
5626 		(void) i_mdi_pi_online(pip, 0);
5627 		pip = next;
5628 	}
5629 	MDI_PHCI_UNLOCK(ph);
5630 }
5631 
5632 /*ARGSUSED*/
5633 static int
5634 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5635 {
5636 	int		rv = NDI_SUCCESS;
5637 	mdi_client_t	*ct;
5638 
5639 	/*
5640 	 * Client component to go offline.  Make sure that we are
5641 	 * not in failing over state and update client state
5642 	 * accordingly
5643 	 */
5644 	ct = i_devi_get_client(dip);
5645 	MDI_DEBUG(2, (MDI_NOTE, dip,
5646 	    "called %p %p", (void *)dip, (void *)ct));
5647 	if (ct != NULL) {
5648 		MDI_CLIENT_LOCK(ct);
5649 		if (ct->ct_unstable) {
5650 			/*
5651 			 * One or more paths are in transient state,
5652 			 * Dont allow offline of a client device
5653 			 */
5654 			MDI_DEBUG(1, (MDI_WARN, dip,
5655 			    "!One or more paths to "
5656 			    "this device are in transient state. "
5657 			    "This device can not be removed at this moment. "
5658 			    "Please try again later."));
5659 			MDI_CLIENT_UNLOCK(ct);
5660 			return (NDI_BUSY);
5661 		}
5662 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5663 			/*
5664 			 * Failover is in progress, Dont allow DR of
5665 			 * a client device
5666 			 */
5667 			MDI_DEBUG(1, (MDI_WARN, dip,
5668 			    "!Client device is Busy. "
5669 			    "This device can not be removed at this moment. "
5670 			    "Please try again later."));
5671 			MDI_CLIENT_UNLOCK(ct);
5672 			return (NDI_BUSY);
5673 		}
5674 		MDI_CLIENT_SET_OFFLINE(ct);
5675 
5676 		/*
5677 		 * Unbind our relationship with the dev_info node
5678 		 */
5679 		if (flags & NDI_DEVI_REMOVE) {
5680 			ct->ct_dip = NULL;
5681 		}
5682 		MDI_CLIENT_UNLOCK(ct);
5683 	}
5684 	return (rv);
5685 }
5686 
5687 /*
5688  * mdi_pre_attach():
5689  *		Pre attach() notification handler
5690  */
5691 /*ARGSUSED*/
5692 int
5693 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5694 {
5695 	/* don't support old DDI_PM_RESUME */
5696 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5697 	    (cmd == DDI_PM_RESUME))
5698 		return (DDI_FAILURE);
5699 
5700 	return (DDI_SUCCESS);
5701 }
5702 
5703 /*
5704  * mdi_post_attach():
5705  *		Post attach() notification handler
5706  */
5707 /*ARGSUSED*/
5708 void
5709 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5710 {
5711 	mdi_phci_t	*ph;
5712 	mdi_client_t	*ct;
5713 	mdi_vhci_t	*vh;
5714 
5715 	if (MDI_PHCI(dip)) {
5716 		ph = i_devi_get_phci(dip);
5717 		ASSERT(ph != NULL);
5718 
5719 		MDI_PHCI_LOCK(ph);
5720 		switch (cmd) {
5721 		case DDI_ATTACH:
5722 			MDI_DEBUG(2, (MDI_NOTE, dip,
5723 			    "phci post_attach called %p", (void *)ph));
5724 			if (error == DDI_SUCCESS) {
5725 				MDI_PHCI_SET_ATTACH(ph);
5726 			} else {
5727 				MDI_DEBUG(1, (MDI_NOTE, dip,
5728 				    "!pHCI post_attach failed: error %d",
5729 				    error));
5730 				MDI_PHCI_SET_DETACH(ph);
5731 			}
5732 			break;
5733 
5734 		case DDI_RESUME:
5735 		case DDI_PM_RESUME:
5736 			MDI_DEBUG(2, (MDI_NOTE, dip,
5737 			    "pHCI post_resume: called %p", (void *)ph));
5738 			if (error == DDI_SUCCESS) {
5739 				MDI_PHCI_SET_RESUME(ph);
5740 			} else {
5741 				MDI_DEBUG(1, (MDI_NOTE, dip,
5742 				    "!pHCI post_resume failed: error %d",
5743 				    error));
5744 				MDI_PHCI_SET_SUSPEND(ph);
5745 			}
5746 			break;
5747 		}
5748 		MDI_PHCI_UNLOCK(ph);
5749 	}
5750 
5751 	if (MDI_CLIENT(dip)) {
5752 		ct = i_devi_get_client(dip);
5753 		ASSERT(ct != NULL);
5754 
5755 		MDI_CLIENT_LOCK(ct);
5756 		switch (cmd) {
5757 		case DDI_ATTACH:
5758 			MDI_DEBUG(2, (MDI_NOTE, dip,
5759 			    "client post_attach called %p", (void *)ct));
5760 			if (error != DDI_SUCCESS) {
5761 				MDI_DEBUG(1, (MDI_NOTE, dip,
5762 				    "!client post_attach failed: error %d",
5763 				    error));
5764 				MDI_CLIENT_SET_DETACH(ct);
5765 				MDI_DEBUG(4, (MDI_WARN, dip,
5766 				    "i_mdi_pm_reset_client"));
5767 				i_mdi_pm_reset_client(ct);
5768 				break;
5769 			}
5770 
5771 			/*
5772 			 * Client device has successfully attached, inform
5773 			 * the vhci.
5774 			 */
5775 			vh = ct->ct_vhci;
5776 			if (vh->vh_ops->vo_client_attached)
5777 				(*vh->vh_ops->vo_client_attached)(dip);
5778 
5779 			MDI_CLIENT_SET_ATTACH(ct);
5780 			break;
5781 
5782 		case DDI_RESUME:
5783 		case DDI_PM_RESUME:
5784 			MDI_DEBUG(2, (MDI_NOTE, dip,
5785 			    "client post_attach: called %p", (void *)ct));
5786 			if (error == DDI_SUCCESS) {
5787 				MDI_CLIENT_SET_RESUME(ct);
5788 			} else {
5789 				MDI_DEBUG(1, (MDI_NOTE, dip,
5790 				    "!client post_resume failed: error %d",
5791 				    error));
5792 				MDI_CLIENT_SET_SUSPEND(ct);
5793 			}
5794 			break;
5795 		}
5796 		MDI_CLIENT_UNLOCK(ct);
5797 	}
5798 }
5799 
5800 /*
5801  * mdi_pre_detach():
5802  *		Pre detach notification handler
5803  */
5804 /*ARGSUSED*/
5805 int
5806 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5807 {
5808 	int rv = DDI_SUCCESS;
5809 
5810 	if (MDI_CLIENT(dip)) {
5811 		(void) i_mdi_client_pre_detach(dip, cmd);
5812 	}
5813 
5814 	if (MDI_PHCI(dip)) {
5815 		rv = i_mdi_phci_pre_detach(dip, cmd);
5816 	}
5817 
5818 	return (rv);
5819 }
5820 
5821 /*ARGSUSED*/
5822 static int
5823 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5824 {
5825 	int		rv = DDI_SUCCESS;
5826 	mdi_phci_t	*ph;
5827 	mdi_client_t	*ct;
5828 	mdi_pathinfo_t	*pip;
5829 	mdi_pathinfo_t	*failed_pip = NULL;
5830 	mdi_pathinfo_t	*next;
5831 
5832 	ph = i_devi_get_phci(dip);
5833 	if (ph == NULL) {
5834 		return (rv);
5835 	}
5836 
5837 	MDI_PHCI_LOCK(ph);
5838 	switch (cmd) {
5839 	case DDI_DETACH:
5840 		MDI_DEBUG(2, (MDI_NOTE, dip,
5841 		    "pHCI pre_detach: called %p", (void *)ph));
5842 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
5843 			/*
5844 			 * mdi_pathinfo nodes are still attached to
5845 			 * this pHCI. Fail the detach for this pHCI.
5846 			 */
5847 			MDI_DEBUG(2, (MDI_WARN, dip,
5848 			    "pHCI pre_detach: paths are still attached %p",
5849 			    (void *)ph));
5850 			rv = DDI_FAILURE;
5851 			break;
5852 		}
5853 		MDI_PHCI_SET_DETACH(ph);
5854 		break;
5855 
5856 	case DDI_SUSPEND:
5857 		/*
5858 		 * pHCI is getting suspended.  Since mpxio client
5859 		 * devices may not be suspended at this point, to avoid
5860 		 * a potential stack overflow, it is important to suspend
5861 		 * client devices before pHCI can be suspended.
5862 		 */
5863 
5864 		MDI_DEBUG(2, (MDI_NOTE, dip,
5865 		    "pHCI pre_suspend: called %p", (void *)ph));
5866 		/*
5867 		 * Suspend all the client devices accessible through this pHCI
5868 		 */
5869 		pip = ph->ph_path_head;
5870 		while (pip != NULL && rv == DDI_SUCCESS) {
5871 			dev_info_t *cdip;
5872 			MDI_PI_LOCK(pip);
5873 			next =
5874 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5875 			ct = MDI_PI(pip)->pi_client;
5876 			i_mdi_client_lock(ct, pip);
5877 			cdip = ct->ct_dip;
5878 			MDI_PI_UNLOCK(pip);
5879 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5880 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5881 				i_mdi_client_unlock(ct);
5882 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5883 				    DDI_SUCCESS) {
5884 					/*
5885 					 * Suspend of one of the client
5886 					 * device has failed.
5887 					 */
5888 					MDI_DEBUG(1, (MDI_WARN, dip,
5889 					    "!suspend of device (%s%d) failed.",
5890 					    ddi_driver_name(cdip),
5891 					    ddi_get_instance(cdip)));
5892 					failed_pip = pip;
5893 					break;
5894 				}
5895 			} else {
5896 				i_mdi_client_unlock(ct);
5897 			}
5898 			pip = next;
5899 		}
5900 
5901 		if (rv == DDI_SUCCESS) {
5902 			/*
5903 			 * Suspend of client devices is complete. Proceed
5904 			 * with pHCI suspend.
5905 			 */
5906 			MDI_PHCI_SET_SUSPEND(ph);
5907 		} else {
5908 			/*
5909 			 * Revert back all the suspended client device states
5910 			 * to converse.
5911 			 */
5912 			pip = ph->ph_path_head;
5913 			while (pip != failed_pip) {
5914 				dev_info_t *cdip;
5915 				MDI_PI_LOCK(pip);
5916 				next =
5917 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5918 				ct = MDI_PI(pip)->pi_client;
5919 				i_mdi_client_lock(ct, pip);
5920 				cdip = ct->ct_dip;
5921 				MDI_PI_UNLOCK(pip);
5922 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5923 					i_mdi_client_unlock(ct);
5924 					(void) devi_attach(cdip, DDI_RESUME);
5925 				} else {
5926 					i_mdi_client_unlock(ct);
5927 				}
5928 				pip = next;
5929 			}
5930 		}
5931 		break;
5932 
5933 	default:
5934 		rv = DDI_FAILURE;
5935 		break;
5936 	}
5937 	MDI_PHCI_UNLOCK(ph);
5938 	return (rv);
5939 }
5940 
5941 /*ARGSUSED*/
5942 static int
5943 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5944 {
5945 	int		rv = DDI_SUCCESS;
5946 	mdi_client_t	*ct;
5947 
5948 	ct = i_devi_get_client(dip);
5949 	if (ct == NULL) {
5950 		return (rv);
5951 	}
5952 
5953 	MDI_CLIENT_LOCK(ct);
5954 	switch (cmd) {
5955 	case DDI_DETACH:
5956 		MDI_DEBUG(2, (MDI_NOTE, dip,
5957 		    "client pre_detach: called %p",
5958 		     (void *)ct));
5959 		MDI_CLIENT_SET_DETACH(ct);
5960 		break;
5961 
5962 	case DDI_SUSPEND:
5963 		MDI_DEBUG(2, (MDI_NOTE, dip,
5964 		    "client pre_suspend: called %p",
5965 		    (void *)ct));
5966 		MDI_CLIENT_SET_SUSPEND(ct);
5967 		break;
5968 
5969 	default:
5970 		rv = DDI_FAILURE;
5971 		break;
5972 	}
5973 	MDI_CLIENT_UNLOCK(ct);
5974 	return (rv);
5975 }
5976 
5977 /*
5978  * mdi_post_detach():
5979  *		Post detach notification handler
5980  */
5981 /*ARGSUSED*/
5982 void
5983 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5984 {
5985 	/*
5986 	 * Detach/Suspend of mpxio component failed. Update our state
5987 	 * too
5988 	 */
5989 	if (MDI_PHCI(dip))
5990 		i_mdi_phci_post_detach(dip, cmd, error);
5991 
5992 	if (MDI_CLIENT(dip))
5993 		i_mdi_client_post_detach(dip, cmd, error);
5994 }
5995 
5996 /*ARGSUSED*/
5997 static void
5998 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5999 {
6000 	mdi_phci_t	*ph;
6001 
6002 	/*
6003 	 * Detach/Suspend of phci component failed. Update our state
6004 	 * too
6005 	 */
6006 	ph = i_devi_get_phci(dip);
6007 	if (ph == NULL) {
6008 		return;
6009 	}
6010 
6011 	MDI_PHCI_LOCK(ph);
6012 	/*
6013 	 * Detach of pHCI failed. Restore back converse
6014 	 * state
6015 	 */
6016 	switch (cmd) {
6017 	case DDI_DETACH:
6018 		MDI_DEBUG(2, (MDI_NOTE, dip,
6019 		    "pHCI post_detach: called %p",
6020 		    (void *)ph));
6021 		if (error != DDI_SUCCESS)
6022 			MDI_PHCI_SET_ATTACH(ph);
6023 		break;
6024 
6025 	case DDI_SUSPEND:
6026 	case DDI_PM_SUSPEND:
6027 		MDI_DEBUG(2, (MDI_NOTE, dip,
6028 		    "pHCI post_suspend: called %p",
6029 		    (void *)ph));
6030 		if (error != DDI_SUCCESS)
6031 			MDI_PHCI_SET_RESUME(ph);
6032 		break;
6033 	case DDI_HOTPLUG_DETACH:
6034 		break;
6035 	}
6036 	MDI_PHCI_UNLOCK(ph);
6037 }
6038 
6039 /*ARGSUSED*/
6040 static void
6041 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6042 {
6043 	mdi_client_t	*ct;
6044 
6045 	ct = i_devi_get_client(dip);
6046 	if (ct == NULL) {
6047 		return;
6048 	}
6049 	MDI_CLIENT_LOCK(ct);
6050 	/*
6051 	 * Detach of Client failed. Restore back converse
6052 	 * state
6053 	 */
6054 	switch (cmd) {
6055 	case DDI_DETACH:
6056 		MDI_DEBUG(2, (MDI_NOTE, dip,
6057 		    "client post_detach: called %p", (void *)ct));
6058 		if (DEVI_IS_ATTACHING(dip)) {
6059 			MDI_DEBUG(4, (MDI_NOTE, dip,
6060 			    "i_mdi_pm_rele_client\n"));
6061 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6062 		} else {
6063 			MDI_DEBUG(4, (MDI_NOTE, dip,
6064 			    "i_mdi_pm_reset_client\n"));
6065 			i_mdi_pm_reset_client(ct);
6066 		}
6067 		if (error != DDI_SUCCESS)
6068 			MDI_CLIENT_SET_ATTACH(ct);
6069 		break;
6070 
6071 	case DDI_SUSPEND:
6072 	case DDI_PM_SUSPEND:
6073 		MDI_DEBUG(2, (MDI_NOTE, dip,
6074 		    "called %p", (void *)ct));
6075 		if (error != DDI_SUCCESS)
6076 			MDI_CLIENT_SET_RESUME(ct);
6077 		break;
6078 	case DDI_HOTPLUG_DETACH:
6079 		break;
6080 	}
6081 	MDI_CLIENT_UNLOCK(ct);
6082 }
6083 
6084 int
6085 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6086 {
6087 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6088 }
6089 
6090 /*
6091  * create and install per-path (client - pHCI) statistics
6092  * I/O stats supported: nread, nwritten, reads, and writes
6093  * Error stats - hard errors, soft errors, & transport errors
6094  */
6095 int
6096 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6097 {
6098 	kstat_t			*kiosp, *kerrsp;
6099 	struct pi_errs		*nsp;
6100 	struct mdi_pi_kstats	*mdi_statp;
6101 
6102 	if (MDI_PI(pip)->pi_kstats != NULL)
6103 		return (MDI_SUCCESS);
6104 
6105 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6106 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6107 		return (MDI_FAILURE);
6108 	}
6109 
6110 	(void) strcat(ksname, ",err");
6111 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6112 	    KSTAT_TYPE_NAMED,
6113 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6114 	if (kerrsp == NULL) {
6115 		kstat_delete(kiosp);
6116 		return (MDI_FAILURE);
6117 	}
6118 
6119 	nsp = (struct pi_errs *)kerrsp->ks_data;
6120 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6121 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6122 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6123 	    KSTAT_DATA_UINT32);
6124 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6125 	    KSTAT_DATA_UINT32);
6126 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6127 	    KSTAT_DATA_UINT32);
6128 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6129 	    KSTAT_DATA_UINT32);
6130 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6131 	    KSTAT_DATA_UINT32);
6132 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6133 	    KSTAT_DATA_UINT32);
6134 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6135 	    KSTAT_DATA_UINT32);
6136 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6137 
6138 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6139 	mdi_statp->pi_kstat_ref = 1;
6140 	mdi_statp->pi_kstat_iostats = kiosp;
6141 	mdi_statp->pi_kstat_errstats = kerrsp;
6142 	kstat_install(kiosp);
6143 	kstat_install(kerrsp);
6144 	MDI_PI(pip)->pi_kstats = mdi_statp;
6145 	return (MDI_SUCCESS);
6146 }
6147 
6148 /*
6149  * destroy per-path properties
6150  */
6151 static void
6152 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6153 {
6154 
6155 	struct mdi_pi_kstats *mdi_statp;
6156 
6157 	if (MDI_PI(pip)->pi_kstats == NULL)
6158 		return;
6159 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6160 		return;
6161 
6162 	MDI_PI(pip)->pi_kstats = NULL;
6163 
6164 	/*
6165 	 * the kstat may be shared between multiple pathinfo nodes
6166 	 * decrement this pathinfo's usage, removing the kstats
6167 	 * themselves when the last pathinfo reference is removed.
6168 	 */
6169 	ASSERT(mdi_statp->pi_kstat_ref > 0);
6170 	if (--mdi_statp->pi_kstat_ref != 0)
6171 		return;
6172 
6173 	kstat_delete(mdi_statp->pi_kstat_iostats);
6174 	kstat_delete(mdi_statp->pi_kstat_errstats);
6175 	kmem_free(mdi_statp, sizeof (*mdi_statp));
6176 }
6177 
6178 /*
6179  * update I/O paths KSTATS
6180  */
6181 void
6182 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6183 {
6184 	kstat_t *iostatp;
6185 	size_t xfer_cnt;
6186 
6187 	ASSERT(pip != NULL);
6188 
6189 	/*
6190 	 * I/O can be driven across a path prior to having path
6191 	 * statistics available, i.e. probe(9e).
6192 	 */
6193 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6194 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6195 		xfer_cnt = bp->b_bcount - bp->b_resid;
6196 		if (bp->b_flags & B_READ) {
6197 			KSTAT_IO_PTR(iostatp)->reads++;
6198 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6199 		} else {
6200 			KSTAT_IO_PTR(iostatp)->writes++;
6201 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6202 		}
6203 	}
6204 }
6205 
6206 /*
6207  * Enable the path(specific client/target/initiator)
6208  * Enabling a path means that MPxIO may select the enabled path for routing
6209  * future I/O requests, subject to other path state constraints.
6210  */
6211 int
6212 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6213 {
6214 	mdi_phci_t	*ph;
6215 
6216 	ph = MDI_PI(pip)->pi_phci;
6217 	if (ph == NULL) {
6218 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6219 		    "!failed: path %s %p: NULL ph",
6220 		    mdi_pi_spathname(pip), (void *)pip));
6221 		return (MDI_FAILURE);
6222 	}
6223 
6224 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6225 		MDI_ENABLE_OP);
6226 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6227 	    "!returning success pip = %p. ph = %p",
6228 	    (void *)pip, (void *)ph));
6229 	return (MDI_SUCCESS);
6230 
6231 }
6232 
6233 /*
6234  * Disable the path (specific client/target/initiator)
6235  * Disabling a path means that MPxIO will not select the disabled path for
6236  * routing any new I/O requests.
6237  */
6238 int
6239 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6240 {
6241 	mdi_phci_t	*ph;
6242 
6243 	ph = MDI_PI(pip)->pi_phci;
6244 	if (ph == NULL) {
6245 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6246 		    "!failed: path %s %p: NULL ph",
6247 		    mdi_pi_spathname(pip), (void *)pip));
6248 		return (MDI_FAILURE);
6249 	}
6250 
6251 	(void) i_mdi_enable_disable_path(pip,
6252 	    ph->ph_vhci, flags, MDI_DISABLE_OP);
6253 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6254 	    "!returning success pip = %p. ph = %p",
6255 	    (void *)pip, (void *)ph));
6256 	return (MDI_SUCCESS);
6257 }
6258 
6259 /*
6260  * disable the path to a particular pHCI (pHCI specified in the phci_path
6261  * argument) for a particular client (specified in the client_path argument).
6262  * Disabling a path means that MPxIO will not select the disabled path for
6263  * routing any new I/O requests.
6264  * NOTE: this will be removed once the NWS files are changed to use the new
6265  * mdi_{enable,disable}_path interfaces
6266  */
6267 int
6268 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6269 {
6270 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6271 }
6272 
6273 /*
6274  * Enable the path to a particular pHCI (pHCI specified in the phci_path
6275  * argument) for a particular client (specified in the client_path argument).
6276  * Enabling a path means that MPxIO may select the enabled path for routing
6277  * future I/O requests, subject to other path state constraints.
6278  * NOTE: this will be removed once the NWS files are changed to use the new
6279  * mdi_{enable,disable}_path interfaces
6280  */
6281 
6282 int
6283 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6284 {
6285 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6286 }
6287 
6288 /*
6289  * Common routine for doing enable/disable.
6290  */
6291 static mdi_pathinfo_t *
6292 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6293 		int op)
6294 {
6295 	int		sync_flag = 0;
6296 	int		rv;
6297 	mdi_pathinfo_t 	*next;
6298 	int		(*f)() = NULL;
6299 
6300 	/*
6301 	 * Check to make sure the path is not already in the
6302 	 * requested state. If it is just return the next path
6303 	 * as we have nothing to do here.
6304 	 */
6305 	if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6306 	    (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6307 		MDI_PI_LOCK(pip);
6308 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6309 		MDI_PI_UNLOCK(pip);
6310 		return (next);
6311 	}
6312 
6313 	f = vh->vh_ops->vo_pi_state_change;
6314 
6315 	sync_flag = (flags << 8) & 0xf00;
6316 
6317 	/*
6318 	 * Do a callback into the mdi consumer to let it
6319 	 * know that path is about to get enabled/disabled.
6320 	 */
6321 	rv = MDI_SUCCESS;
6322 	if (f != NULL) {
6323 		rv = (*f)(vh->vh_dip, pip, 0,
6324 			MDI_PI_EXT_STATE(pip),
6325 			MDI_EXT_STATE_CHANGE | sync_flag |
6326 			op | MDI_BEFORE_STATE_CHANGE);
6327 		if (rv != MDI_SUCCESS) {
6328 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6329 			    "vo_pi_state_change: failed rv = %x", rv));
6330 		}
6331 	}
6332 	MDI_PI_LOCK(pip);
6333 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6334 
6335 	switch (flags) {
6336 		case USER_DISABLE:
6337 			if (op == MDI_DISABLE_OP) {
6338 				MDI_PI_SET_USER_DISABLE(pip);
6339 			} else {
6340 				MDI_PI_SET_USER_ENABLE(pip);
6341 			}
6342 			break;
6343 		case DRIVER_DISABLE:
6344 			if (op == MDI_DISABLE_OP) {
6345 				MDI_PI_SET_DRV_DISABLE(pip);
6346 			} else {
6347 				MDI_PI_SET_DRV_ENABLE(pip);
6348 			}
6349 			break;
6350 		case DRIVER_DISABLE_TRANSIENT:
6351 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6352 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6353 			} else {
6354 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6355 			}
6356 			break;
6357 	}
6358 	MDI_PI_UNLOCK(pip);
6359 	/*
6360 	 * Do a callback into the mdi consumer to let it
6361 	 * know that path is now enabled/disabled.
6362 	 */
6363 	if (f != NULL) {
6364 		rv = (*f)(vh->vh_dip, pip, 0,
6365 			MDI_PI_EXT_STATE(pip),
6366 			MDI_EXT_STATE_CHANGE | sync_flag |
6367 			op | MDI_AFTER_STATE_CHANGE);
6368 		if (rv != MDI_SUCCESS) {
6369 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6370 			    "vo_pi_state_change failed: rv = %x", rv));
6371 		}
6372 	}
6373 	return (next);
6374 }
6375 
6376 /*
6377  * Common routine for doing enable/disable.
6378  * NOTE: this will be removed once the NWS files are changed to use the new
6379  * mdi_{enable,disable}_path has been putback
6380  */
6381 int
6382 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6383 {
6384 
6385 	mdi_phci_t	*ph;
6386 	mdi_vhci_t	*vh = NULL;
6387 	mdi_client_t	*ct;
6388 	mdi_pathinfo_t	*next, *pip;
6389 	int		found_it;
6390 
6391 	ph = i_devi_get_phci(pdip);
6392 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6393 	    "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6394 	    (void *)cdip));
6395 	if (ph == NULL) {
6396 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6397 		    "!failed: operation %d: NULL ph", op));
6398 		return (MDI_FAILURE);
6399 	}
6400 
6401 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6402 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6403 		    "!failed: invalid operation %d", op));
6404 		return (MDI_FAILURE);
6405 	}
6406 
6407 	vh = ph->ph_vhci;
6408 
6409 	if (cdip == NULL) {
6410 		/*
6411 		 * Need to mark the Phci as enabled/disabled.
6412 		 */
6413 		MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6414 		    "op %d for the phci", op));
6415 		MDI_PHCI_LOCK(ph);
6416 		switch (flags) {
6417 			case USER_DISABLE:
6418 				if (op == MDI_DISABLE_OP) {
6419 					MDI_PHCI_SET_USER_DISABLE(ph);
6420 				} else {
6421 					MDI_PHCI_SET_USER_ENABLE(ph);
6422 				}
6423 				break;
6424 			case DRIVER_DISABLE:
6425 				if (op == MDI_DISABLE_OP) {
6426 					MDI_PHCI_SET_DRV_DISABLE(ph);
6427 				} else {
6428 					MDI_PHCI_SET_DRV_ENABLE(ph);
6429 				}
6430 				break;
6431 			case DRIVER_DISABLE_TRANSIENT:
6432 				if (op == MDI_DISABLE_OP) {
6433 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6434 				} else {
6435 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6436 				}
6437 				break;
6438 			default:
6439 				MDI_PHCI_UNLOCK(ph);
6440 				MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6441 				    "!invalid flag argument= %d", flags));
6442 		}
6443 
6444 		/*
6445 		 * Phci has been disabled. Now try to enable/disable
6446 		 * path info's to each client.
6447 		 */
6448 		pip = ph->ph_path_head;
6449 		while (pip != NULL) {
6450 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6451 		}
6452 		MDI_PHCI_UNLOCK(ph);
6453 	} else {
6454 
6455 		/*
6456 		 * Disable a specific client.
6457 		 */
6458 		ct = i_devi_get_client(cdip);
6459 		if (ct == NULL) {
6460 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6461 			    "!failed: operation = %d: NULL ct", op));
6462 			return (MDI_FAILURE);
6463 		}
6464 
6465 		MDI_CLIENT_LOCK(ct);
6466 		pip = ct->ct_path_head;
6467 		found_it = 0;
6468 		while (pip != NULL) {
6469 			MDI_PI_LOCK(pip);
6470 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6471 			if (MDI_PI(pip)->pi_phci == ph) {
6472 				MDI_PI_UNLOCK(pip);
6473 				found_it = 1;
6474 				break;
6475 			}
6476 			MDI_PI_UNLOCK(pip);
6477 			pip = next;
6478 		}
6479 
6480 
6481 		MDI_CLIENT_UNLOCK(ct);
6482 		if (found_it == 0) {
6483 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6484 			    "!failed. Could not find corresponding pip\n"));
6485 			return (MDI_FAILURE);
6486 		}
6487 
6488 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
6489 	}
6490 
6491 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6492 	    "!op %d returning success pdip = %p cdip = %p",
6493 	    op, (void *)pdip, (void *)cdip));
6494 	return (MDI_SUCCESS);
6495 }
6496 
6497 /*
6498  * Ensure phci powered up
6499  */
6500 static void
6501 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6502 {
6503 	dev_info_t	*ph_dip;
6504 
6505 	ASSERT(pip != NULL);
6506 	ASSERT(MDI_PI_LOCKED(pip));
6507 
6508 	if (MDI_PI(pip)->pi_pm_held) {
6509 		return;
6510 	}
6511 
6512 	ph_dip = mdi_pi_get_phci(pip);
6513 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6514 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
6515 	if (ph_dip == NULL) {
6516 		return;
6517 	}
6518 
6519 	MDI_PI_UNLOCK(pip);
6520 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6521 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6522 	pm_hold_power(ph_dip);
6523 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6524 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6525 	MDI_PI_LOCK(pip);
6526 
6527 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6528 	if (DEVI(ph_dip)->devi_pm_info)
6529 		MDI_PI(pip)->pi_pm_held = 1;
6530 }
6531 
6532 /*
6533  * Allow phci powered down
6534  */
6535 static void
6536 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6537 {
6538 	dev_info_t	*ph_dip = NULL;
6539 
6540 	ASSERT(pip != NULL);
6541 	ASSERT(MDI_PI_LOCKED(pip));
6542 
6543 	if (MDI_PI(pip)->pi_pm_held == 0) {
6544 		return;
6545 	}
6546 
6547 	ph_dip = mdi_pi_get_phci(pip);
6548 	ASSERT(ph_dip != NULL);
6549 
6550 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6551 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
6552 
6553 	MDI_PI_UNLOCK(pip);
6554 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6555 	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6556 	pm_rele_power(ph_dip);
6557 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6558 	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6559 	MDI_PI_LOCK(pip);
6560 
6561 	MDI_PI(pip)->pi_pm_held = 0;
6562 }
6563 
6564 static void
6565 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6566 {
6567 	ASSERT(MDI_CLIENT_LOCKED(ct));
6568 
6569 	ct->ct_power_cnt += incr;
6570 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6571 	    "%p ct_power_cnt = %d incr = %d",
6572 	    (void *)ct, ct->ct_power_cnt, incr));
6573 	ASSERT(ct->ct_power_cnt >= 0);
6574 }
6575 
6576 static void
6577 i_mdi_rele_all_phci(mdi_client_t *ct)
6578 {
6579 	mdi_pathinfo_t  *pip;
6580 
6581 	ASSERT(MDI_CLIENT_LOCKED(ct));
6582 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6583 	while (pip != NULL) {
6584 		mdi_hold_path(pip);
6585 		MDI_PI_LOCK(pip);
6586 		i_mdi_pm_rele_pip(pip);
6587 		MDI_PI_UNLOCK(pip);
6588 		mdi_rele_path(pip);
6589 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6590 	}
6591 }
6592 
6593 static void
6594 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6595 {
6596 	ASSERT(MDI_CLIENT_LOCKED(ct));
6597 
6598 	if (i_ddi_devi_attached(ct->ct_dip)) {
6599 		ct->ct_power_cnt -= decr;
6600 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6601 		    "%p ct_power_cnt = %d decr = %d",
6602 		    (void *)ct, ct->ct_power_cnt, decr));
6603 	}
6604 
6605 	ASSERT(ct->ct_power_cnt >= 0);
6606 	if (ct->ct_power_cnt == 0) {
6607 		i_mdi_rele_all_phci(ct);
6608 		return;
6609 	}
6610 }
6611 
6612 static void
6613 i_mdi_pm_reset_client(mdi_client_t *ct)
6614 {
6615 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6616 	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6617 	ASSERT(MDI_CLIENT_LOCKED(ct));
6618 	ct->ct_power_cnt = 0;
6619 	i_mdi_rele_all_phci(ct);
6620 	ct->ct_powercnt_config = 0;
6621 	ct->ct_powercnt_unconfig = 0;
6622 	ct->ct_powercnt_reset = 1;
6623 }
6624 
6625 static int
6626 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6627 {
6628 	int		ret;
6629 	dev_info_t	*ph_dip;
6630 
6631 	MDI_PI_LOCK(pip);
6632 	i_mdi_pm_hold_pip(pip);
6633 
6634 	ph_dip = mdi_pi_get_phci(pip);
6635 	MDI_PI_UNLOCK(pip);
6636 
6637 	/* bring all components of phci to full power */
6638 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6639 	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6640 	    ddi_get_instance(ph_dip), (void *)pip));
6641 
6642 	ret = pm_powerup(ph_dip);
6643 
6644 	if (ret == DDI_FAILURE) {
6645 		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6646 		    "pm_powerup FAILED for %s%d %p",
6647 		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6648 		    (void *)pip));
6649 
6650 		MDI_PI_LOCK(pip);
6651 		i_mdi_pm_rele_pip(pip);
6652 		MDI_PI_UNLOCK(pip);
6653 		return (MDI_FAILURE);
6654 	}
6655 
6656 	return (MDI_SUCCESS);
6657 }
6658 
6659 static int
6660 i_mdi_power_all_phci(mdi_client_t *ct)
6661 {
6662 	mdi_pathinfo_t  *pip;
6663 	int		succeeded = 0;
6664 
6665 	ASSERT(MDI_CLIENT_LOCKED(ct));
6666 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6667 	while (pip != NULL) {
6668 		/*
6669 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6670 		 * or MDI_PATHINFO_STATE_OFFLINE.
6671 		 */
6672 		if (MDI_PI_IS_INIT(pip) ||
6673 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6674 			mdi_hold_path(pip);
6675 			MDI_CLIENT_UNLOCK(ct);
6676 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6677 				succeeded = 1;
6678 
6679 			ASSERT(ct == MDI_PI(pip)->pi_client);
6680 			MDI_CLIENT_LOCK(ct);
6681 			mdi_rele_path(pip);
6682 		}
6683 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6684 	}
6685 
6686 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6687 }
6688 
6689 /*
6690  * mdi_bus_power():
6691  *		1. Place the phci(s) into powered up state so that
6692  *		   client can do power management
6693  *		2. Ensure phci powered up as client power managing
6694  * Return Values:
6695  *		MDI_SUCCESS
6696  *		MDI_FAILURE
6697  */
6698 int
6699 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6700     void *arg, void *result)
6701 {
6702 	int			ret = MDI_SUCCESS;
6703 	pm_bp_child_pwrchg_t	*bpc;
6704 	mdi_client_t		*ct;
6705 	dev_info_t		*cdip;
6706 	pm_bp_has_changed_t	*bphc;
6707 
6708 	/*
6709 	 * BUS_POWER_NOINVOL not supported
6710 	 */
6711 	if (op == BUS_POWER_NOINVOL)
6712 		return (MDI_FAILURE);
6713 
6714 	/*
6715 	 * ignore other OPs.
6716 	 * return quickly to save cou cycles on the ct processing
6717 	 */
6718 	switch (op) {
6719 	case BUS_POWER_PRE_NOTIFICATION:
6720 	case BUS_POWER_POST_NOTIFICATION:
6721 		bpc = (pm_bp_child_pwrchg_t *)arg;
6722 		cdip = bpc->bpc_dip;
6723 		break;
6724 	case BUS_POWER_HAS_CHANGED:
6725 		bphc = (pm_bp_has_changed_t *)arg;
6726 		cdip = bphc->bphc_dip;
6727 		break;
6728 	default:
6729 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6730 	}
6731 
6732 	ASSERT(MDI_CLIENT(cdip));
6733 
6734 	ct = i_devi_get_client(cdip);
6735 	if (ct == NULL)
6736 		return (MDI_FAILURE);
6737 
6738 	/*
6739 	 * wait till the mdi_pathinfo node state change are processed
6740 	 */
6741 	MDI_CLIENT_LOCK(ct);
6742 	switch (op) {
6743 	case BUS_POWER_PRE_NOTIFICATION:
6744 		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6745 		    "BUS_POWER_PRE_NOTIFICATION:"
6746 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6747 		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6748 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6749 
6750 		/* serialize power level change per client */
6751 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6752 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6753 
6754 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
6755 
6756 		if (ct->ct_power_cnt == 0) {
6757 			ret = i_mdi_power_all_phci(ct);
6758 		}
6759 
6760 		/*
6761 		 * if new_level > 0:
6762 		 *	- hold phci(s)
6763 		 *	- power up phci(s) if not already
6764 		 * ignore power down
6765 		 */
6766 		if (bpc->bpc_nlevel > 0) {
6767 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6768 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6769 				    "i_mdi_pm_hold_client\n"));
6770 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6771 			}
6772 		}
6773 		break;
6774 	case BUS_POWER_POST_NOTIFICATION:
6775 		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6776 		    "BUS_POWER_POST_NOTIFICATION:"
6777 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
6778 		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6779 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6780 		    *(int *)result));
6781 
6782 		if (*(int *)result == DDI_SUCCESS) {
6783 			if (bpc->bpc_nlevel > 0) {
6784 				MDI_CLIENT_SET_POWER_UP(ct);
6785 			} else {
6786 				MDI_CLIENT_SET_POWER_DOWN(ct);
6787 			}
6788 		}
6789 
6790 		/* release the hold we did in pre-notification */
6791 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6792 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
6793 			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6794 			    "i_mdi_pm_rele_client\n"));
6795 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6796 		}
6797 
6798 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6799 			/* another thread might started attaching */
6800 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6801 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6802 				    "i_mdi_pm_rele_client\n"));
6803 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6804 			/* detaching has been taken care in pm_post_unconfig */
6805 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6806 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6807 				    "i_mdi_pm_reset_client\n"));
6808 				i_mdi_pm_reset_client(ct);
6809 			}
6810 		}
6811 
6812 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6813 		cv_broadcast(&ct->ct_powerchange_cv);
6814 
6815 		break;
6816 
6817 	/* need to do more */
6818 	case BUS_POWER_HAS_CHANGED:
6819 		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6820 		    "BUS_POWER_HAS_CHANGED:"
6821 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6822 		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6823 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6824 
6825 		if (bphc->bphc_nlevel > 0 &&
6826 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
6827 			if (ct->ct_power_cnt == 0) {
6828 				ret = i_mdi_power_all_phci(ct);
6829 			}
6830 			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6831 			    "i_mdi_pm_hold_client\n"));
6832 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
6833 		}
6834 
6835 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6836 			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6837 			    "i_mdi_pm_rele_client\n"));
6838 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6839 		}
6840 		break;
6841 	default:
6842 		dev_err(parent, CE_WARN, "!unhandled bus power operation: 0x%x",
6843 		    op);
6844 		break;
6845 	}
6846 
6847 	MDI_CLIENT_UNLOCK(ct);
6848 	return (ret);
6849 }
6850 
6851 static int
6852 i_mdi_pm_pre_config_one(dev_info_t *child)
6853 {
6854 	int		ret = MDI_SUCCESS;
6855 	mdi_client_t	*ct;
6856 
6857 	ct = i_devi_get_client(child);
6858 	if (ct == NULL)
6859 		return (MDI_FAILURE);
6860 
6861 	MDI_CLIENT_LOCK(ct);
6862 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6863 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6864 
6865 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6866 		MDI_CLIENT_UNLOCK(ct);
6867 		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6868 		return (MDI_SUCCESS);
6869 	}
6870 
6871 	if (ct->ct_powercnt_config) {
6872 		MDI_CLIENT_UNLOCK(ct);
6873 		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6874 		return (MDI_SUCCESS);
6875 	}
6876 
6877 	if (ct->ct_power_cnt == 0) {
6878 		ret = i_mdi_power_all_phci(ct);
6879 	}
6880 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6881 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6882 	ct->ct_powercnt_config = 1;
6883 	ct->ct_powercnt_reset = 0;
6884 	MDI_CLIENT_UNLOCK(ct);
6885 	return (ret);
6886 }
6887 
6888 static int
6889 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6890 {
6891 	int			ret = MDI_SUCCESS;
6892 	dev_info_t		*cdip;
6893 	int			circ;
6894 
6895 	ASSERT(MDI_VHCI(vdip));
6896 
6897 	/* ndi_devi_config_one */
6898 	if (child) {
6899 		ASSERT(DEVI_BUSY_OWNED(vdip));
6900 		return (i_mdi_pm_pre_config_one(child));
6901 	}
6902 
6903 	/* devi_config_common */
6904 	ndi_devi_enter(vdip, &circ);
6905 	cdip = ddi_get_child(vdip);
6906 	while (cdip) {
6907 		dev_info_t *next = ddi_get_next_sibling(cdip);
6908 
6909 		ret = i_mdi_pm_pre_config_one(cdip);
6910 		if (ret != MDI_SUCCESS)
6911 			break;
6912 		cdip = next;
6913 	}
6914 	ndi_devi_exit(vdip, circ);
6915 	return (ret);
6916 }
6917 
6918 static int
6919 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6920 {
6921 	int		ret = MDI_SUCCESS;
6922 	mdi_client_t	*ct;
6923 
6924 	ct = i_devi_get_client(child);
6925 	if (ct == NULL)
6926 		return (MDI_FAILURE);
6927 
6928 	MDI_CLIENT_LOCK(ct);
6929 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6930 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6931 
6932 	if (!i_ddi_devi_attached(child)) {
6933 		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6934 		MDI_CLIENT_UNLOCK(ct);
6935 		return (MDI_SUCCESS);
6936 	}
6937 
6938 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6939 	    (flags & NDI_AUTODETACH)) {
6940 		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6941 		MDI_CLIENT_UNLOCK(ct);
6942 		return (MDI_FAILURE);
6943 	}
6944 
6945 	if (ct->ct_powercnt_unconfig) {
6946 		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6947 		MDI_CLIENT_UNLOCK(ct);
6948 		*held = 1;
6949 		return (MDI_SUCCESS);
6950 	}
6951 
6952 	if (ct->ct_power_cnt == 0) {
6953 		ret = i_mdi_power_all_phci(ct);
6954 	}
6955 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6956 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6957 	ct->ct_powercnt_unconfig = 1;
6958 	ct->ct_powercnt_reset = 0;
6959 	MDI_CLIENT_UNLOCK(ct);
6960 	if (ret == MDI_SUCCESS)
6961 		*held = 1;
6962 	return (ret);
6963 }
6964 
6965 static int
6966 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6967     int flags)
6968 {
6969 	int			ret = MDI_SUCCESS;
6970 	dev_info_t		*cdip;
6971 	int			circ;
6972 
6973 	ASSERT(MDI_VHCI(vdip));
6974 	*held = 0;
6975 
6976 	/* ndi_devi_unconfig_one */
6977 	if (child) {
6978 		ASSERT(DEVI_BUSY_OWNED(vdip));
6979 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6980 	}
6981 
6982 	/* devi_unconfig_common */
6983 	ndi_devi_enter(vdip, &circ);
6984 	cdip = ddi_get_child(vdip);
6985 	while (cdip) {
6986 		dev_info_t *next = ddi_get_next_sibling(cdip);
6987 
6988 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6989 		cdip = next;
6990 	}
6991 	ndi_devi_exit(vdip, circ);
6992 
6993 	if (*held)
6994 		ret = MDI_SUCCESS;
6995 
6996 	return (ret);
6997 }
6998 
6999 static void
7000 i_mdi_pm_post_config_one(dev_info_t *child)
7001 {
7002 	mdi_client_t	*ct;
7003 
7004 	ct = i_devi_get_client(child);
7005 	if (ct == NULL)
7006 		return;
7007 
7008 	MDI_CLIENT_LOCK(ct);
7009 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7010 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7011 
7012 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
7013 		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
7014 		MDI_CLIENT_UNLOCK(ct);
7015 		return;
7016 	}
7017 
7018 	/* client has not been updated */
7019 	if (MDI_CLIENT_IS_FAILED(ct)) {
7020 		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
7021 		MDI_CLIENT_UNLOCK(ct);
7022 		return;
7023 	}
7024 
7025 	/* another thread might have powered it down or detached it */
7026 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7027 	    !DEVI_IS_ATTACHING(child)) ||
7028 	    (!i_ddi_devi_attached(child) &&
7029 	    !DEVI_IS_ATTACHING(child))) {
7030 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7031 		i_mdi_pm_reset_client(ct);
7032 	} else {
7033 		mdi_pathinfo_t  *pip, *next;
7034 		int	valid_path_count = 0;
7035 
7036 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7037 		pip = ct->ct_path_head;
7038 		while (pip != NULL) {
7039 			MDI_PI_LOCK(pip);
7040 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7041 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7042 				valid_path_count ++;
7043 			MDI_PI_UNLOCK(pip);
7044 			pip = next;
7045 		}
7046 		i_mdi_pm_rele_client(ct, valid_path_count);
7047 	}
7048 	ct->ct_powercnt_config = 0;
7049 	MDI_CLIENT_UNLOCK(ct);
7050 }
7051 
7052 static void
7053 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7054 {
7055 	int		circ;
7056 	dev_info_t	*cdip;
7057 
7058 	ASSERT(MDI_VHCI(vdip));
7059 
7060 	/* ndi_devi_config_one */
7061 	if (child) {
7062 		ASSERT(DEVI_BUSY_OWNED(vdip));
7063 		i_mdi_pm_post_config_one(child);
7064 		return;
7065 	}
7066 
7067 	/* devi_config_common */
7068 	ndi_devi_enter(vdip, &circ);
7069 	cdip = ddi_get_child(vdip);
7070 	while (cdip) {
7071 		dev_info_t *next = ddi_get_next_sibling(cdip);
7072 
7073 		i_mdi_pm_post_config_one(cdip);
7074 		cdip = next;
7075 	}
7076 	ndi_devi_exit(vdip, circ);
7077 }
7078 
7079 static void
7080 i_mdi_pm_post_unconfig_one(dev_info_t *child)
7081 {
7082 	mdi_client_t	*ct;
7083 
7084 	ct = i_devi_get_client(child);
7085 	if (ct == NULL)
7086 		return;
7087 
7088 	MDI_CLIENT_LOCK(ct);
7089 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7090 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7091 
7092 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7093 		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7094 		MDI_CLIENT_UNLOCK(ct);
7095 		return;
7096 	}
7097 
7098 	/* failure detaching or another thread just attached it */
7099 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7100 	    i_ddi_devi_attached(child)) ||
7101 	    (!i_ddi_devi_attached(child) &&
7102 	    !DEVI_IS_ATTACHING(child))) {
7103 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7104 		i_mdi_pm_reset_client(ct);
7105 	} else {
7106 		mdi_pathinfo_t  *pip, *next;
7107 		int	valid_path_count = 0;
7108 
7109 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7110 		pip = ct->ct_path_head;
7111 		while (pip != NULL) {
7112 			MDI_PI_LOCK(pip);
7113 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7114 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7115 				valid_path_count ++;
7116 			MDI_PI_UNLOCK(pip);
7117 			pip = next;
7118 		}
7119 		i_mdi_pm_rele_client(ct, valid_path_count);
7120 		ct->ct_powercnt_unconfig = 0;
7121 	}
7122 
7123 	MDI_CLIENT_UNLOCK(ct);
7124 }
7125 
7126 static void
7127 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7128 {
7129 	int			circ;
7130 	dev_info_t		*cdip;
7131 
7132 	ASSERT(MDI_VHCI(vdip));
7133 
7134 	if (!held) {
7135 		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7136 		return;
7137 	}
7138 
7139 	if (child) {
7140 		ASSERT(DEVI_BUSY_OWNED(vdip));
7141 		i_mdi_pm_post_unconfig_one(child);
7142 		return;
7143 	}
7144 
7145 	ndi_devi_enter(vdip, &circ);
7146 	cdip = ddi_get_child(vdip);
7147 	while (cdip) {
7148 		dev_info_t *next = ddi_get_next_sibling(cdip);
7149 
7150 		i_mdi_pm_post_unconfig_one(cdip);
7151 		cdip = next;
7152 	}
7153 	ndi_devi_exit(vdip, circ);
7154 }
7155 
7156 int
7157 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7158 {
7159 	int			circ, ret = MDI_SUCCESS;
7160 	dev_info_t		*client_dip = NULL;
7161 	mdi_client_t		*ct;
7162 
7163 	/*
7164 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7165 	 * Power up pHCI for the named client device.
7166 	 * Note: Before the client is enumerated under vhci by phci,
7167 	 * client_dip can be NULL. Then proceed to power up all the
7168 	 * pHCIs.
7169 	 */
7170 	if (devnm != NULL) {
7171 		ndi_devi_enter(vdip, &circ);
7172 		client_dip = ndi_devi_findchild(vdip, devnm);
7173 	}
7174 
7175 	MDI_DEBUG(4, (MDI_NOTE, vdip,
7176 	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7177 
7178 	switch (op) {
7179 	case MDI_PM_PRE_CONFIG:
7180 		ret = i_mdi_pm_pre_config(vdip, client_dip);
7181 		break;
7182 
7183 	case MDI_PM_PRE_UNCONFIG:
7184 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7185 		    flags);
7186 		break;
7187 
7188 	case MDI_PM_POST_CONFIG:
7189 		i_mdi_pm_post_config(vdip, client_dip);
7190 		break;
7191 
7192 	case MDI_PM_POST_UNCONFIG:
7193 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7194 		break;
7195 
7196 	case MDI_PM_HOLD_POWER:
7197 	case MDI_PM_RELE_POWER:
7198 		ASSERT(args);
7199 
7200 		client_dip = (dev_info_t *)args;
7201 		ASSERT(MDI_CLIENT(client_dip));
7202 
7203 		ct = i_devi_get_client(client_dip);
7204 		MDI_CLIENT_LOCK(ct);
7205 
7206 		if (op == MDI_PM_HOLD_POWER) {
7207 			if (ct->ct_power_cnt == 0) {
7208 				(void) i_mdi_power_all_phci(ct);
7209 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7210 				    "i_mdi_pm_hold_client\n"));
7211 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
7212 			}
7213 		} else {
7214 			if (DEVI_IS_ATTACHING(client_dip)) {
7215 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7216 				    "i_mdi_pm_rele_client\n"));
7217 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
7218 			} else {
7219 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7220 				    "i_mdi_pm_reset_client\n"));
7221 				i_mdi_pm_reset_client(ct);
7222 			}
7223 		}
7224 
7225 		MDI_CLIENT_UNLOCK(ct);
7226 		break;
7227 
7228 	default:
7229 		break;
7230 	}
7231 
7232 	if (devnm)
7233 		ndi_devi_exit(vdip, circ);
7234 
7235 	return (ret);
7236 }
7237 
7238 int
7239 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7240 {
7241 	mdi_vhci_t *vhci;
7242 
7243 	if (!MDI_VHCI(dip))
7244 		return (MDI_FAILURE);
7245 
7246 	if (mdi_class) {
7247 		vhci = DEVI(dip)->devi_mdi_xhci;
7248 		ASSERT(vhci);
7249 		*mdi_class = vhci->vh_class;
7250 	}
7251 
7252 	return (MDI_SUCCESS);
7253 }
7254 
7255 int
7256 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7257 {
7258 	mdi_phci_t *phci;
7259 
7260 	if (!MDI_PHCI(dip))
7261 		return (MDI_FAILURE);
7262 
7263 	if (mdi_class) {
7264 		phci = DEVI(dip)->devi_mdi_xhci;
7265 		ASSERT(phci);
7266 		*mdi_class = phci->ph_vhci->vh_class;
7267 	}
7268 
7269 	return (MDI_SUCCESS);
7270 }
7271 
7272 int
7273 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7274 {
7275 	mdi_client_t *client;
7276 
7277 	if (!MDI_CLIENT(dip))
7278 		return (MDI_FAILURE);
7279 
7280 	if (mdi_class) {
7281 		client = DEVI(dip)->devi_mdi_client;
7282 		ASSERT(client);
7283 		*mdi_class = client->ct_vhci->vh_class;
7284 	}
7285 
7286 	return (MDI_SUCCESS);
7287 }
7288 
7289 void *
7290 mdi_client_get_vhci_private(dev_info_t *dip)
7291 {
7292 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7293 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7294 		mdi_client_t	*ct;
7295 		ct = i_devi_get_client(dip);
7296 		return (ct->ct_vprivate);
7297 	}
7298 	return (NULL);
7299 }
7300 
7301 void
7302 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7303 {
7304 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7305 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7306 		mdi_client_t	*ct;
7307 		ct = i_devi_get_client(dip);
7308 		ct->ct_vprivate = data;
7309 	}
7310 }
7311 /*
7312  * mdi_pi_get_vhci_private():
7313  *		Get the vhci private information associated with the
7314  *		mdi_pathinfo node
7315  */
7316 void *
7317 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7318 {
7319 	caddr_t	vprivate = NULL;
7320 	if (pip) {
7321 		vprivate = MDI_PI(pip)->pi_vprivate;
7322 	}
7323 	return (vprivate);
7324 }
7325 
7326 /*
7327  * mdi_pi_set_vhci_private():
7328  *		Set the vhci private information in the mdi_pathinfo node
7329  */
7330 void
7331 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7332 {
7333 	if (pip) {
7334 		MDI_PI(pip)->pi_vprivate = priv;
7335 	}
7336 }
7337 
7338 /*
7339  * mdi_phci_get_vhci_private():
7340  *		Get the vhci private information associated with the
7341  *		mdi_phci node
7342  */
7343 void *
7344 mdi_phci_get_vhci_private(dev_info_t *dip)
7345 {
7346 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7347 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7348 		mdi_phci_t	*ph;
7349 		ph = i_devi_get_phci(dip);
7350 		return (ph->ph_vprivate);
7351 	}
7352 	return (NULL);
7353 }
7354 
7355 /*
7356  * mdi_phci_set_vhci_private():
7357  *		Set the vhci private information in the mdi_phci node
7358  */
7359 void
7360 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7361 {
7362 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7363 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7364 		mdi_phci_t	*ph;
7365 		ph = i_devi_get_phci(dip);
7366 		ph->ph_vprivate = priv;
7367 	}
7368 }
7369 
7370 int
7371 mdi_pi_ishidden(mdi_pathinfo_t *pip)
7372 {
7373 	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7374 }
7375 
7376 int
7377 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7378 {
7379 	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7380 }
7381 
7382 /* Return 1 if all client paths are device_removed */
7383 static int
7384 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7385 {
7386 	mdi_pathinfo_t  *pip;
7387 	int		all_devices_removed = 1;
7388 
7389 	MDI_CLIENT_LOCK(ct);
7390 	for (pip = ct->ct_path_head; pip;
7391 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7392 		if (!mdi_pi_device_isremoved(pip)) {
7393 			all_devices_removed = 0;
7394 			break;
7395 		}
7396 	}
7397 	MDI_CLIENT_UNLOCK(ct);
7398 	return (all_devices_removed);
7399 }
7400 
7401 /*
7402  * When processing path hotunplug, represent device removal.
7403  */
7404 int
7405 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7406 {
7407 	mdi_client_t	*ct;
7408 
7409 	MDI_PI_LOCK(pip);
7410 	if (mdi_pi_device_isremoved(pip)) {
7411 		MDI_PI_UNLOCK(pip);
7412 		return (0);
7413 	}
7414 	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7415 	MDI_PI_FLAGS_SET_HIDDEN(pip);
7416 	MDI_PI_UNLOCK(pip);
7417 
7418 	/*
7419 	 * If all paths associated with the client are now DEVICE_REMOVED,
7420 	 * reflect DEVICE_REMOVED in the client.
7421 	 */
7422 	ct = MDI_PI(pip)->pi_client;
7423 	if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7424 		(void) ndi_devi_device_remove(ct->ct_dip);
7425 	else
7426 		i_ddi_di_cache_invalidate();
7427 
7428 	return (1);
7429 }
7430 
7431 /*
7432  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7433  * is now accessible then this interfaces is used to represent device insertion.
7434  */
7435 int
7436 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7437 {
7438 	MDI_PI_LOCK(pip);
7439 	if (!mdi_pi_device_isremoved(pip)) {
7440 		MDI_PI_UNLOCK(pip);
7441 		return (0);
7442 	}
7443 	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7444 	MDI_PI_FLAGS_CLR_HIDDEN(pip);
7445 	MDI_PI_UNLOCK(pip);
7446 
7447 	i_ddi_di_cache_invalidate();
7448 
7449 	return (1);
7450 }
7451 
7452 /*
7453  * List of vhci class names:
7454  * A vhci class name must be in this list only if the corresponding vhci
7455  * driver intends to use the mdi provided bus config implementation
7456  * (i.e., mdi_vhci_bus_config()).
7457  */
7458 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7459 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
7460 
7461 /*
7462  * During boot time, the on-disk vhci cache for every vhci class is read
7463  * in the form of an nvlist and stored here.
7464  */
7465 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7466 
7467 /* nvpair names in vhci cache nvlist */
7468 #define	MDI_VHCI_CACHE_VERSION	1
7469 #define	MDI_NVPNAME_VERSION	"version"
7470 #define	MDI_NVPNAME_PHCIS	"phcis"
7471 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
7472 
7473 /*
7474  * Given vhci class name, return its on-disk vhci cache filename.
7475  * Memory for the returned filename which includes the full path is allocated
7476  * by this function.
7477  */
7478 static char *
7479 vhclass2vhcache_filename(char *vhclass)
7480 {
7481 	char *filename;
7482 	int len;
7483 	static char *fmt = "/etc/devices/mdi_%s_cache";
7484 
7485 	/*
7486 	 * fmt contains the on-disk vhci cache file name format;
7487 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7488 	 */
7489 
7490 	/* the -1 below is to account for "%s" in the format string */
7491 	len = strlen(fmt) + strlen(vhclass) - 1;
7492 	filename = kmem_alloc(len, KM_SLEEP);
7493 	(void) snprintf(filename, len, fmt, vhclass);
7494 	ASSERT(len == (strlen(filename) + 1));
7495 	return (filename);
7496 }
7497 
7498 /*
7499  * initialize the vhci cache related data structures and read the on-disk
7500  * vhci cached data into memory.
7501  */
7502 static void
7503 setup_vhci_cache(mdi_vhci_t *vh)
7504 {
7505 	mdi_vhci_config_t *vhc;
7506 	mdi_vhci_cache_t *vhcache;
7507 	int i;
7508 	nvlist_t *nvl = NULL;
7509 
7510 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7511 	vh->vh_config = vhc;
7512 	vhcache = &vhc->vhc_vhcache;
7513 
7514 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7515 
7516 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7517 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7518 
7519 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7520 
7521 	/*
7522 	 * Create string hash; same as mod_hash_create_strhash() except that
7523 	 * we use NULL key destructor.
7524 	 */
7525 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7526 	    mdi_bus_config_cache_hash_size,
7527 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
7528 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7529 
7530 	/*
7531 	 * The on-disk vhci cache is read during booting prior to the
7532 	 * lights-out period by mdi_read_devices_files().
7533 	 */
7534 	for (i = 0; i < N_VHCI_CLASSES; i++) {
7535 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7536 			nvl = vhcache_nvl[i];
7537 			vhcache_nvl[i] = NULL;
7538 			break;
7539 		}
7540 	}
7541 
7542 	/*
7543 	 * this is to cover the case of some one manually causing unloading
7544 	 * (or detaching) and reloading (or attaching) of a vhci driver.
7545 	 */
7546 	if (nvl == NULL && modrootloaded)
7547 		nvl = read_on_disk_vhci_cache(vh->vh_class);
7548 
7549 	if (nvl != NULL) {
7550 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7551 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7552 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7553 		else  {
7554 			cmn_err(CE_WARN,
7555 			    "%s: data file corrupted, will recreate",
7556 			    vhc->vhc_vhcache_filename);
7557 		}
7558 		rw_exit(&vhcache->vhcache_lock);
7559 		nvlist_free(nvl);
7560 	}
7561 
7562 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7563 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7564 
7565 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7566 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7567 }
7568 
7569 /*
7570  * free all vhci cache related resources
7571  */
7572 static int
7573 destroy_vhci_cache(mdi_vhci_t *vh)
7574 {
7575 	mdi_vhci_config_t *vhc = vh->vh_config;
7576 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7577 	mdi_vhcache_phci_t *cphci, *cphci_next;
7578 	mdi_vhcache_client_t *cct, *cct_next;
7579 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7580 
7581 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7582 		return (MDI_FAILURE);
7583 
7584 	kmem_free(vhc->vhc_vhcache_filename,
7585 	    strlen(vhc->vhc_vhcache_filename) + 1);
7586 
7587 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7588 
7589 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7590 	    cphci = cphci_next) {
7591 		cphci_next = cphci->cphci_next;
7592 		free_vhcache_phci(cphci);
7593 	}
7594 
7595 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7596 		cct_next = cct->cct_next;
7597 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7598 			cpi_next = cpi->cpi_next;
7599 			free_vhcache_pathinfo(cpi);
7600 		}
7601 		free_vhcache_client(cct);
7602 	}
7603 
7604 	rw_destroy(&vhcache->vhcache_lock);
7605 
7606 	mutex_destroy(&vhc->vhc_lock);
7607 	cv_destroy(&vhc->vhc_cv);
7608 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
7609 	return (MDI_SUCCESS);
7610 }
7611 
7612 /*
7613  * Stop all vhci cache related async threads and free their resources.
7614  */
7615 static int
7616 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7617 {
7618 	mdi_async_client_config_t *acc, *acc_next;
7619 
7620 	mutex_enter(&vhc->vhc_lock);
7621 	vhc->vhc_flags |= MDI_VHC_EXIT;
7622 	ASSERT(vhc->vhc_acc_thrcount >= 0);
7623 	cv_broadcast(&vhc->vhc_cv);
7624 
7625 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7626 	    vhc->vhc_acc_thrcount != 0) {
7627 		mutex_exit(&vhc->vhc_lock);
7628 		delay_random(mdi_delay);
7629 		mutex_enter(&vhc->vhc_lock);
7630 	}
7631 
7632 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
7633 
7634 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7635 		acc_next = acc->acc_next;
7636 		free_async_client_config(acc);
7637 	}
7638 	vhc->vhc_acc_list_head = NULL;
7639 	vhc->vhc_acc_list_tail = NULL;
7640 	vhc->vhc_acc_count = 0;
7641 
7642 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7643 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7644 		mutex_exit(&vhc->vhc_lock);
7645 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7646 			vhcache_dirty(vhc);
7647 			return (MDI_FAILURE);
7648 		}
7649 	} else
7650 		mutex_exit(&vhc->vhc_lock);
7651 
7652 	if (callb_delete(vhc->vhc_cbid) != 0)
7653 		return (MDI_FAILURE);
7654 
7655 	return (MDI_SUCCESS);
7656 }
7657 
7658 /*
7659  * Stop vhci cache flush thread
7660  */
7661 /* ARGSUSED */
7662 static boolean_t
7663 stop_vhcache_flush_thread(void *arg, int code)
7664 {
7665 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7666 
7667 	mutex_enter(&vhc->vhc_lock);
7668 	vhc->vhc_flags |= MDI_VHC_EXIT;
7669 	cv_broadcast(&vhc->vhc_cv);
7670 
7671 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7672 		mutex_exit(&vhc->vhc_lock);
7673 		delay_random(mdi_delay);
7674 		mutex_enter(&vhc->vhc_lock);
7675 	}
7676 
7677 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7678 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7679 		mutex_exit(&vhc->vhc_lock);
7680 		(void) flush_vhcache(vhc, 1);
7681 	} else
7682 		mutex_exit(&vhc->vhc_lock);
7683 
7684 	return (B_TRUE);
7685 }
7686 
7687 /*
7688  * Enqueue the vhcache phci (cphci) at the tail of the list
7689  */
7690 static void
7691 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7692 {
7693 	cphci->cphci_next = NULL;
7694 	if (vhcache->vhcache_phci_head == NULL)
7695 		vhcache->vhcache_phci_head = cphci;
7696 	else
7697 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7698 	vhcache->vhcache_phci_tail = cphci;
7699 }
7700 
7701 /*
7702  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7703  */
7704 static void
7705 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7706     mdi_vhcache_pathinfo_t *cpi)
7707 {
7708 	cpi->cpi_next = NULL;
7709 	if (cct->cct_cpi_head == NULL)
7710 		cct->cct_cpi_head = cpi;
7711 	else
7712 		cct->cct_cpi_tail->cpi_next = cpi;
7713 	cct->cct_cpi_tail = cpi;
7714 }
7715 
7716 /*
7717  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7718  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7719  * flag set come at the beginning of the list. All cpis which have this
7720  * flag set come at the end of the list.
7721  */
7722 static void
7723 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7724     mdi_vhcache_pathinfo_t *newcpi)
7725 {
7726 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7727 
7728 	if (cct->cct_cpi_head == NULL ||
7729 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7730 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7731 	else {
7732 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7733 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7734 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7735 			;
7736 
7737 		if (prev_cpi == NULL)
7738 			cct->cct_cpi_head = newcpi;
7739 		else
7740 			prev_cpi->cpi_next = newcpi;
7741 
7742 		newcpi->cpi_next = cpi;
7743 
7744 		if (cpi == NULL)
7745 			cct->cct_cpi_tail = newcpi;
7746 	}
7747 }
7748 
7749 /*
7750  * Enqueue the vhcache client (cct) at the tail of the list
7751  */
7752 static void
7753 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7754     mdi_vhcache_client_t *cct)
7755 {
7756 	cct->cct_next = NULL;
7757 	if (vhcache->vhcache_client_head == NULL)
7758 		vhcache->vhcache_client_head = cct;
7759 	else
7760 		vhcache->vhcache_client_tail->cct_next = cct;
7761 	vhcache->vhcache_client_tail = cct;
7762 }
7763 
/*
 * Free an array of nelem strings: every non-NULL element (sized as
 * strlen + 1) and then the array itself. A NULL array is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		char *elem = str[idx];

		if (elem != NULL)
			kmem_free(elem, strlen(elem) + 1);
	}
	kmem_free(str, sizeof (char *) * nelem);
}
7777 
7778 static void
7779 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7780 {
7781 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7782 	kmem_free(cphci, sizeof (*cphci));
7783 }
7784 
7785 static void
7786 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7787 {
7788 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7789 	kmem_free(cpi, sizeof (*cpi));
7790 }
7791 
7792 static void
7793 free_vhcache_client(mdi_vhcache_client_t *cct)
7794 {
7795 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7796 	kmem_free(cct, sizeof (*cct));
7797 }
7798 
7799 static char *
7800 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7801 {
7802 	char *name_addr;
7803 	int len;
7804 
7805 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7806 	name_addr = kmem_alloc(len, KM_SLEEP);
7807 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7808 
7809 	if (ret_len)
7810 		*ret_len = len;
7811 	return (name_addr);
7812 }
7813 
7814 /*
7815  * Copy the contents of paddrnvl to vhci cache.
7816  * paddrnvl nvlist contains path information for a vhci client.
7817  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7818  */
7819 static void
7820 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7821     mdi_vhcache_client_t *cct)
7822 {
7823 	nvpair_t *nvp = NULL;
7824 	mdi_vhcache_pathinfo_t *cpi;
7825 	uint_t nelem;
7826 	uint32_t *val;
7827 
7828 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7829 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7830 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7831 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7832 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7833 		ASSERT(nelem == 2);
7834 		cpi->cpi_cphci = cphci_list[val[0]];
7835 		cpi->cpi_flags = val[1];
7836 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7837 	}
7838 }
7839 
7840 /*
7841  * Copy the contents of caddrmapnvl to vhci cache.
7842  * caddrmapnvl nvlist contains vhci client address to phci client address
7843  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7844  * this nvlist.
7845  */
7846 static void
7847 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7848     mdi_vhcache_phci_t *cphci_list[])
7849 {
7850 	nvpair_t *nvp = NULL;
7851 	nvlist_t *paddrnvl;
7852 	mdi_vhcache_client_t *cct;
7853 
7854 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7855 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7856 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7857 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7858 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7859 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7860 		/* the client must contain at least one path */
7861 		ASSERT(cct->cct_cpi_head != NULL);
7862 
7863 		enqueue_vhcache_client(vhcache, cct);
7864 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7865 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7866 	}
7867 }
7868 
7869 /*
7870  * Copy the contents of the main nvlist to vhci cache.
7871  *
7872  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7873  * The nvlist contains the mappings between the vhci client addresses and
7874  * their corresponding phci client addresses.
7875  *
7876  * The structure of the nvlist is as follows:
7877  *
7878  * Main nvlist:
7879  *	NAME		TYPE		DATA
7880  *	version		int32		version number
7881  *	phcis		string array	array of phci paths
7882  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7883  *
7884  * structure of c2paddrs_nvl:
7885  *	NAME		TYPE		DATA
7886  *	caddr1		nvlist_t	paddrs_nvl1
7887  *	caddr2		nvlist_t	paddrs_nvl2
7888  *	...
7889  * where caddr1, caddr2, ... are vhci client name and addresses in the
7890  * form of "<clientname>@<clientaddress>".
7891  * (for example: "ssd@2000002037cd9f72");
7892  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7893  *
7894  * structure of paddrs_nvl:
7895  *	NAME		TYPE		DATA
7896  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7897  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7898  *	...
7899  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7900  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7901  * phci-ids are integers that identify pHCIs to which the
7902  * the bus specific address belongs to. These integers are used as an index
7903  * into to the phcis string array in the main nvlist to get the pHCI path.
7904  */
7905 static int
7906 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7907 {
7908 	char **phcis, **phci_namep;
7909 	uint_t nphcis;
7910 	mdi_vhcache_phci_t *cphci, **cphci_list;
7911 	nvlist_t *caddrmapnvl;
7912 	int32_t ver;
7913 	int i;
7914 	size_t cphci_list_size;
7915 
7916 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7917 
7918 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7919 	    ver != MDI_VHCI_CACHE_VERSION)
7920 		return (MDI_FAILURE);
7921 
7922 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7923 	    &nphcis) != 0)
7924 		return (MDI_SUCCESS);
7925 
7926 	ASSERT(nphcis > 0);
7927 
7928 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7929 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7930 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7931 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7932 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7933 		enqueue_vhcache_phci(vhcache, cphci);
7934 		cphci_list[i] = cphci;
7935 	}
7936 
7937 	ASSERT(vhcache->vhcache_phci_head != NULL);
7938 
7939 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7940 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7941 
7942 	kmem_free(cphci_list, cphci_list_size);
7943 	return (MDI_SUCCESS);
7944 }
7945 
7946 /*
7947  * Build paddrnvl for the specified client using the information in the
7948  * vhci cache and add it to the caddrmapnnvl.
7949  * Returns 0 on success, errno on failure.
7950  */
7951 static int
7952 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7953     nvlist_t *caddrmapnvl)
7954 {
7955 	mdi_vhcache_pathinfo_t *cpi;
7956 	nvlist_t *nvl;
7957 	int err;
7958 	uint32_t val[2];
7959 
7960 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7961 
7962 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7963 		return (err);
7964 
7965 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7966 		val[0] = cpi->cpi_cphci->cphci_id;
7967 		val[1] = cpi->cpi_flags;
7968 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7969 		    != 0)
7970 			goto out;
7971 	}
7972 
7973 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7974 out:
7975 	nvlist_free(nvl);
7976 	return (err);
7977 }
7978 
7979 /*
7980  * Build caddrmapnvl using the information in the vhci cache
7981  * and add it to the mainnvl.
7982  * Returns 0 on success, errno on failure.
7983  */
7984 static int
7985 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7986 {
7987 	mdi_vhcache_client_t *cct;
7988 	nvlist_t *nvl;
7989 	int err;
7990 
7991 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7992 
7993 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7994 		return (err);
7995 
7996 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7997 	    cct = cct->cct_next) {
7998 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7999 			goto out;
8000 	}
8001 
8002 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
8003 out:
8004 	nvlist_free(nvl);
8005 	return (err);
8006 }
8007 
8008 /*
8009  * Build nvlist using the information in the vhci cache.
8010  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
8011  * Returns nvl on success, NULL on failure.
8012  */
8013 static nvlist_t *
8014 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
8015 {
8016 	mdi_vhcache_phci_t *cphci;
8017 	uint_t phci_count;
8018 	char **phcis;
8019 	nvlist_t *nvl;
8020 	int err, i;
8021 
8022 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
8023 		nvl = NULL;
8024 		goto out;
8025 	}
8026 
8027 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
8028 	    MDI_VHCI_CACHE_VERSION)) != 0)
8029 		goto out;
8030 
8031 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8032 	if (vhcache->vhcache_phci_head == NULL) {
8033 		rw_exit(&vhcache->vhcache_lock);
8034 		return (nvl);
8035 	}
8036 
8037 	phci_count = 0;
8038 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8039 	    cphci = cphci->cphci_next)
8040 		cphci->cphci_id = phci_count++;
8041 
8042 	/* build phci pathname list */
8043 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8044 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8045 	    cphci = cphci->cphci_next, i++)
8046 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8047 
8048 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8049 	    phci_count);
8050 	free_string_array(phcis, phci_count);
8051 
8052 	if (err == 0 &&
8053 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8054 		rw_exit(&vhcache->vhcache_lock);
8055 		return (nvl);
8056 	}
8057 
8058 	rw_exit(&vhcache->vhcache_lock);
8059 out:
8060 	nvlist_free(nvl);
8061 	return (NULL);
8062 }
8063 
8064 /*
8065  * Lookup vhcache phci structure for the specified phci path.
8066  */
8067 static mdi_vhcache_phci_t *
8068 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8069 {
8070 	mdi_vhcache_phci_t *cphci;
8071 
8072 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8073 
8074 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8075 	    cphci = cphci->cphci_next) {
8076 		if (strcmp(cphci->cphci_path, phci_path) == 0)
8077 			return (cphci);
8078 	}
8079 
8080 	return (NULL);
8081 }
8082 
8083 /*
8084  * Lookup vhcache phci structure for the specified phci.
8085  */
8086 static mdi_vhcache_phci_t *
8087 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8088 {
8089 	mdi_vhcache_phci_t *cphci;
8090 
8091 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8092 
8093 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8094 	    cphci = cphci->cphci_next) {
8095 		if (cphci->cphci_phci == ph)
8096 			return (cphci);
8097 	}
8098 
8099 	return (NULL);
8100 }
8101 
8102 /*
8103  * Add the specified phci to the vhci cache if not already present.
8104  */
8105 static void
8106 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8107 {
8108 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8109 	mdi_vhcache_phci_t *cphci;
8110 	char *pathname;
8111 	int cache_updated;
8112 
8113 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8114 
8115 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8116 	(void) ddi_pathname(ph->ph_dip, pathname);
8117 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8118 	    != NULL) {
8119 		cphci->cphci_phci = ph;
8120 		cache_updated = 0;
8121 	} else {
8122 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8123 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8124 		cphci->cphci_phci = ph;
8125 		enqueue_vhcache_phci(vhcache, cphci);
8126 		cache_updated = 1;
8127 	}
8128 
8129 	rw_exit(&vhcache->vhcache_lock);
8130 
8131 	/*
8132 	 * Since a new phci has been added, reset
8133 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
8134 	 * during next vhcache_discover_paths().
8135 	 */
8136 	mutex_enter(&vhc->vhc_lock);
8137 	vhc->vhc_path_discovery_cutoff_time = 0;
8138 	mutex_exit(&vhc->vhc_lock);
8139 
8140 	kmem_free(pathname, MAXPATHLEN);
8141 	if (cache_updated)
8142 		vhcache_dirty(vhc);
8143 }
8144 
8145 /*
8146  * Remove the reference to the specified phci from the vhci cache.
8147  */
8148 static void
8149 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8150 {
8151 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8152 	mdi_vhcache_phci_t *cphci;
8153 
8154 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8155 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8156 		/* do not remove the actual mdi_vhcache_phci structure */
8157 		cphci->cphci_phci = NULL;
8158 	}
8159 	rw_exit(&vhcache->vhcache_lock);
8160 }
8161 
8162 static void
8163 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8164     mdi_vhcache_lookup_token_t *src)
8165 {
8166 	if (src == NULL) {
8167 		dst->lt_cct = NULL;
8168 		dst->lt_cct_lookup_time = 0;
8169 	} else {
8170 		dst->lt_cct = src->lt_cct;
8171 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8172 	}
8173 }
8174 
8175 /*
8176  * Look up vhcache client for the specified client.
8177  */
8178 static mdi_vhcache_client_t *
8179 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8180     mdi_vhcache_lookup_token_t *token)
8181 {
8182 	mod_hash_val_t hv;
8183 	char *name_addr;
8184 	int len;
8185 
8186 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8187 
8188 	/*
8189 	 * If no vhcache clean occurred since the last lookup, we can
8190 	 * simply return the cct from the last lookup operation.
8191 	 * It works because ccts are never freed except during the vhcache
8192 	 * cleanup operation.
8193 	 */
8194 	if (token != NULL &&
8195 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8196 		return (token->lt_cct);
8197 
8198 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8199 	if (mod_hash_find(vhcache->vhcache_client_hash,
8200 	    (mod_hash_key_t)name_addr, &hv) == 0) {
8201 		if (token) {
8202 			token->lt_cct = (mdi_vhcache_client_t *)hv;
8203 			token->lt_cct_lookup_time = ddi_get_lbolt64();
8204 		}
8205 	} else {
8206 		if (token) {
8207 			token->lt_cct = NULL;
8208 			token->lt_cct_lookup_time = 0;
8209 		}
8210 		hv = NULL;
8211 	}
8212 	kmem_free(name_addr, len);
8213 	return ((mdi_vhcache_client_t *)hv);
8214 }
8215 
8216 /*
8217  * Add the specified path to the vhci cache if not already present.
8218  * Also add the vhcache client for the client corresponding to this path
8219  * if it doesn't already exist.
8220  */
8221 static void
8222 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8223 {
8224 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8225 	mdi_vhcache_client_t *cct;
8226 	mdi_vhcache_pathinfo_t *cpi;
8227 	mdi_phci_t *ph = pip->pi_phci;
8228 	mdi_client_t *ct = pip->pi_client;
8229 	int cache_updated = 0;
8230 
8231 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8232 
8233 	/* if vhcache client for this pip doesn't already exist, add it */
8234 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8235 	    NULL)) == NULL) {
8236 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8237 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8238 		    ct->ct_guid, NULL);
8239 		enqueue_vhcache_client(vhcache, cct);
8240 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
8241 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8242 		cache_updated = 1;
8243 	}
8244 
8245 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8246 		if (cpi->cpi_cphci->cphci_phci == ph &&
8247 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8248 			cpi->cpi_pip = pip;
8249 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8250 				cpi->cpi_flags &=
8251 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8252 				sort_vhcache_paths(cct);
8253 				cache_updated = 1;
8254 			}
8255 			break;
8256 		}
8257 	}
8258 
8259 	if (cpi == NULL) {
8260 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8261 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8262 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8263 		ASSERT(cpi->cpi_cphci != NULL);
8264 		cpi->cpi_pip = pip;
8265 		enqueue_vhcache_pathinfo(cct, cpi);
8266 		cache_updated = 1;
8267 	}
8268 
8269 	rw_exit(&vhcache->vhcache_lock);
8270 
8271 	if (cache_updated)
8272 		vhcache_dirty(vhc);
8273 }
8274 
8275 /*
8276  * Remove the reference to the specified path from the vhci cache.
8277  */
8278 static void
8279 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8280 {
8281 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8282 	mdi_client_t *ct = pip->pi_client;
8283 	mdi_vhcache_client_t *cct;
8284 	mdi_vhcache_pathinfo_t *cpi;
8285 
8286 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8287 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8288 	    NULL)) != NULL) {
8289 		for (cpi = cct->cct_cpi_head; cpi != NULL;
8290 		    cpi = cpi->cpi_next) {
8291 			if (cpi->cpi_pip == pip) {
8292 				cpi->cpi_pip = NULL;
8293 				break;
8294 			}
8295 		}
8296 	}
8297 	rw_exit(&vhcache->vhcache_lock);
8298 }
8299 
8300 /*
8301  * Flush the vhci cache to disk.
8302  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8303  */
8304 static int
8305 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8306 {
8307 	nvlist_t *nvl;
8308 	int err;
8309 	int rv;
8310 
8311 	/*
8312 	 * It is possible that the system may shutdown before
8313 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
8314 	 * flushing the cache in this case do not check for
8315 	 * i_ddi_io_initialized when force flag is set.
8316 	 */
8317 	if (force_flag == 0 && !i_ddi_io_initialized())
8318 		return (MDI_FAILURE);
8319 
8320 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8321 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8322 		nvlist_free(nvl);
8323 	} else
8324 		err = EFAULT;
8325 
8326 	rv = MDI_SUCCESS;
8327 	mutex_enter(&vhc->vhc_lock);
8328 	if (err != 0) {
8329 		if (err == EROFS) {
8330 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8331 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8332 			    MDI_VHC_VHCACHE_DIRTY);
8333 		} else {
8334 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8335 				cmn_err(CE_CONT, "%s: update failed\n",
8336 				    vhc->vhc_vhcache_filename);
8337 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8338 			}
8339 			rv = MDI_FAILURE;
8340 		}
8341 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8342 		cmn_err(CE_CONT,
8343 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
8344 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8345 	}
8346 	mutex_exit(&vhc->vhc_lock);
8347 
8348 	return (rv);
8349 }
8350 
8351 /*
8352  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
8353  * Exits itself if left idle for the idle timeout period.
8354  */
8355 static void
8356 vhcache_flush_thread(void *arg)
8357 {
8358 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8359 	clock_t idle_time, quit_at_ticks;
8360 	callb_cpr_t cprinfo;
8361 
8362 	/* number of seconds to sleep idle before exiting */
8363 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
8364 
8365 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8366 	    "mdi_vhcache_flush");
8367 	mutex_enter(&vhc->vhc_lock);
8368 	for (; ; ) {
8369 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8370 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
8371 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
8372 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
8373 				(void) cv_timedwait(&vhc->vhc_cv,
8374 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
8375 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8376 			} else {
8377 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
8378 				mutex_exit(&vhc->vhc_lock);
8379 
8380 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
8381 					vhcache_dirty(vhc);
8382 
8383 				mutex_enter(&vhc->vhc_lock);
8384 			}
8385 		}
8386 
8387 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8388 
8389 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8390 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
8391 		    ddi_get_lbolt() < quit_at_ticks) {
8392 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8393 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8394 			    quit_at_ticks);
8395 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8396 		}
8397 
8398 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8399 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
8400 			goto out;
8401 	}
8402 
8403 out:
8404 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
8405 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8406 	CALLB_CPR_EXIT(&cprinfo);
8407 }
8408 
8409 /*
8410  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8411  */
8412 static void
8413 vhcache_dirty(mdi_vhci_config_t *vhc)
8414 {
8415 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8416 	int create_thread;
8417 
8418 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8419 	/* do not flush cache until the cache is fully built */
8420 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8421 		rw_exit(&vhcache->vhcache_lock);
8422 		return;
8423 	}
8424 	rw_exit(&vhcache->vhcache_lock);
8425 
8426 	mutex_enter(&vhc->vhc_lock);
8427 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8428 		mutex_exit(&vhc->vhc_lock);
8429 		return;
8430 	}
8431 
8432 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8433 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8434 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8435 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8436 		cv_broadcast(&vhc->vhc_cv);
8437 		create_thread = 0;
8438 	} else {
8439 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8440 		create_thread = 1;
8441 	}
8442 	mutex_exit(&vhc->vhc_lock);
8443 
8444 	if (create_thread)
8445 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8446 		    0, &p0, TS_RUN, minclsyspri);
8447 }
8448 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	/* device path of the phci to configure */
	char *phbc_phci_path;
	/* vhci bus config operation this request belongs to */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;
	/* next request in the list */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;
8458 
8459 /* vhci bus config structure - one for each vhci bus config operation */
8460 typedef struct mdi_vhci_bus_config_s {
8461 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
8462 	major_t vhbc_op_major;		/* bus config op major */
8463 	uint_t vhbc_op_flags;		/* bus config op flags */
8464 	kmutex_t vhbc_lock;
8465 	kcondvar_t vhbc_cv;
8466 	int vhbc_thr_count;
8467 } mdi_vhci_bus_config_t;
8468 
8469 /*
8470  * bus config the specified phci
8471  */
8472 static void
8473 bus_config_phci(void *arg)
8474 {
8475 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8476 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8477 	dev_info_t *ph_dip;
8478 
8479 	/*
8480 	 * first configure all path components upto phci and then configure
8481 	 * the phci children.
8482 	 */
8483 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8484 	    != NULL) {
8485 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8486 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8487 			(void) ndi_devi_config_driver(ph_dip,
8488 			    vhbc->vhbc_op_flags,
8489 			    vhbc->vhbc_op_major);
8490 		} else
8491 			(void) ndi_devi_config(ph_dip,
8492 			    vhbc->vhbc_op_flags);
8493 
8494 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8495 		ndi_rele_devi(ph_dip);
8496 	}
8497 
8498 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8499 	kmem_free(phbc, sizeof (*phbc));
8500 
8501 	mutex_enter(&vhbc->vhbc_lock);
8502 	vhbc->vhbc_thr_count--;
8503 	if (vhbc->vhbc_thr_count == 0)
8504 		cv_broadcast(&vhbc->vhbc_cv);
8505 	mutex_exit(&vhbc->vhbc_lock);
8506 }
8507 
8508 /*
8509  * Bus config all phcis associated with the vhci in parallel.
8510  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8511  */
8512 static void
8513 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8514     ddi_bus_config_op_t op, major_t maj)
8515 {
8516 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8517 	mdi_vhci_bus_config_t *vhbc;
8518 	mdi_vhcache_phci_t *cphci;
8519 
8520 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8521 	if (vhcache->vhcache_phci_head == NULL) {
8522 		rw_exit(&vhcache->vhcache_lock);
8523 		return;
8524 	}
8525 
8526 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8527 
8528 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8529 	    cphci = cphci->cphci_next) {
8530 		/* skip phcis that haven't attached before root is available */
8531 		if (!modrootloaded && (cphci->cphci_phci == NULL))
8532 			continue;
8533 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8534 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8535 		    KM_SLEEP);
8536 		phbc->phbc_vhbusconfig = vhbc;
8537 		phbc->phbc_next = phbc_head;
8538 		phbc_head = phbc;
8539 		vhbc->vhbc_thr_count++;
8540 	}
8541 	rw_exit(&vhcache->vhcache_lock);
8542 
8543 	vhbc->vhbc_op = op;
8544 	vhbc->vhbc_op_major = maj;
8545 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
8546 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8547 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8548 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8549 
8550 	/* now create threads to initiate bus config on all phcis in parallel */
8551 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8552 		phbc_next = phbc->phbc_next;
8553 		if (mdi_mtc_off)
8554 			bus_config_phci((void *)phbc);
8555 		else
8556 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
8557 			    0, &p0, TS_RUN, minclsyspri);
8558 	}
8559 
8560 	mutex_enter(&vhbc->vhbc_lock);
8561 	/* wait until all threads exit */
8562 	while (vhbc->vhbc_thr_count > 0)
8563 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8564 	mutex_exit(&vhbc->vhbc_lock);
8565 
8566 	mutex_destroy(&vhbc->vhbc_lock);
8567 	cv_destroy(&vhbc->vhbc_cv);
8568 	kmem_free(vhbc, sizeof (*vhbc));
8569 }
8570 
8571 /*
8572  * Single threaded version of bus_config_all_phcis()
8573  */
8574 static void
8575 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8576     ddi_bus_config_op_t op, major_t maj)
8577 {
8578 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8579 
8580 	single_threaded_vhconfig_enter(vhc);
8581 	bus_config_all_phcis(vhcache, flags, op, maj);
8582 	single_threaded_vhconfig_exit(vhc);
8583 }
8584 
8585 /*
8586  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8587  * The path includes the child component in addition to the phci path.
8588  */
8589 static int
8590 bus_config_one_phci_child(char *path)
8591 {
8592 	dev_info_t *ph_dip, *child;
8593 	char *devnm;
8594 	int rv = MDI_FAILURE;
8595 
8596 	/* extract the child component of the phci */
8597 	devnm = strrchr(path, '/');
8598 	*devnm++ = '\0';
8599 
8600 	/*
8601 	 * first configure all path components upto phci and then
8602 	 * configure the phci child.
8603 	 */
8604 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8605 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8606 		    NDI_SUCCESS) {
8607 			/*
8608 			 * release the hold that ndi_devi_config_one() placed
8609 			 */
8610 			ndi_rele_devi(child);
8611 			rv = MDI_SUCCESS;
8612 		}
8613 
8614 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8615 		ndi_rele_devi(ph_dip);
8616 	}
8617 
8618 	devnm--;
8619 	*devnm = '/';
8620 	return (rv);
8621 }
8622 
8623 /*
8624  * Build a list of phci client paths for the specified vhci client.
8625  * The list includes only those phci client paths which aren't configured yet.
8626  */
8627 static mdi_phys_path_t *
8628 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8629 {
8630 	mdi_vhcache_pathinfo_t *cpi;
8631 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8632 	int config_path, len;
8633 
8634 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8635 		/*
8636 		 * include only those paths that aren't configured.
8637 		 */
8638 		config_path = 0;
8639 		if (cpi->cpi_pip == NULL)
8640 			config_path = 1;
8641 		else {
8642 			MDI_PI_LOCK(cpi->cpi_pip);
8643 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
8644 				config_path = 1;
8645 			MDI_PI_UNLOCK(cpi->cpi_pip);
8646 		}
8647 
8648 		if (config_path) {
8649 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8650 			len = strlen(cpi->cpi_cphci->cphci_path) +
8651 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8652 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
8653 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
8654 			    cpi->cpi_cphci->cphci_path, ct_name,
8655 			    cpi->cpi_addr);
8656 			pp->phys_path_next = NULL;
8657 
8658 			if (pp_head == NULL)
8659 				pp_head = pp;
8660 			else
8661 				pp_tail->phys_path_next = pp;
8662 			pp_tail = pp;
8663 		}
8664 	}
8665 
8666 	return (pp_head);
8667 }
8668 
8669 /*
8670  * Free the memory allocated for phci client path list.
8671  */
8672 static void
8673 free_phclient_path_list(mdi_phys_path_t *pp_head)
8674 {
8675 	mdi_phys_path_t *pp, *pp_next;
8676 
8677 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8678 		pp_next = pp->phys_path_next;
8679 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8680 		kmem_free(pp, sizeof (*pp));
8681 	}
8682 }
8683 
8684 /*
8685  * Allocated async client structure and initialize with the specified values.
8686  */
8687 static mdi_async_client_config_t *
8688 alloc_async_client_config(char *ct_name, char *ct_addr,
8689     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8690 {
8691 	mdi_async_client_config_t *acc;
8692 
8693 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8694 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8695 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8696 	acc->acc_phclient_path_list_head = pp_head;
8697 	init_vhcache_lookup_token(&acc->acc_token, tok);
8698 	acc->acc_next = NULL;
8699 	return (acc);
8700 }
8701 
8702 /*
8703  * Free the memory allocated for the async client structure and their members.
8704  */
8705 static void
8706 free_async_client_config(mdi_async_client_config_t *acc)
8707 {
8708 	if (acc->acc_phclient_path_list_head)
8709 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8710 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8711 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8712 	kmem_free(acc, sizeof (*acc));
8713 }
8714 
8715 /*
8716  * Sort vhcache pathinfos (cpis) of the specified client.
8717  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8718  * flag set come at the beginning of the list. All cpis which have this
8719  * flag set come at the end of the list.
8720  */
8721 static void
8722 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8723 {
8724 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8725 
8726 	cpi_head = cct->cct_cpi_head;
8727 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8728 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8729 		cpi_next = cpi->cpi_next;
8730 		enqueue_vhcache_pathinfo(cct, cpi);
8731 	}
8732 }
8733 
8734 /*
8735  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8736  * every vhcache pathinfo of the specified client. If not adjust the flag
8737  * setting appropriately.
8738  *
8739  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8740  * on-disk vhci cache. So every time this flag is updated the cache must be
8741  * flushed.
8742  */
8743 static void
8744 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8745     mdi_vhcache_lookup_token_t *tok)
8746 {
8747 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8748 	mdi_vhcache_client_t *cct;
8749 	mdi_vhcache_pathinfo_t *cpi;
8750 
8751 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8752 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8753 	    == NULL) {
8754 		rw_exit(&vhcache->vhcache_lock);
8755 		return;
8756 	}
8757 
8758 	/*
8759 	 * to avoid unnecessary on-disk cache updates, first check if an
8760 	 * update is really needed. If no update is needed simply return.
8761 	 */
8762 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8763 		if ((cpi->cpi_pip != NULL &&
8764 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8765 		    (cpi->cpi_pip == NULL &&
8766 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8767 			break;
8768 		}
8769 	}
8770 	if (cpi == NULL) {
8771 		rw_exit(&vhcache->vhcache_lock);
8772 		return;
8773 	}
8774 
8775 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8776 		rw_exit(&vhcache->vhcache_lock);
8777 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8778 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8779 		    tok)) == NULL) {
8780 			rw_exit(&vhcache->vhcache_lock);
8781 			return;
8782 		}
8783 	}
8784 
8785 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8786 		if (cpi->cpi_pip != NULL)
8787 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8788 		else
8789 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8790 	}
8791 	sort_vhcache_paths(cct);
8792 
8793 	rw_exit(&vhcache->vhcache_lock);
8794 	vhcache_dirty(vhc);
8795 }
8796 
8797 /*
8798  * Configure all specified paths of the client.
8799  */
8800 static void
8801 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8802     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8803 {
8804 	mdi_phys_path_t *pp;
8805 
8806 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8807 		(void) bus_config_one_phci_child(pp->phys_path);
8808 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8809 }
8810 
/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 *
 * Thread entry point; arg is the mdi_vhci_config_t whose vhc_acc_list is
 * serviced. The loop is left when MDI_VHC_EXIT is set, or when the list is
 * still empty once the idle deadline (mdi_async_config_idle_time scaled by
 * TICKS_PER_SECOND) has passed. On the way out vhc_acc_thrcount is
 * decremented while vhc_lock is still held.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		/* the idle deadline restarts after every processed element */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work, an exit request, or the idle deadline */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* exit requested or idle timeout: leave with vhc_lock held */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* unlink the head element; keep the tail pointer consistent */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* the bus config work itself is done without vhc_lock held */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	/* vhc_lock is held here (taken at the top of the loop iteration) */
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
8863 
8864 /*
8865  * Arrange for all the phci client paths (pp_head) for the specified client
8866  * to be bus configured asynchronously by a thread.
8867  */
8868 static void
8869 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8870     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8871 {
8872 	mdi_async_client_config_t *acc, *newacc;
8873 	int create_thread;
8874 
8875 	if (pp_head == NULL)
8876 		return;
8877 
8878 	if (mdi_mtc_off) {
8879 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8880 		free_phclient_path_list(pp_head);
8881 		return;
8882 	}
8883 
8884 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8885 	ASSERT(newacc);
8886 
8887 	mutex_enter(&vhc->vhc_lock);
8888 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8889 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8890 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8891 			free_async_client_config(newacc);
8892 			mutex_exit(&vhc->vhc_lock);
8893 			return;
8894 		}
8895 	}
8896 
8897 	if (vhc->vhc_acc_list_head == NULL)
8898 		vhc->vhc_acc_list_head = newacc;
8899 	else
8900 		vhc->vhc_acc_list_tail->acc_next = newacc;
8901 	vhc->vhc_acc_list_tail = newacc;
8902 	vhc->vhc_acc_count++;
8903 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8904 		cv_broadcast(&vhc->vhc_cv);
8905 		create_thread = 0;
8906 	} else {
8907 		vhc->vhc_acc_thrcount++;
8908 		create_thread = 1;
8909 	}
8910 	mutex_exit(&vhc->vhc_lock);
8911 
8912 	if (create_thread)
8913 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8914 		    0, &p0, TS_RUN, minclsyspri);
8915 }
8916 
8917 /*
8918  * Return number of online paths for the specified client.
8919  */
8920 static int
8921 nonline_paths(mdi_vhcache_client_t *cct)
8922 {
8923 	mdi_vhcache_pathinfo_t *cpi;
8924 	int online_count = 0;
8925 
8926 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8927 		if (cpi->cpi_pip != NULL) {
8928 			MDI_PI_LOCK(cpi->cpi_pip);
8929 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8930 				online_count++;
8931 			MDI_PI_UNLOCK(cpi->cpi_pip);
8932 		}
8933 	}
8934 
8935 	return (online_count);
8936 }
8937 
/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Locking: the caller must hold vhcache_lock (asserted below). The lock is
 * always released by the time this function returns.
 *
 * A NULL ct_name or ct_addr, a client that is not in the cache, or an empty
 * phci client path list all result in an immediate return.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		/* the whole list is handed over; it is not freed here */
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * No path is online yet: configure paths one at a time without the
	 * cache lock held, re-checking the client after each success.
	 */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* the lock was dropped above, so look the client up again */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				/*
				 * Hand the not yet processed remainder of the
				 * list to the async path and detach it, so
				 * that only the part processed so far is
				 * freed below.
				 */
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	/* every path was attempted synchronously; fix up the cache hints */
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}
9001 
9002 static void
9003 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
9004 {
9005 	mutex_enter(&vhc->vhc_lock);
9006 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
9007 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
9008 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
9009 	mutex_exit(&vhc->vhc_lock);
9010 }
9011 
9012 static void
9013 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
9014 {
9015 	mutex_enter(&vhc->vhc_lock);
9016 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
9017 	cv_broadcast(&vhc->vhc_cv);
9018 	mutex_exit(&vhc->vhc_lock);
9019 }
9020 
/*
 * Element type of the built-in phci driver tables: a phci driver name
 * together with its root device support capability.
 */
typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;
9027 
9028 /*
9029  * vhci class and root support capability of a phci driver can be
9030  * specified using ddi-vhci-class and ddi-no-root-support properties in the
9031  * phci driver.conf file. The built-in tables below contain this information
9032  * for those phci drivers whose driver.conf files don't yet contain this info.
9033  *
 * All phci drivers except iscsi have root device support.
9035  */
9036 static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
9037 	{ "fp", 1 },
9038 	{ "iscsi", 0 },
9039 	{ "ibsrp", 1 }
9040 	};
9041 
9042 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
9043 
9044 static void *
9045 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9046 {
9047 	void *new_ptr;
9048 
9049 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9050 	if (old_ptr) {
9051 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9052 		kmem_free(old_ptr, old_size);
9053 	}
9054 	return (new_ptr);
9055 }
9056 
9057 static void
9058 add_to_phci_list(char ***driver_list, int **root_support_list,
9059     int *cur_elements, int *max_elements, char *driver_name, int root_support)
9060 {
9061 	ASSERT(*cur_elements <= *max_elements);
9062 	if (*cur_elements == *max_elements) {
9063 		*max_elements += 10;
9064 		*driver_list = mdi_realloc(*driver_list,
9065 		    sizeof (char *) * (*cur_elements),
9066 		    sizeof (char *) * (*max_elements));
9067 		*root_support_list = mdi_realloc(*root_support_list,
9068 		    sizeof (int) * (*cur_elements),
9069 		    sizeof (int) * (*max_elements));
9070 	}
9071 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9072 	(*root_support_list)[*cur_elements] = root_support;
9073 	(*cur_elements)++;
9074 }
9075 
9076 static void
9077 get_phci_driver_list(char *vhci_class, char ***driver_list,
9078     int **root_support_list, int *cur_elements, int *max_elements)
9079 {
9080 	mdi_phci_driver_info_t	*st_driver_list, *p;
9081 	int		st_ndrivers, root_support, i, j, driver_conf_count;
9082 	major_t		m;
9083 	struct devnames	*dnp;
9084 	ddi_prop_t	*propp;
9085 
9086 	*driver_list = NULL;
9087 	*root_support_list = NULL;
9088 	*cur_elements = 0;
9089 	*max_elements = 0;
9090 
9091 	/* add the phci drivers derived from the phci driver.conf files */
9092 	for (m = 0; m < devcnt; m++) {
9093 		dnp = &devnamesp[m];
9094 
9095 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
9096 			LOCK_DEV_OPS(&dnp->dn_lock);
9097 			if (dnp->dn_global_prop_ptr != NULL &&
9098 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9099 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9100 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9101 			    strcmp(propp->prop_val, vhci_class) == 0) {
9102 
9103 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9104 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9105 				    &dnp->dn_global_prop_ptr->prop_list)
9106 				    == NULL) ? 1 : 0;
9107 
9108 				add_to_phci_list(driver_list, root_support_list,
9109 				    cur_elements, max_elements, dnp->dn_name,
9110 				    root_support);
9111 
9112 				UNLOCK_DEV_OPS(&dnp->dn_lock);
9113 			} else
9114 				UNLOCK_DEV_OPS(&dnp->dn_lock);
9115 		}
9116 	}
9117 
9118 	driver_conf_count = *cur_elements;
9119 
9120 	/* add the phci drivers specified in the built-in tables */
9121 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9122 		st_driver_list = scsi_phci_driver_list;
9123 		st_ndrivers = sizeof (scsi_phci_driver_list) /
9124 		    sizeof (mdi_phci_driver_info_t);
9125 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9126 		st_driver_list = ib_phci_driver_list;
9127 		st_ndrivers = sizeof (ib_phci_driver_list) /
9128 		    sizeof (mdi_phci_driver_info_t);
9129 	} else {
9130 		st_driver_list = NULL;
9131 		st_ndrivers = 0;
9132 	}
9133 
9134 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9135 		/* add this phci driver if not already added before */
9136 		for (j = 0; j < driver_conf_count; j++) {
9137 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9138 				break;
9139 		}
9140 		if (j == driver_conf_count) {
9141 			add_to_phci_list(driver_list, root_support_list,
9142 			    cur_elements, max_elements, p->phdriver_name,
9143 			    p->phdriver_root_support);
9144 		}
9145 	}
9146 }
9147 
9148 /*
9149  * Attach the phci driver instances associated with the specified vhci class.
9150  * If root is mounted attach all phci driver instances.
9151  * If root is not mounted, attach the instances of only those phci
9152  * drivers that have the root support.
9153  */
9154 static void
9155 attach_phci_drivers(char *vhci_class)
9156 {
9157 	char	**driver_list, **p;
9158 	int	*root_support_list;
9159 	int	cur_elements, max_elements, i;
9160 	major_t	m;
9161 
9162 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9163 	    &cur_elements, &max_elements);
9164 
9165 	for (i = 0; i < cur_elements; i++) {
9166 		if (modrootloaded || root_support_list[i]) {
9167 			m = ddi_name_to_major(driver_list[i]);
9168 			if (m != DDI_MAJOR_T_NONE &&
9169 			    ddi_hold_installed_driver(m))
9170 				ddi_rele_driver(m);
9171 		}
9172 	}
9173 
9174 	if (driver_list) {
9175 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9176 			kmem_free(*p, strlen(*p) + 1);
9177 		kmem_free(driver_list, sizeof (char *) * max_elements);
9178 		kmem_free(root_support_list, sizeof (int) * max_elements);
9179 	}
9180 }
9181 
9182 /*
9183  * Build vhci cache:
9184  *
9185  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9186  * the phci driver instances. During this process the cache gets built.
9187  *
9188  * Cache is built fully if the root is mounted.
9189  * If the root is not mounted, phci drivers that do not have root support
9190  * are not attached. As a result the cache is built partially. The entries
9191  * in the cache reflect only those phci drivers that have root support.
9192  */
9193 static int
9194 build_vhci_cache(mdi_vhci_t *vh)
9195 {
9196 	mdi_vhci_config_t *vhc = vh->vh_config;
9197 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9198 
9199 	single_threaded_vhconfig_enter(vhc);
9200 
9201 	rw_enter(&vhcache->vhcache_lock, RW_READER);
9202 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9203 		rw_exit(&vhcache->vhcache_lock);
9204 		single_threaded_vhconfig_exit(vhc);
9205 		return (0);
9206 	}
9207 	rw_exit(&vhcache->vhcache_lock);
9208 
9209 	attach_phci_drivers(vh->vh_class);
9210 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9211 	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9212 
9213 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9214 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9215 	rw_exit(&vhcache->vhcache_lock);
9216 
9217 	single_threaded_vhconfig_exit(vhc);
9218 	vhcache_dirty(vhc);
9219 	return (1);
9220 }
9221 
9222 /*
9223  * Determine if discovery of paths is needed.
9224  */
9225 static int
9226 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9227 {
9228 	int rv = 1;
9229 
9230 	mutex_enter(&vhc->vhc_lock);
9231 	if (i_ddi_io_initialized() == 0) {
9232 		if (vhc->vhc_path_discovery_boot > 0) {
9233 			vhc->vhc_path_discovery_boot--;
9234 			goto out;
9235 		}
9236 	} else {
9237 		if (vhc->vhc_path_discovery_postboot > 0) {
9238 			vhc->vhc_path_discovery_postboot--;
9239 			goto out;
9240 		}
9241 	}
9242 
9243 	/*
9244 	 * Do full path discovery at most once per mdi_path_discovery_interval.
9245 	 * This is to avoid a series of full path discoveries when opening
9246 	 * stale /dev/[r]dsk links.
9247 	 */
9248 	if (mdi_path_discovery_interval != -1 &&
9249 	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9250 		goto out;
9251 
9252 	rv = 0;
9253 out:
9254 	mutex_exit(&vhc->vhc_lock);
9255 	return (rv);
9256 }
9257 
9258 /*
9259  * Discover all paths:
9260  *
9261  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9262  * driver instances. During this process all paths will be discovered.
9263  */
9264 static int
9265 vhcache_discover_paths(mdi_vhci_t *vh)
9266 {
9267 	mdi_vhci_config_t *vhc = vh->vh_config;
9268 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9269 	int rv = 0;
9270 
9271 	single_threaded_vhconfig_enter(vhc);
9272 
9273 	if (vhcache_do_discovery(vhc)) {
9274 		attach_phci_drivers(vh->vh_class);
9275 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9276 		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9277 
9278 		mutex_enter(&vhc->vhc_lock);
9279 		vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9280 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
9281 		mutex_exit(&vhc->vhc_lock);
9282 		rv = 1;
9283 	}
9284 
9285 	single_threaded_vhconfig_exit(vhc);
9286 	return (rv);
9287 }
9288 
/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *	vdip	vhci dip
 *	flags	bus config flags
 *	op	bus config operation
 *	The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *	arg	pointer to name@addr
 *	child	upon successful return from this function, *child will be
 *		set to the configured and held devinfo child node of vdip.
 *	ct_addr	pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *	arg	major number of the driver
 *	child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *	arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 *
 * Return Values:
 *	MDI_SUCCESS	the default bus config routine succeeded, either
 *			directly or, for BUS_CONFIG_ONE, after a full path
 *			discovery
 *	MDI_FAILURE	otherwise
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;		/* non zero if the cache was built by this call */
	int params_valid = 0;	/* BUS_CONFIG_ONE: arg was of name@addr form */
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node.  Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (MDI_NOTE, vdip,
		    "vhci dip is busy owned %p", (void *)vdip));
		goto default_bus_config;
	}

	/*
	 * Build the cache first if that has not happened yet. The cache lock
	 * is dropped around the build and re-taken as reader afterwards, so
	 * every case of the switch below starts with the lock held.
	 */
	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/*
				 * Temporarily terminate arg at the '@' so that
				 * it can be passed as the client name; the
				 * '@' is restored right after the call.
				 */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		/* arg or ct_addr missing, or arg without '@': nothing to relay */
		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		/* skipped when build_vhci_cache() just configured the phcis */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		/* skipped when build_vhci_cache() just configured the phcis */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}
9404 
9405 /*
9406  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9407  */
9408 static nvlist_t *
9409 read_on_disk_vhci_cache(char *vhci_class)
9410 {
9411 	nvlist_t *nvl;
9412 	int err;
9413 	char *filename;
9414 
9415 	filename = vhclass2vhcache_filename(vhci_class);
9416 
9417 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
9418 		kmem_free(filename, strlen(filename) + 1);
9419 		return (nvl);
9420 	} else if (err == EIO)
9421 		cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9422 	else if (err == EINVAL)
9423 		cmn_err(CE_WARN,
9424 		    "%s: data file corrupted, will recreate", filename);
9425 
9426 	kmem_free(filename, strlen(filename) + 1);
9427 	return (NULL);
9428 }
9429 
9430 /*
9431  * Read on-disk vhci cache into nvlists for all vhci classes.
9432  * Called during booting by i_ddi_read_devices_files().
9433  */
9434 void
9435 mdi_read_devices_files(void)
9436 {
9437 	int i;
9438 
9439 	for (i = 0; i < N_VHCI_CLASSES; i++)
9440 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9441 }
9442 
9443 /*
9444  * Remove all stale entries from vhci cache.
9445  */
9446 static void
9447 clean_vhcache(mdi_vhci_config_t *vhc)
9448 {
9449 	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
9450 	mdi_vhcache_phci_t	*phci, *nxt_phci;
9451 	mdi_vhcache_client_t	*client, *nxt_client;
9452 	mdi_vhcache_pathinfo_t	*path, *nxt_path;
9453 
9454 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9455 
9456 	client = vhcache->vhcache_client_head;
9457 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9458 	for ( ; client != NULL; client = nxt_client) {
9459 		nxt_client = client->cct_next;
9460 
9461 		path = client->cct_cpi_head;
9462 		client->cct_cpi_head = client->cct_cpi_tail = NULL;
9463 		for ( ; path != NULL; path = nxt_path) {
9464 			nxt_path = path->cpi_next;
9465 			if ((path->cpi_cphci->cphci_phci != NULL) &&
9466 			    (path->cpi_pip != NULL)) {
9467 				enqueue_tail_vhcache_pathinfo(client, path);
9468 			} else if (path->cpi_pip != NULL) {
9469 				/* Not valid to have a path without a phci. */
9470 				free_vhcache_pathinfo(path);
9471 			}
9472 		}
9473 
9474 		if (client->cct_cpi_head != NULL)
9475 			enqueue_vhcache_client(vhcache, client);
9476 		else {
9477 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
9478 			    (mod_hash_key_t)client->cct_name_addr);
9479 			free_vhcache_client(client);
9480 		}
9481 	}
9482 
9483 	phci = vhcache->vhcache_phci_head;
9484 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9485 	for ( ; phci != NULL; phci = nxt_phci) {
9486 
9487 		nxt_phci = phci->cphci_next;
9488 		if (phci->cphci_phci != NULL)
9489 			enqueue_vhcache_phci(vhcache, phci);
9490 		else
9491 			free_vhcache_phci(phci);
9492 	}
9493 
9494 	vhcache->vhcache_clean_time = ddi_get_lbolt64();
9495 	rw_exit(&vhcache->vhcache_lock);
9496 	vhcache_dirty(vhc);
9497 }
9498 
9499 /*
9500  * Remove all stale entries from vhci cache.
9501  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9502  */
9503 void
9504 mdi_clean_vhcache(void)
9505 {
9506 	mdi_vhci_t *vh;
9507 
9508 	mutex_enter(&mdi_mutex);
9509 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9510 		vh->vh_refcnt++;
9511 		mutex_exit(&mdi_mutex);
9512 		clean_vhcache(vh->vh_config);
9513 		mutex_enter(&mdi_mutex);
9514 		vh->vh_refcnt--;
9515 	}
9516 	mutex_exit(&mdi_mutex);
9517 }
9518 
9519 /*
9520  * mdi_vhci_walk_clients():
9521  *		Walker routine to traverse client dev_info nodes
9522  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9523  * below the client, including nexus devices, which we dont want.
9524  * So we just traverse the immediate siblings, starting from 1st client.
9525  */
9526 void
9527 mdi_vhci_walk_clients(dev_info_t *vdip,
9528     int (*f)(dev_info_t *, void *), void *arg)
9529 {
9530 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9531 	dev_info_t	*cdip;
9532 	mdi_client_t	*ct;
9533 
9534 	MDI_VHCI_CLIENT_LOCK(vh);
9535 	cdip = ddi_get_child(vdip);
9536 	while (cdip) {
9537 		ct = i_devi_get_client(cdip);
9538 		MDI_CLIENT_LOCK(ct);
9539 
9540 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9541 			cdip = ddi_get_next_sibling(cdip);
9542 		else
9543 			cdip = NULL;
9544 
9545 		MDI_CLIENT_UNLOCK(ct);
9546 	}
9547 	MDI_VHCI_CLIENT_UNLOCK(vh);
9548 }
9549 
9550 /*
9551  * mdi_vhci_walk_phcis():
9552  *		Walker routine to traverse phci dev_info nodes
9553  */
9554 void
9555 mdi_vhci_walk_phcis(dev_info_t *vdip,
9556     int (*f)(dev_info_t *, void *), void *arg)
9557 {
9558 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9559 	mdi_phci_t	*ph, *next;
9560 
9561 	MDI_VHCI_PHCI_LOCK(vh);
9562 	ph = vh->vh_phci_head;
9563 	while (ph) {
9564 		MDI_PHCI_LOCK(ph);
9565 
9566 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9567 			next = ph->ph_next;
9568 		else
9569 			next = NULL;
9570 
9571 		MDI_PHCI_UNLOCK(ph);
9572 		ph = next;
9573 	}
9574 	MDI_VHCI_PHCI_UNLOCK(vh);
9575 }
9576 
9577 
9578 /*
9579  * mdi_walk_vhcis():
9580  *		Walker routine to traverse vhci dev_info nodes
9581  */
9582 void
9583 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9584 {
9585 	mdi_vhci_t	*vh = NULL;
9586 
9587 	mutex_enter(&mdi_mutex);
9588 	/*
9589 	 * Scan for already registered vhci
9590 	 */
9591 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9592 		vh->vh_refcnt++;
9593 		mutex_exit(&mdi_mutex);
9594 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9595 			mutex_enter(&mdi_mutex);
9596 			vh->vh_refcnt--;
9597 			break;
9598 		} else {
9599 			mutex_enter(&mdi_mutex);
9600 			vh->vh_refcnt--;
9601 		}
9602 	}
9603 
9604 	mutex_exit(&mdi_mutex);
9605 }
9606 
9607 /*
9608  * i_mdi_log_sysevent():
9609  *		Logs events for pickup by syseventd
9610  */
9611 static void
9612 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9613 {
9614 	char		*path_name;
9615 	nvlist_t	*attr_list;
9616 
9617 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9618 	    KM_SLEEP) != DDI_SUCCESS) {
9619 		goto alloc_failed;
9620 	}
9621 
9622 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9623 	(void) ddi_pathname(dip, path_name);
9624 
9625 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9626 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
9627 		goto error;
9628 	}
9629 
9630 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9631 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9632 		goto error;
9633 	}
9634 
9635 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9636 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9637 		goto error;
9638 	}
9639 
9640 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
9641 	    path_name) != DDI_SUCCESS) {
9642 		goto error;
9643 	}
9644 
9645 	if (nvlist_add_string(attr_list, DDI_CLASS,
9646 	    ph_vh_class) != DDI_SUCCESS) {
9647 		goto error;
9648 	}
9649 
9650 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9651 	    attr_list, NULL, DDI_SLEEP);
9652 
9653 error:
9654 	kmem_free(path_name, MAXPATHLEN);
9655 	nvlist_free(attr_list);
9656 	return;
9657 
9658 alloc_failed:
9659 	MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9660 }
9661 
/*
 * Return the names of the phci drivers of the specified vhci class.
 *
 * *ndrivers is set to the number of drivers found. The return value is an
 * array of exactly *ndrivers name strings, or NULL if no driver was found.
 * The array is sized so that mdi_free_phci_driver_list() can release it
 * given the same ndrivers.
 */
char **
mdi_get_phci_driver_list(char *vhci_class, int	*ndrivers)
{
	char	**driver_list;
	int	*root_support_list;
	int	cur_elements, max_elements;

	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
	    &cur_elements, &max_elements);

	*ndrivers = cur_elements;
	if (driver_list == NULL)
		return (NULL);

	/* root support info is not returned; trim the name array to fit */
	kmem_free(root_support_list, sizeof (int) * max_elements);
	return (mdi_realloc(driver_list, sizeof (char *) * max_elements,
	    sizeof (char *) * cur_elements));
}
9683 
/*
 * Free a driver list of ndrivers entries: each name string first, then the
 * array itself. A NULL driver_list is a no-op.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
	int	i;

	if (driver_list == NULL)
		return;

	for (i = 0; i < ndrivers; i++)
		kmem_free(driver_list[i], strlen(driver_list[i]) + 1);
	kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9696 
9697 /*
9698  * mdi_is_dev_supported():
9699  *		function called by pHCI bus config operation to determine if a
9700  *		device should be represented as a child of the vHCI or the
9701  *		pHCI.  This decision is made by the vHCI, using cinfo idenity
9702  *		information passed by the pHCI - specifics of the cinfo
9703  *		representation are by agreement between the pHCI and vHCI.
9704  * Return Values:
9705  *		MDI_SUCCESS
9706  *		MDI_FAILURE
9707  */
9708 int
9709 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9710 {
9711 	mdi_vhci_t	*vh;
9712 
9713 	ASSERT(class && pdip);
9714 
9715 	/*
9716 	 * For dev_supported, mdi_phci_register() must have established pdip as
9717 	 * a pHCI.
9718 	 *
9719 	 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9720 	 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9721 	 */
9722 	if (!MDI_PHCI(pdip))
9723 		return (MDI_FAILURE);
9724 
9725 	/* Return MDI_FAILURE if vHCI does not support asking the question. */
9726 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9727 	if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9728 		return (MDI_FAILURE);
9729 	}
9730 
9731 	/* Return vHCI answer */
9732 	return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9733 }
9734 
9735 int
9736 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9737 {
9738 	uint_t devstate = 0;
9739 	dev_info_t *cdip;
9740 
9741 	if ((pip == NULL) || (dcp == NULL))
9742 		return (MDI_FAILURE);
9743 
9744 	cdip = mdi_pi_get_client(pip);
9745 
9746 	switch (mdi_pi_get_state(pip)) {
9747 	case MDI_PATHINFO_STATE_INIT:
9748 		devstate = DEVICE_DOWN;
9749 		break;
9750 	case MDI_PATHINFO_STATE_ONLINE:
9751 		devstate = DEVICE_ONLINE;
9752 		if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9753 			devstate |= DEVICE_BUSY;
9754 		break;
9755 	case MDI_PATHINFO_STATE_STANDBY:
9756 		devstate = DEVICE_ONLINE;
9757 		break;
9758 	case MDI_PATHINFO_STATE_FAULT:
9759 		devstate = DEVICE_DOWN;
9760 		break;
9761 	case MDI_PATHINFO_STATE_OFFLINE:
9762 		devstate = DEVICE_OFFLINE;
9763 		break;
9764 	default:
9765 		ASSERT(MDI_PI(pip)->pi_state);
9766 	}
9767 
9768 	if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9769 		return (MDI_FAILURE);
9770 
9771 	return (MDI_SUCCESS);
9772 }
9773