xref: /titanic_44/usr/src/uts/common/os/sunmdi.c (revision 6b2bcd8e40cb530c97e59f351ceccb5c807ac34a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
/*
 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
 * more detailed discussion of the overall mpxio architecture.
 *
 * Default locking order (each note lists the lock taken first, then the
 * lock taken second):
 *
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 #include <sys/modhash.h>
68 #include <sys/disp.h>
69 #include <sys/autoconf.h>
70 #include <sys/sysmacros.h>
71 
#ifdef	DEBUG
#include <sys/debug.h>
/* statements whose level is less than or equal to mdi_debug are logged */
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
/*
 * MDI_DEBUG(level, (args)):
 *	Pass the parenthesized argument list 'stmnt' to i_mdi_log() when
 *	mdi_debug is at least 'level'.  The expansion is wrapped in
 *	do { } while (0) so that it behaves as a single statement; a bare
 *	"if" would capture the "else" of an enclosing if/else.
 *	Every use must be terminated with a semicolon.
 */
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
	} while (0)
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
/* non-DEBUG kernels: the macro expands to nothing, args are not evaluated */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
82 
83 extern pri_t	minclsyspri;
84 extern int	modrootloaded;
85 
86 /*
87  * Global mutex:
88  * Protects vHCI list and structure members.
89  */
90 kmutex_t	mdi_mutex;
91 
92 /*
93  * Registered vHCI class driver lists
94  */
95 int		mdi_vhci_count;
96 mdi_vhci_t	*mdi_vhci_head;
97 mdi_vhci_t	*mdi_vhci_tail;
98 
99 /*
100  * Client Hash Table size
101  */
102 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
103 
104 /*
105  * taskq interface definitions
106  */
107 #define	MDI_TASKQ_N_THREADS	8
108 #define	MDI_TASKQ_PRI		minclsyspri
109 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
110 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
111 
112 taskq_t				*mdi_taskq;
113 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
114 
115 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
116 
117 /*
118  * The data should be "quiet" for this interval (in seconds) before the
119  * vhci cached data is flushed to the disk.
120  */
121 static int mdi_vhcache_flush_delay = 10;
122 
123 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
124 static int mdi_vhcache_flush_daemon_idle_time = 60;
125 
126 /*
127  * MDI falls back to discovery of all paths when a bus_config_one fails.
128  * The following parameters can be used to tune this operation.
129  *
130  * mdi_path_discovery_boot
131  *	Number of times path discovery will be attempted during early boot.
132  *	Probably there is no reason to ever set this value to greater than one.
133  *
134  * mdi_path_discovery_postboot
135  *	Number of times path discovery will be attempted after early boot.
136  *	Set it to a minimum of two to allow for discovery of iscsi paths which
137  *	may happen very late during booting.
138  *
139  * mdi_path_discovery_interval
140  *	Minimum number of seconds MDI will wait between successive discovery
141  *	of all paths. Set it to -1 to disable discovery of all paths.
142  */
143 static int mdi_path_discovery_boot = 1;
144 static int mdi_path_discovery_postboot = 2;
145 static int mdi_path_discovery_interval = 10;
146 
147 /*
148  * number of seconds the asynchronous configuration thread will sleep idle
149  * before exiting.
150  */
151 static int mdi_async_config_idle_time = 600;
152 
153 static int mdi_bus_config_cache_hash_size = 256;
154 
155 /* turns off multithreaded configuration for certain operations */
156 static int mdi_mtc_off = 0;
157 
158 /*
159  * The "path" to a pathinfo node is identical to the /devices path to a
160  * devinfo node had the device been enumerated under a pHCI instead of
161  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
162  * This association persists across create/delete of the pathinfo nodes,
163  * but not across reboot.
164  */
165 static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
166 static int		mdi_pathmap_hash_size = 256;
167 static kmutex_t		mdi_pathmap_mutex;
168 static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
169 static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
170 
171 /*
172  * MDI component property name/value string definitions
173  */
174 const char 		*mdi_component_prop = "mpxio-component";
175 const char		*mdi_component_prop_vhci = "vhci";
176 const char		*mdi_component_prop_phci = "phci";
177 const char		*mdi_component_prop_client = "client";
178 
179 /*
180  * MDI client global unique identifier property name
181  */
182 const char		*mdi_client_guid_prop = "client-guid";
183 
184 /*
185  * MDI client load balancing property name/value string definitions
186  */
187 const char		*mdi_load_balance = "load-balance";
188 const char		*mdi_load_balance_none = "none";
189 const char		*mdi_load_balance_rr = "round-robin";
190 const char		*mdi_load_balance_lba = "logical-block";
191 
192 /*
193  * Obsolete vHCI class definition; to be removed after Leadville update
194  */
195 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
196 
197 static char vhci_greeting[] =
198 	"\tThere already exists one vHCI driver for class %s\n"
199 	"\tOnly one vHCI driver for each class is allowed\n";
200 
201 /*
202  * Static function prototypes
203  */
204 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
205 static int		i_mdi_client_offline(dev_info_t *, uint_t);
206 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
207 static void		i_mdi_phci_post_detach(dev_info_t *,
208 			    ddi_detach_cmd_t, int);
209 static int		i_mdi_client_pre_detach(dev_info_t *,
210 			    ddi_detach_cmd_t);
211 static void		i_mdi_client_post_detach(dev_info_t *,
212 			    ddi_detach_cmd_t, int);
213 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
214 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
215 static int 		i_mdi_lba_lb(mdi_client_t *ct,
216 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
217 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
218 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
219 static void		i_mdi_pm_reset_client(mdi_client_t *);
220 static int		i_mdi_power_all_phci(mdi_client_t *);
221 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
222 
223 
224 /*
225  * Internal mdi_pathinfo node functions
226  */
227 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
228 
229 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
230 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
231 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
232 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
233 static void		i_mdi_phci_unlock(mdi_phci_t *);
234 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
235 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
236 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
237 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
238 			    mdi_client_t *);
239 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
240 static void		i_mdi_client_remove_path(mdi_client_t *,
241 			    mdi_pathinfo_t *);
242 
243 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
244 			    mdi_pathinfo_state_t, int);
245 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
246 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
247 			    char **, int);
248 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
249 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
250 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
251 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
252 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
253 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
254 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
255 static void		i_mdi_client_update_state(mdi_client_t *);
256 static int		i_mdi_client_compute_state(mdi_client_t *,
257 			    mdi_phci_t *);
258 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
259 static void		i_mdi_client_unlock(mdi_client_t *);
260 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
261 static mdi_client_t	*i_devi_get_client(dev_info_t *);
262 /*
263  * NOTE: this will be removed once the NWS files are changed to use the new
264  * mdi_{enable,disable}_path interfaces
265  */
266 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
267 				int, int);
268 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
269 				mdi_vhci_t *vh, int flags, int op);
270 /*
271  * Failover related function prototypes
272  */
273 static int		i_mdi_failover(void *);
274 
275 /*
276  * misc internal functions
277  */
278 static int		i_mdi_get_hash_key(char *);
279 static int		i_map_nvlist_error_to_mdi(int);
280 static void		i_mdi_report_path_state(mdi_client_t *,
281 			    mdi_pathinfo_t *);
282 
283 static void		setup_vhci_cache(mdi_vhci_t *);
284 static int		destroy_vhci_cache(mdi_vhci_t *);
285 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
286 static boolean_t	stop_vhcache_flush_thread(void *, int);
287 static void		free_string_array(char **, int);
288 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
289 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
290 static void		free_vhcache_client(mdi_vhcache_client_t *);
291 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
292 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
293 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
294 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
295 static void		vhcache_pi_add(mdi_vhci_config_t *,
296 			    struct mdi_pathinfo *);
297 static void		vhcache_pi_remove(mdi_vhci_config_t *,
298 			    struct mdi_pathinfo *);
299 static void		free_phclient_path_list(mdi_phys_path_t *);
300 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
301 static int		flush_vhcache(mdi_vhci_config_t *, int);
302 static void		vhcache_dirty(mdi_vhci_config_t *);
303 static void		free_async_client_config(mdi_async_client_config_t *);
304 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
305 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
306 static nvlist_t		*read_on_disk_vhci_cache(char *);
307 extern int		fread_nvlist(char *, nvlist_t **);
308 extern int		fwrite_nvlist(char *, nvlist_t *);
309 
310 /* called once when first vhci registers with mdi */
311 static void
312 i_mdi_init()
313 {
314 	static int initialized = 0;
315 
316 	if (initialized)
317 		return;
318 	initialized = 1;
319 
320 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
321 
322 	/* Create our taskq resources */
323 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
324 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
325 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
326 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
327 
328 	/* Allocate ['path_instance' <-> "path"] maps */
329 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
330 	mdi_pathmap_bypath = mod_hash_create_strhash(
331 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
332 	    mod_hash_null_valdtor);
333 	mdi_pathmap_byinstance = mod_hash_create_idhash(
334 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
335 	    mod_hash_null_valdtor);
336 }
337 
338 /*
339  * mdi_get_component_type():
340  *		Return mpxio component type
341  * Return Values:
342  *		MDI_COMPONENT_NONE
343  *		MDI_COMPONENT_VHCI
344  *		MDI_COMPONENT_PHCI
345  *		MDI_COMPONENT_CLIENT
346  * XXX This doesn't work under multi-level MPxIO and should be
347  *	removed when clients migrate mdi_component_is_*() interfaces.
348  */
349 int
350 mdi_get_component_type(dev_info_t *dip)
351 {
352 	return (DEVI(dip)->devi_mdi_component);
353 }
354 
355 /*
356  * mdi_vhci_register():
357  *		Register a vHCI module with the mpxio framework
358  *		mdi_vhci_register() is called by vHCI drivers to register the
359  *		'class_driver' vHCI driver and its MDI entrypoints with the
360  *		mpxio framework.  The vHCI driver must call this interface as
361  *		part of its attach(9e) handler.
362  *		Competing threads may try to attach mdi_vhci_register() as
363  *		the vHCI drivers are loaded and attached as a result of pHCI
364  *		driver instance registration (mdi_phci_register()) with the
365  *		framework.
366  * Return Values:
367  *		MDI_SUCCESS
368  *		MDI_FAILURE
369  */
370 /*ARGSUSED*/
371 int
372 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
373     int flags)
374 {
375 	mdi_vhci_t		*vh = NULL;
376 
377 	/* Registrant can't be older */
378 	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
379 
380 #ifdef DEBUG
381 	/*
382 	 * IB nexus driver is loaded only when IB hardware is present.
383 	 * In order to be able to do this there is a need to drive the loading
384 	 * and attaching of the IB nexus driver (especially when an IB hardware
385 	 * is dynamically plugged in) when an IB HCA driver (PHCI)
386 	 * is being attached. Unfortunately this gets into the limitations
387 	 * of devfs as there seems to be no clean way to drive configuration
388 	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
389 	 * for IB.
390 	 */
391 	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
392 		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
393 #endif
394 
395 	i_mdi_init();
396 
397 	mutex_enter(&mdi_mutex);
398 	/*
399 	 * Scan for already registered vhci
400 	 */
401 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
402 		if (strcmp(vh->vh_class, class) == 0) {
403 			/*
404 			 * vHCI has already been created.  Check for valid
405 			 * vHCI ops registration.  We only support one vHCI
406 			 * module per class
407 			 */
408 			if (vh->vh_ops != NULL) {
409 				mutex_exit(&mdi_mutex);
410 				cmn_err(CE_NOTE, vhci_greeting, class);
411 				return (MDI_FAILURE);
412 			}
413 			break;
414 		}
415 	}
416 
417 	/*
418 	 * if not yet created, create the vHCI component
419 	 */
420 	if (vh == NULL) {
421 		struct client_hash	*hash = NULL;
422 		char			*load_balance;
423 
424 		/*
425 		 * Allocate and initialize the mdi extensions
426 		 */
427 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
428 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
429 		    KM_SLEEP);
430 		vh->vh_client_table = hash;
431 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
432 		(void) strcpy(vh->vh_class, class);
433 		vh->vh_lb = LOAD_BALANCE_RR;
434 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
435 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
436 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
437 				vh->vh_lb = LOAD_BALANCE_NONE;
438 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
439 				    == 0) {
440 				vh->vh_lb = LOAD_BALANCE_LBA;
441 			}
442 			ddi_prop_free(load_balance);
443 		}
444 
445 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
446 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
447 
448 		/*
449 		 * Store the vHCI ops vectors
450 		 */
451 		vh->vh_dip = vdip;
452 		vh->vh_ops = vops;
453 
454 		setup_vhci_cache(vh);
455 
456 		if (mdi_vhci_head == NULL) {
457 			mdi_vhci_head = vh;
458 		}
459 		if (mdi_vhci_tail) {
460 			mdi_vhci_tail->vh_next = vh;
461 		}
462 		mdi_vhci_tail = vh;
463 		mdi_vhci_count++;
464 	}
465 
466 	/*
467 	 * Claim the devfs node as a vhci component
468 	 */
469 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
470 
471 	/*
472 	 * Initialize our back reference from dev_info node
473 	 */
474 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
475 	mutex_exit(&mdi_mutex);
476 	return (MDI_SUCCESS);
477 }
478 
479 /*
480  * mdi_vhci_unregister():
481  *		Unregister a vHCI module from mpxio framework
482  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
483  * 		of a vhci to unregister it from the framework.
484  * Return Values:
485  *		MDI_SUCCESS
486  *		MDI_FAILURE
487  */
488 /*ARGSUSED*/
489 int
490 mdi_vhci_unregister(dev_info_t *vdip, int flags)
491 {
492 	mdi_vhci_t	*found, *vh, *prev = NULL;
493 
494 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
495 
496 	/*
497 	 * Check for invalid VHCI
498 	 */
499 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
500 		return (MDI_FAILURE);
501 
502 	/*
503 	 * Scan the list of registered vHCIs for a match
504 	 */
505 	mutex_enter(&mdi_mutex);
506 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
507 		if (found == vh)
508 			break;
509 		prev = found;
510 	}
511 
512 	if (found == NULL) {
513 		mutex_exit(&mdi_mutex);
514 		return (MDI_FAILURE);
515 	}
516 
517 	/*
518 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
519 	 * should have been unregistered, before a vHCI can be
520 	 * unregistered.
521 	 */
522 	MDI_VHCI_PHCI_LOCK(vh);
523 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
524 		MDI_VHCI_PHCI_UNLOCK(vh);
525 		mutex_exit(&mdi_mutex);
526 		return (MDI_FAILURE);
527 	}
528 	MDI_VHCI_PHCI_UNLOCK(vh);
529 
530 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
531 		mutex_exit(&mdi_mutex);
532 		return (MDI_FAILURE);
533 	}
534 
535 	/*
536 	 * Remove the vHCI from the global list
537 	 */
538 	if (vh == mdi_vhci_head) {
539 		mdi_vhci_head = vh->vh_next;
540 	} else {
541 		prev->vh_next = vh->vh_next;
542 	}
543 	if (vh == mdi_vhci_tail) {
544 		mdi_vhci_tail = prev;
545 	}
546 	mdi_vhci_count--;
547 	mutex_exit(&mdi_mutex);
548 
549 	vh->vh_ops = NULL;
550 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
551 	DEVI(vdip)->devi_mdi_xhci = NULL;
552 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
553 	kmem_free(vh->vh_client_table,
554 	    mdi_client_table_size * sizeof (struct client_hash));
555 	mutex_destroy(&vh->vh_phci_mutex);
556 	mutex_destroy(&vh->vh_client_mutex);
557 
558 	kmem_free(vh, sizeof (mdi_vhci_t));
559 	return (MDI_SUCCESS);
560 }
561 
562 /*
563  * i_mdi_vhci_class2vhci():
564  *		Look for a matching vHCI module given a vHCI class name
565  * Return Values:
566  *		Handle to a vHCI component
567  *		NULL
568  */
569 static mdi_vhci_t *
570 i_mdi_vhci_class2vhci(char *class)
571 {
572 	mdi_vhci_t	*vh = NULL;
573 
574 	ASSERT(!MUTEX_HELD(&mdi_mutex));
575 
576 	mutex_enter(&mdi_mutex);
577 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
578 		if (strcmp(vh->vh_class, class) == 0) {
579 			break;
580 		}
581 	}
582 	mutex_exit(&mdi_mutex);
583 	return (vh);
584 }
585 
586 /*
587  * i_devi_get_vhci():
588  *		Utility function to get the handle to a vHCI component
589  * Return Values:
590  *		Handle to a vHCI component
591  *		NULL
592  */
593 mdi_vhci_t *
594 i_devi_get_vhci(dev_info_t *vdip)
595 {
596 	mdi_vhci_t	*vh = NULL;
597 	if (MDI_VHCI(vdip)) {
598 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
599 	}
600 	return (vh);
601 }
602 
603 /*
604  * mdi_phci_register():
605  *		Register a pHCI module with mpxio framework
606  *		mdi_phci_register() is called by pHCI drivers to register with
607  *		the mpxio framework and a specific 'class_driver' vHCI.  The
608  *		pHCI driver must call this interface as part of its attach(9e)
609  *		handler.
610  * Return Values:
611  *		MDI_SUCCESS
612  *		MDI_FAILURE
613  */
614 /*ARGSUSED*/
615 int
616 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
617 {
618 	mdi_phci_t		*ph;
619 	mdi_vhci_t		*vh;
620 	char			*data;
621 	char			*pathname;
622 
623 	/*
624 	 * Some subsystems, like fcp, perform pHCI registration from a
625 	 * different thread than the one doing the pHCI attach(9E) - the
626 	 * driver attach code is waiting for this other thread to complete.
627 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
628 	 * (indicating that some thread has done an ndi_devi_enter of parent)
629 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
630 	 */
631 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
632 
633 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
634 	(void) ddi_pathname(pdip, pathname);
635 
636 	/*
637 	 * Check for mpxio-disable property. Enable mpxio if the property is
638 	 * missing or not set to "yes".
639 	 * If the property is set to "yes" then emit a brief message.
640 	 */
641 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
642 	    &data) == DDI_SUCCESS)) {
643 		if (strcmp(data, "yes") == 0) {
644 			MDI_DEBUG(1, (CE_CONT, pdip,
645 			    "?%s (%s%d) multipath capabilities "
646 			    "disabled via %s.conf.\n", pathname,
647 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
648 			    ddi_driver_name(pdip)));
649 			ddi_prop_free(data);
650 			kmem_free(pathname, MAXPATHLEN);
651 			return (MDI_FAILURE);
652 		}
653 		ddi_prop_free(data);
654 	}
655 
656 	kmem_free(pathname, MAXPATHLEN);
657 
658 	/*
659 	 * Search for a matching vHCI
660 	 */
661 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
662 	if (vh == NULL) {
663 		return (MDI_FAILURE);
664 	}
665 
666 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
667 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
668 	ph->ph_dip = pdip;
669 	ph->ph_vhci = vh;
670 	ph->ph_next = NULL;
671 	ph->ph_unstable = 0;
672 	ph->ph_vprivate = 0;
673 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
674 
675 	MDI_PHCI_LOCK(ph);
676 	MDI_PHCI_SET_POWER_UP(ph);
677 	MDI_PHCI_UNLOCK(ph);
678 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
679 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
680 
681 	vhcache_phci_add(vh->vh_config, ph);
682 
683 	MDI_VHCI_PHCI_LOCK(vh);
684 	if (vh->vh_phci_head == NULL) {
685 		vh->vh_phci_head = ph;
686 	}
687 	if (vh->vh_phci_tail) {
688 		vh->vh_phci_tail->ph_next = ph;
689 	}
690 	vh->vh_phci_tail = ph;
691 	vh->vh_phci_count++;
692 	MDI_VHCI_PHCI_UNLOCK(vh);
693 
694 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
695 	return (MDI_SUCCESS);
696 }
697 
698 /*
699  * mdi_phci_unregister():
700  *		Unregister a pHCI module from mpxio framework
701  *		mdi_phci_unregister() is called by the pHCI drivers from their
702  *		detach(9E) handler to unregister their instances from the
703  *		framework.
704  * Return Values:
705  *		MDI_SUCCESS
706  *		MDI_FAILURE
707  */
708 /*ARGSUSED*/
709 int
710 mdi_phci_unregister(dev_info_t *pdip, int flags)
711 {
712 	mdi_vhci_t		*vh;
713 	mdi_phci_t		*ph;
714 	mdi_phci_t		*tmp;
715 	mdi_phci_t		*prev = NULL;
716 
717 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
718 
719 	ph = i_devi_get_phci(pdip);
720 	if (ph == NULL) {
721 		MDI_DEBUG(1, (CE_WARN, pdip,
722 		    "!pHCI unregister: Not a valid pHCI"));
723 		return (MDI_FAILURE);
724 	}
725 
726 	vh = ph->ph_vhci;
727 	ASSERT(vh != NULL);
728 	if (vh == NULL) {
729 		MDI_DEBUG(1, (CE_WARN, pdip,
730 		    "!pHCI unregister: Not a valid vHCI"));
731 		return (MDI_FAILURE);
732 	}
733 
734 	MDI_VHCI_PHCI_LOCK(vh);
735 	tmp = vh->vh_phci_head;
736 	while (tmp) {
737 		if (tmp == ph) {
738 			break;
739 		}
740 		prev = tmp;
741 		tmp = tmp->ph_next;
742 	}
743 
744 	if (ph == vh->vh_phci_head) {
745 		vh->vh_phci_head = ph->ph_next;
746 	} else {
747 		prev->ph_next = ph->ph_next;
748 	}
749 
750 	if (ph == vh->vh_phci_tail) {
751 		vh->vh_phci_tail = prev;
752 	}
753 
754 	vh->vh_phci_count--;
755 	MDI_VHCI_PHCI_UNLOCK(vh);
756 
757 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
758 	    ESC_DDI_INITIATOR_UNREGISTER);
759 	vhcache_phci_remove(vh->vh_config, ph);
760 	cv_destroy(&ph->ph_unstable_cv);
761 	mutex_destroy(&ph->ph_mutex);
762 	kmem_free(ph, sizeof (mdi_phci_t));
763 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
764 	DEVI(pdip)->devi_mdi_xhci = NULL;
765 	return (MDI_SUCCESS);
766 }
767 
768 /*
769  * i_devi_get_phci():
770  * 		Utility function to return the phci extensions.
771  */
772 static mdi_phci_t *
773 i_devi_get_phci(dev_info_t *pdip)
774 {
775 	mdi_phci_t	*ph = NULL;
776 
777 	if (MDI_PHCI(pdip)) {
778 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
779 	}
780 	return (ph);
781 }
782 
783 /*
784  * Single thread mdi entry into devinfo node for modifying its children.
785  * If necessary we perform an ndi_devi_enter of the vHCI before doing
786  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
787  * for the vHCI and one for the pHCI.
788  */
789 void
790 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
791 {
792 	dev_info_t	*vdip;
793 	int		vcircular, pcircular;
794 
795 	/* Verify calling context */
796 	ASSERT(MDI_PHCI(phci_dip));
797 	vdip = mdi_devi_get_vdip(phci_dip);
798 	ASSERT(vdip);			/* A pHCI always has a vHCI */
799 
800 	/*
801 	 * If pHCI is detaching then the framework has already entered the
802 	 * vHCI on a threads that went down the code path leading to
803 	 * detach_node().  This framework enter of the vHCI during pHCI
804 	 * detach is done to avoid deadlock with vHCI power management
805 	 * operations which enter the vHCI and the enter down the path
806 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
807 	 * enter of the vHCI on frameworks vHCI enter that has already
808 	 * occurred - this is OK because we know that the framework thread
809 	 * doing detach is waiting for our completion.
810 	 *
811 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
812 	 * race with detach - but we can't do that because the framework has
813 	 * already entered the parent, so we have some complexity instead.
814 	 */
815 	for (;;) {
816 		if (ndi_devi_tryenter(vdip, &vcircular)) {
817 			ASSERT(vcircular != -1);
818 			if (DEVI_IS_DETACHING(phci_dip)) {
819 				ndi_devi_exit(vdip, vcircular);
820 				vcircular = -1;
821 			}
822 			break;
823 		} else if (DEVI_IS_DETACHING(phci_dip)) {
824 			vcircular = -1;
825 			break;
826 		} else {
827 			delay(1);
828 		}
829 	}
830 
831 	ndi_devi_enter(phci_dip, &pcircular);
832 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
833 }
834 
835 /*
836  * Attempt to mdi_devi_enter.
837  */
838 int
839 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
840 {
841 	dev_info_t	*vdip;
842 	int		vcircular, pcircular;
843 
844 	/* Verify calling context */
845 	ASSERT(MDI_PHCI(phci_dip));
846 	vdip = mdi_devi_get_vdip(phci_dip);
847 	ASSERT(vdip);			/* A pHCI always has a vHCI */
848 
849 	if (ndi_devi_tryenter(vdip, &vcircular)) {
850 		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
851 			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
852 			return (1);	/* locked */
853 		}
854 		ndi_devi_exit(vdip, vcircular);
855 	}
856 	return (0);			/* busy */
857 }
858 
859 /*
860  * Release mdi_devi_enter or successful mdi_devi_tryenter.
861  */
862 void
863 mdi_devi_exit(dev_info_t *phci_dip, int circular)
864 {
865 	dev_info_t	*vdip;
866 	int		vcircular, pcircular;
867 
868 	/* Verify calling context */
869 	ASSERT(MDI_PHCI(phci_dip));
870 	vdip = mdi_devi_get_vdip(phci_dip);
871 	ASSERT(vdip);			/* A pHCI always has a vHCI */
872 
873 	/* extract two circular recursion values from single int */
874 	pcircular = (short)(circular & 0xFFFF);
875 	vcircular = (short)((circular >> 16) & 0xFFFF);
876 
877 	ndi_devi_exit(phci_dip, pcircular);
878 	if (vcircular != -1)
879 		ndi_devi_exit(vdip, vcircular);
880 }
881 
882 /*
883  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
884  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
885  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
886  * with vHCI power management code during path online/offline.  Each
887  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
888  * occur within the scope of an active mdi_devi_enter that establishes the
889  * circular value.
890  */
891 void
892 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
893 {
894 	int		pcircular;
895 
896 	/* Verify calling context */
897 	ASSERT(MDI_PHCI(phci_dip));
898 
899 	pcircular = (short)(circular & 0xFFFF);
900 	ndi_devi_exit(phci_dip, pcircular);
901 }
902 
903 void
904 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
905 {
906 	int		pcircular;
907 
908 	/* Verify calling context */
909 	ASSERT(MDI_PHCI(phci_dip));
910 
911 	ndi_devi_enter(phci_dip, &pcircular);
912 
913 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
914 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
915 }
916 
917 /*
918  * mdi_devi_get_vdip():
919  *		given a pHCI dip return vHCI dip
920  */
921 dev_info_t *
922 mdi_devi_get_vdip(dev_info_t *pdip)
923 {
924 	mdi_phci_t	*ph;
925 
926 	ph = i_devi_get_phci(pdip);
927 	if (ph && ph->ph_vhci)
928 		return (ph->ph_vhci->vh_dip);
929 	return (NULL);
930 }
931 
932 /*
933  * mdi_devi_pdip_entered():
934  *		Return 1 if we are vHCI and have done an ndi_devi_enter
935  *		of a pHCI
936  */
937 int
938 mdi_devi_pdip_entered(dev_info_t *vdip)
939 {
940 	mdi_vhci_t	*vh;
941 	mdi_phci_t	*ph;
942 
943 	vh = i_devi_get_vhci(vdip);
944 	if (vh == NULL)
945 		return (0);
946 
947 	MDI_VHCI_PHCI_LOCK(vh);
948 	ph = vh->vh_phci_head;
949 	while (ph) {
950 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
951 			MDI_VHCI_PHCI_UNLOCK(vh);
952 			return (1);
953 		}
954 		ph = ph->ph_next;
955 	}
956 	MDI_VHCI_PHCI_UNLOCK(vh);
957 	return (0);
958 }
959 
960 /*
961  * mdi_phci_path2devinfo():
962  * 		Utility function to search for a valid phci device given
963  *		the devfs pathname.
964  */
965 dev_info_t *
966 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
967 {
968 	char		*temp_pathname;
969 	mdi_vhci_t	*vh;
970 	mdi_phci_t	*ph;
971 	dev_info_t 	*pdip = NULL;
972 
973 	vh = i_devi_get_vhci(vdip);
974 	ASSERT(vh != NULL);
975 
976 	if (vh == NULL) {
977 		/*
978 		 * Invalid vHCI component, return failure
979 		 */
980 		return (NULL);
981 	}
982 
983 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
984 	MDI_VHCI_PHCI_LOCK(vh);
985 	ph = vh->vh_phci_head;
986 	while (ph != NULL) {
987 		pdip = ph->ph_dip;
988 		ASSERT(pdip != NULL);
989 		*temp_pathname = '\0';
990 		(void) ddi_pathname(pdip, temp_pathname);
991 		if (strcmp(temp_pathname, pathname) == 0) {
992 			break;
993 		}
994 		ph = ph->ph_next;
995 	}
996 	if (ph == NULL) {
997 		pdip = NULL;
998 	}
999 	MDI_VHCI_PHCI_UNLOCK(vh);
1000 	kmem_free(temp_pathname, MAXPATHLEN);
1001 	return (pdip);
1002 }
1003 
1004 /*
1005  * mdi_phci_get_path_count():
1006  * 		get number of path information nodes associated with a given
1007  *		pHCI device.
1008  */
1009 int
1010 mdi_phci_get_path_count(dev_info_t *pdip)
1011 {
1012 	mdi_phci_t	*ph;
1013 	int		count = 0;
1014 
1015 	ph = i_devi_get_phci(pdip);
1016 	if (ph != NULL) {
1017 		count = ph->ph_path_count;
1018 	}
1019 	return (count);
1020 }
1021 
1022 /*
1023  * i_mdi_phci_lock():
1024  *		Lock a pHCI device
1025  * Return Values:
1026  *		None
1027  * Note:
1028  *		The default locking order is:
1029  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1030  *		But there are number of situations where locks need to be
1031  *		grabbed in reverse order.  This routine implements try and lock
1032  *		mechanism depending on the requested parameter option.
1033  */
1034 static void
1035 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1036 {
1037 	if (pip) {
1038 		/* Reverse locking is requested. */
1039 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
1040 			/*
1041 			 * tryenter failed. Try to grab again
1042 			 * after a small delay
1043 			 */
1044 			MDI_PI_HOLD(pip);
1045 			MDI_PI_UNLOCK(pip);
1046 			delay(1);
1047 			MDI_PI_LOCK(pip);
1048 			MDI_PI_RELE(pip);
1049 		}
1050 	} else {
1051 		MDI_PHCI_LOCK(ph);
1052 	}
1053 }
1054 
1055 /*
1056  * i_mdi_phci_unlock():
1057  *		Unlock the pHCI component
1058  */
1059 static void
1060 i_mdi_phci_unlock(mdi_phci_t *ph)
1061 {
1062 	MDI_PHCI_UNLOCK(ph);
1063 }
1064 
1065 /*
1066  * i_mdi_devinfo_create():
1067  *		create client device's devinfo node
1068  * Return Values:
1069  *		dev_info
1070  *		NULL
1071  * Notes:
1072  */
1073 static dev_info_t *
1074 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1075 	char **compatible, int ncompatible)
1076 {
1077 	dev_info_t *cdip = NULL;
1078 
1079 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1080 
1081 	/* Verify for duplicate entry */
1082 	cdip = i_mdi_devinfo_find(vh, name, guid);
1083 	ASSERT(cdip == NULL);
1084 	if (cdip) {
1085 		cmn_err(CE_WARN,
1086 		    "i_mdi_devinfo_create: client dip %p already exists",
1087 			(void *)cdip);
1088 	}
1089 
1090 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1091 	if (cdip == NULL)
1092 		goto fail;
1093 
1094 	/*
1095 	 * Create component type and Global unique identifier
1096 	 * properties
1097 	 */
1098 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1099 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1100 		goto fail;
1101 	}
1102 
1103 	/* Decorate the node with compatible property */
1104 	if (compatible &&
1105 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1106 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1107 		goto fail;
1108 	}
1109 
1110 	return (cdip);
1111 
1112 fail:
1113 	if (cdip) {
1114 		(void) ndi_prop_remove_all(cdip);
1115 		(void) ndi_devi_free(cdip);
1116 	}
1117 	return (NULL);
1118 }
1119 
1120 /*
1121  * i_mdi_devinfo_find():
1122  *		Find a matching devinfo node for given client node name
1123  *		and its guid.
1124  * Return Values:
1125  *		Handle to a dev_info node or NULL
1126  */
1127 static dev_info_t *
1128 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1129 {
1130 	char			*data;
1131 	dev_info_t 		*cdip = NULL;
1132 	dev_info_t 		*ndip = NULL;
1133 	int			circular;
1134 
1135 	ndi_devi_enter(vh->vh_dip, &circular);
1136 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1137 	while ((cdip = ndip) != NULL) {
1138 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1139 
1140 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1141 			continue;
1142 		}
1143 
1144 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1145 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1146 		    &data) != DDI_PROP_SUCCESS) {
1147 			continue;
1148 		}
1149 
1150 		if (strcmp(data, guid) != 0) {
1151 			ddi_prop_free(data);
1152 			continue;
1153 		}
1154 		ddi_prop_free(data);
1155 		break;
1156 	}
1157 	ndi_devi_exit(vh->vh_dip, circular);
1158 	return (cdip);
1159 }
1160 
1161 /*
1162  * i_mdi_devinfo_remove():
1163  *		Remove a client device node
1164  */
1165 static int
1166 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1167 {
1168 	int	rv = MDI_SUCCESS;
1169 
1170 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1171 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1172 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
1173 		if (rv != NDI_SUCCESS) {
1174 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
1175 			    " failed. cdip = %p\n", (void *)cdip));
1176 		}
1177 		/*
1178 		 * Convert to MDI error code
1179 		 */
1180 		switch (rv) {
1181 		case NDI_SUCCESS:
1182 			rv = MDI_SUCCESS;
1183 			break;
1184 		case NDI_BUSY:
1185 			rv = MDI_BUSY;
1186 			break;
1187 		default:
1188 			rv = MDI_FAILURE;
1189 			break;
1190 		}
1191 	}
1192 	return (rv);
1193 }
1194 
1195 /*
1196  * i_devi_get_client()
1197  *		Utility function to get mpxio component extensions
1198  */
1199 static mdi_client_t *
1200 i_devi_get_client(dev_info_t *cdip)
1201 {
1202 	mdi_client_t	*ct = NULL;
1203 
1204 	if (MDI_CLIENT(cdip)) {
1205 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1206 	}
1207 	return (ct);
1208 }
1209 
1210 /*
1211  * i_mdi_is_child_present():
1212  *		Search for the presence of client device dev_info node
1213  */
1214 static int
1215 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1216 {
1217 	int		rv = MDI_FAILURE;
1218 	struct dev_info	*dip;
1219 	int		circular;
1220 
1221 	ndi_devi_enter(vdip, &circular);
1222 	dip = DEVI(vdip)->devi_child;
1223 	while (dip) {
1224 		if (dip == DEVI(cdip)) {
1225 			rv = MDI_SUCCESS;
1226 			break;
1227 		}
1228 		dip = dip->devi_sibling;
1229 	}
1230 	ndi_devi_exit(vdip, circular);
1231 	return (rv);
1232 }
1233 
1234 
1235 /*
1236  * i_mdi_client_lock():
1237  *		Grab client component lock
1238  * Return Values:
1239  *		None
1240  * Note:
1241  *		The default locking order is:
1242  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1243  *		But there are number of situations where locks need to be
1244  *		grabbed in reverse order.  This routine implements try and lock
1245  *		mechanism depending on the requested parameter option.
1246  */
1247 static void
1248 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1249 {
1250 	if (pip) {
1251 		/*
1252 		 * Reverse locking is requested.
1253 		 */
1254 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1255 			/*
1256 			 * tryenter failed. Try to grab again
1257 			 * after a small delay
1258 			 */
1259 			MDI_PI_HOLD(pip);
1260 			MDI_PI_UNLOCK(pip);
1261 			delay(1);
1262 			MDI_PI_LOCK(pip);
1263 			MDI_PI_RELE(pip);
1264 		}
1265 	} else {
1266 		MDI_CLIENT_LOCK(ct);
1267 	}
1268 }
1269 
1270 /*
1271  * i_mdi_client_unlock():
1272  *		Unlock a client component
1273  */
1274 static void
1275 i_mdi_client_unlock(mdi_client_t *ct)
1276 {
1277 	MDI_CLIENT_UNLOCK(ct);
1278 }
1279 
1280 /*
1281  * i_mdi_client_alloc():
1282  * 		Allocate and initialize a client structure.  Caller should
1283  *		hold the vhci client lock.
1284  * Return Values:
1285  *		Handle to a client component
1286  */
1287 /*ARGSUSED*/
1288 static mdi_client_t *
1289 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1290 {
1291 	mdi_client_t	*ct;
1292 
1293 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1294 
1295 	/*
1296 	 * Allocate and initialize a component structure.
1297 	 */
1298 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1299 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1300 	ct->ct_hnext = NULL;
1301 	ct->ct_hprev = NULL;
1302 	ct->ct_dip = NULL;
1303 	ct->ct_vhci = vh;
1304 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1305 	(void) strcpy(ct->ct_drvname, name);
1306 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1307 	(void) strcpy(ct->ct_guid, lguid);
1308 	ct->ct_cprivate = NULL;
1309 	ct->ct_vprivate = NULL;
1310 	ct->ct_flags = 0;
1311 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1312 	MDI_CLIENT_LOCK(ct);
1313 	MDI_CLIENT_SET_OFFLINE(ct);
1314 	MDI_CLIENT_SET_DETACH(ct);
1315 	MDI_CLIENT_SET_POWER_UP(ct);
1316 	MDI_CLIENT_UNLOCK(ct);
1317 	ct->ct_failover_flags = 0;
1318 	ct->ct_failover_status = 0;
1319 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1320 	ct->ct_unstable = 0;
1321 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1322 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1323 	ct->ct_lb = vh->vh_lb;
1324 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1325 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1326 	ct->ct_path_count = 0;
1327 	ct->ct_path_head = NULL;
1328 	ct->ct_path_tail = NULL;
1329 	ct->ct_path_last = NULL;
1330 
1331 	/*
1332 	 * Add this client component to our client hash queue
1333 	 */
1334 	i_mdi_client_enlist_table(vh, ct);
1335 	return (ct);
1336 }
1337 
1338 /*
1339  * i_mdi_client_enlist_table():
1340  *		Attach the client device to the client hash table. Caller
1341  *		should hold the vhci client lock.
1342  */
1343 static void
1344 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1345 {
1346 	int 			index;
1347 	struct client_hash	*head;
1348 
1349 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1350 
1351 	index = i_mdi_get_hash_key(ct->ct_guid);
1352 	head = &vh->vh_client_table[index];
1353 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1354 	head->ct_hash_head = ct;
1355 	head->ct_hash_count++;
1356 	vh->vh_client_count++;
1357 }
1358 
1359 /*
1360  * i_mdi_client_delist_table():
1361  *		Attach the client device to the client hash table.
1362  *		Caller should hold the vhci client lock.
1363  */
1364 static void
1365 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1366 {
1367 	int			index;
1368 	char			*guid;
1369 	struct client_hash 	*head;
1370 	mdi_client_t		*next;
1371 	mdi_client_t		*last;
1372 
1373 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1374 
1375 	guid = ct->ct_guid;
1376 	index = i_mdi_get_hash_key(guid);
1377 	head = &vh->vh_client_table[index];
1378 
1379 	last = NULL;
1380 	next = (mdi_client_t *)head->ct_hash_head;
1381 	while (next != NULL) {
1382 		if (next == ct) {
1383 			break;
1384 		}
1385 		last = next;
1386 		next = next->ct_hnext;
1387 	}
1388 
1389 	if (next) {
1390 		head->ct_hash_count--;
1391 		if (last == NULL) {
1392 			head->ct_hash_head = ct->ct_hnext;
1393 		} else {
1394 			last->ct_hnext = ct->ct_hnext;
1395 		}
1396 		ct->ct_hnext = NULL;
1397 		vh->vh_client_count--;
1398 	}
1399 }
1400 
1401 
1402 /*
1403  * i_mdi_client_free():
1404  *		Free a client component
1405  */
1406 static int
1407 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1408 {
1409 	int		rv = MDI_SUCCESS;
1410 	int		flags = ct->ct_flags;
1411 	dev_info_t	*cdip;
1412 	dev_info_t	*vdip;
1413 
1414 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1415 
1416 	vdip = vh->vh_dip;
1417 	cdip = ct->ct_dip;
1418 
1419 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1420 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1421 	DEVI(cdip)->devi_mdi_client = NULL;
1422 
1423 	/*
1424 	 * Clear out back ref. to dev_info_t node
1425 	 */
1426 	ct->ct_dip = NULL;
1427 
1428 	/*
1429 	 * Remove this client from our hash queue
1430 	 */
1431 	i_mdi_client_delist_table(vh, ct);
1432 
1433 	/*
1434 	 * Uninitialize and free the component
1435 	 */
1436 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1437 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1438 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1439 	cv_destroy(&ct->ct_failover_cv);
1440 	cv_destroy(&ct->ct_unstable_cv);
1441 	cv_destroy(&ct->ct_powerchange_cv);
1442 	mutex_destroy(&ct->ct_mutex);
1443 	kmem_free(ct, sizeof (*ct));
1444 
1445 	if (cdip != NULL) {
1446 		MDI_VHCI_CLIENT_UNLOCK(vh);
1447 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1448 		MDI_VHCI_CLIENT_LOCK(vh);
1449 	}
1450 	return (rv);
1451 }
1452 
1453 /*
1454  * i_mdi_client_find():
1455  * 		Find the client structure corresponding to a given guid
1456  *		Caller should hold the vhci client lock.
1457  */
1458 static mdi_client_t *
1459 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1460 {
1461 	int			index;
1462 	struct client_hash	*head;
1463 	mdi_client_t		*ct;
1464 
1465 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1466 
1467 	index = i_mdi_get_hash_key(guid);
1468 	head = &vh->vh_client_table[index];
1469 
1470 	ct = head->ct_hash_head;
1471 	while (ct != NULL) {
1472 		if (strcmp(ct->ct_guid, guid) == 0 &&
1473 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1474 			break;
1475 		}
1476 		ct = ct->ct_hnext;
1477 	}
1478 	return (ct);
1479 }
1480 
1481 /*
1482  * i_mdi_client_update_state():
1483  *		Compute and update client device state
1484  * Notes:
1485  *		A client device can be in any of three possible states:
1486  *
1487  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1488  *		one online/standby paths. Can tolerate failures.
1489  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1490  *		no alternate paths available as standby. A failure on the online
1491  *		would result in loss of access to device data.
1492  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1493  *		no paths available to access the device.
1494  */
1495 static void
1496 i_mdi_client_update_state(mdi_client_t *ct)
1497 {
1498 	int state;
1499 
1500 	ASSERT(MDI_CLIENT_LOCKED(ct));
1501 	state = i_mdi_client_compute_state(ct, NULL);
1502 	MDI_CLIENT_SET_STATE(ct, state);
1503 }
1504 
1505 /*
1506  * i_mdi_client_compute_state():
1507  *		Compute client device state
1508  *
1509  *		mdi_phci_t *	Pointer to pHCI structure which should
1510  *				while computing the new value.  Used by
1511  *				i_mdi_phci_offline() to find the new
1512  *				client state after DR of a pHCI.
1513  */
1514 static int
1515 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1516 {
1517 	int		state;
1518 	int		online_count = 0;
1519 	int		standby_count = 0;
1520 	mdi_pathinfo_t	*pip, *next;
1521 
1522 	ASSERT(MDI_CLIENT_LOCKED(ct));
1523 	pip = ct->ct_path_head;
1524 	while (pip != NULL) {
1525 		MDI_PI_LOCK(pip);
1526 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1527 		if (MDI_PI(pip)->pi_phci == ph) {
1528 			MDI_PI_UNLOCK(pip);
1529 			pip = next;
1530 			continue;
1531 		}
1532 
1533 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1534 				== MDI_PATHINFO_STATE_ONLINE)
1535 			online_count++;
1536 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1537 				== MDI_PATHINFO_STATE_STANDBY)
1538 			standby_count++;
1539 		MDI_PI_UNLOCK(pip);
1540 		pip = next;
1541 	}
1542 
1543 	if (online_count == 0) {
1544 		if (standby_count == 0) {
1545 			state = MDI_CLIENT_STATE_FAILED;
1546 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1547 			    " ct = %p\n", (void *)ct));
1548 		} else if (standby_count == 1) {
1549 			state = MDI_CLIENT_STATE_DEGRADED;
1550 		} else {
1551 			state = MDI_CLIENT_STATE_OPTIMAL;
1552 		}
1553 	} else if (online_count == 1) {
1554 		if (standby_count == 0) {
1555 			state = MDI_CLIENT_STATE_DEGRADED;
1556 		} else {
1557 			state = MDI_CLIENT_STATE_OPTIMAL;
1558 		}
1559 	} else {
1560 		state = MDI_CLIENT_STATE_OPTIMAL;
1561 	}
1562 	return (state);
1563 }
1564 
1565 /*
1566  * i_mdi_client2devinfo():
1567  *		Utility function
1568  */
1569 dev_info_t *
1570 i_mdi_client2devinfo(mdi_client_t *ct)
1571 {
1572 	return (ct->ct_dip);
1573 }
1574 
1575 /*
1576  * mdi_client_path2_devinfo():
1577  * 		Given the parent devinfo and child devfs pathname, search for
1578  *		a valid devfs node handle.
1579  */
1580 dev_info_t *
1581 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1582 {
1583 	dev_info_t 	*cdip = NULL;
1584 	dev_info_t 	*ndip = NULL;
1585 	char		*temp_pathname;
1586 	int		circular;
1587 
1588 	/*
1589 	 * Allocate temp buffer
1590 	 */
1591 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1592 
1593 	/*
1594 	 * Lock parent against changes
1595 	 */
1596 	ndi_devi_enter(vdip, &circular);
1597 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1598 	while ((cdip = ndip) != NULL) {
1599 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1600 
1601 		*temp_pathname = '\0';
1602 		(void) ddi_pathname(cdip, temp_pathname);
1603 		if (strcmp(temp_pathname, pathname) == 0) {
1604 			break;
1605 		}
1606 	}
1607 	/*
1608 	 * Release devinfo lock
1609 	 */
1610 	ndi_devi_exit(vdip, circular);
1611 
1612 	/*
1613 	 * Free the temp buffer
1614 	 */
1615 	kmem_free(temp_pathname, MAXPATHLEN);
1616 	return (cdip);
1617 }
1618 
1619 /*
1620  * mdi_client_get_path_count():
1621  * 		Utility function to get number of path information nodes
1622  *		associated with a given client device.
1623  */
1624 int
1625 mdi_client_get_path_count(dev_info_t *cdip)
1626 {
1627 	mdi_client_t	*ct;
1628 	int		count = 0;
1629 
1630 	ct = i_devi_get_client(cdip);
1631 	if (ct != NULL) {
1632 		count = ct->ct_path_count;
1633 	}
1634 	return (count);
1635 }
1636 
1637 
1638 /*
1639  * i_mdi_get_hash_key():
1640  * 		Create a hash using strings as keys
1641  *
1642  */
1643 static int
1644 i_mdi_get_hash_key(char *str)
1645 {
1646 	uint32_t	g, hash = 0;
1647 	char		*p;
1648 
1649 	for (p = str; *p != '\0'; p++) {
1650 		g = *p;
1651 		hash += g;
1652 	}
1653 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1654 }
1655 
1656 /*
1657  * mdi_get_lb_policy():
1658  * 		Get current load balancing policy for a given client device
1659  */
1660 client_lb_t
1661 mdi_get_lb_policy(dev_info_t *cdip)
1662 {
1663 	client_lb_t	lb = LOAD_BALANCE_NONE;
1664 	mdi_client_t	*ct;
1665 
1666 	ct = i_devi_get_client(cdip);
1667 	if (ct != NULL) {
1668 		lb = ct->ct_lb;
1669 	}
1670 	return (lb);
1671 }
1672 
1673 /*
1674  * mdi_set_lb_region_size():
1675  * 		Set current region size for the load-balance
1676  */
1677 int
1678 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1679 {
1680 	mdi_client_t	*ct;
1681 	int		rv = MDI_FAILURE;
1682 
1683 	ct = i_devi_get_client(cdip);
1684 	if (ct != NULL && ct->ct_lb_args != NULL) {
1685 		ct->ct_lb_args->region_size = region_size;
1686 		rv = MDI_SUCCESS;
1687 	}
1688 	return (rv);
1689 }
1690 
1691 /*
1692  * mdi_Set_lb_policy():
1693  * 		Set current load balancing policy for a given client device
1694  */
1695 int
1696 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1697 {
1698 	mdi_client_t	*ct;
1699 	int		rv = MDI_FAILURE;
1700 
1701 	ct = i_devi_get_client(cdip);
1702 	if (ct != NULL) {
1703 		ct->ct_lb = lb;
1704 		rv = MDI_SUCCESS;
1705 	}
1706 	return (rv);
1707 }
1708 
1709 /*
1710  * mdi_failover():
1711  *		failover function called by the vHCI drivers to initiate
1712  *		a failover operation.  This is typically due to non-availability
1713  *		of online paths to route I/O requests.  Failover can be
1714  *		triggered through user application also.
1715  *
1716  *		The vHCI driver calls mdi_failover() to initiate a failover
1717  *		operation. mdi_failover() calls back into the vHCI driver's
1718  *		vo_failover() entry point to perform the actual failover
1719  *		operation.  The reason for requiring the vHCI driver to
1720  *		initiate failover by calling mdi_failover(), instead of directly
1721  *		executing vo_failover() itself, is to ensure that the mdi
1722  *		framework can keep track of the client state properly.
1723  *		Additionally, mdi_failover() provides as a convenience the
1724  *		option of performing the failover operation synchronously or
1725  *		asynchronously
1726  *
1727  *		Upon successful completion of the failover operation, the
1728  *		paths that were previously ONLINE will be in the STANDBY state,
1729  *		and the newly activated paths will be in the ONLINE state.
1730  *
1731  *		The flags modifier determines whether the activation is done
1732  *		synchronously: MDI_FAILOVER_SYNC
1733  * Return Values:
1734  *		MDI_SUCCESS
1735  *		MDI_FAILURE
1736  *		MDI_BUSY
1737  */
1738 /*ARGSUSED*/
1739 int
1740 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1741 {
1742 	int			rv;
1743 	mdi_client_t		*ct;
1744 
1745 	ct = i_devi_get_client(cdip);
1746 	ASSERT(ct != NULL);
1747 	if (ct == NULL) {
1748 		/* cdip is not a valid client device. Nothing more to do. */
1749 		return (MDI_FAILURE);
1750 	}
1751 
1752 	MDI_CLIENT_LOCK(ct);
1753 
1754 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1755 		/* A path to the client is being freed */
1756 		MDI_CLIENT_UNLOCK(ct);
1757 		return (MDI_BUSY);
1758 	}
1759 
1760 
1761 	if (MDI_CLIENT_IS_FAILED(ct)) {
1762 		/*
1763 		 * Client is in failed state. Nothing more to do.
1764 		 */
1765 		MDI_CLIENT_UNLOCK(ct);
1766 		return (MDI_FAILURE);
1767 	}
1768 
1769 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1770 		/*
1771 		 * Failover is already in progress; return BUSY
1772 		 */
1773 		MDI_CLIENT_UNLOCK(ct);
1774 		return (MDI_BUSY);
1775 	}
1776 	/*
1777 	 * Make sure that mdi_pathinfo node state changes are processed.
1778 	 * We do not allow failovers to progress while client path state
1779 	 * changes are in progress
1780 	 */
1781 	if (ct->ct_unstable) {
1782 		if (flags == MDI_FAILOVER_ASYNC) {
1783 			MDI_CLIENT_UNLOCK(ct);
1784 			return (MDI_BUSY);
1785 		} else {
1786 			while (ct->ct_unstable)
1787 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1788 		}
1789 	}
1790 
1791 	/*
1792 	 * Client device is in stable state. Before proceeding, perform sanity
1793 	 * checks again.
1794 	 */
1795 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1796 	    (!i_ddi_devi_attached(ct->ct_dip))) {
1797 		/*
1798 		 * Client is in failed state. Nothing more to do.
1799 		 */
1800 		MDI_CLIENT_UNLOCK(ct);
1801 		return (MDI_FAILURE);
1802 	}
1803 
1804 	/*
1805 	 * Set the client state as failover in progress.
1806 	 */
1807 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1808 	ct->ct_failover_flags = flags;
1809 	MDI_CLIENT_UNLOCK(ct);
1810 
1811 	if (flags == MDI_FAILOVER_ASYNC) {
1812 		/*
1813 		 * Submit the initiate failover request via CPR safe
1814 		 * taskq threads.
1815 		 */
1816 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1817 		    ct, KM_SLEEP);
1818 		return (MDI_ACCEPT);
1819 	} else {
1820 		/*
1821 		 * Synchronous failover mode.  Typically invoked from the user
1822 		 * land.
1823 		 */
1824 		rv = i_mdi_failover(ct);
1825 	}
1826 	return (rv);
1827 }
1828 
1829 /*
1830  * i_mdi_failover():
1831  *		internal failover function. Invokes vHCI drivers failover
1832  *		callback function and process the failover status
1833  * Return Values:
1834  *		None
1835  *
1836  * Note: A client device in failover state can not be detached or freed.
1837  */
1838 static int
1839 i_mdi_failover(void *arg)
1840 {
1841 	int		rv = MDI_SUCCESS;
1842 	mdi_client_t	*ct = (mdi_client_t *)arg;
1843 	mdi_vhci_t	*vh = ct->ct_vhci;
1844 
1845 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1846 
1847 	if (vh->vh_ops->vo_failover != NULL) {
1848 		/*
1849 		 * Call vHCI drivers callback routine
1850 		 */
1851 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1852 		    ct->ct_failover_flags);
1853 	}
1854 
1855 	MDI_CLIENT_LOCK(ct);
1856 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1857 
1858 	/*
1859 	 * Save the failover return status
1860 	 */
1861 	ct->ct_failover_status = rv;
1862 
1863 	/*
1864 	 * As a result of failover, client status would have been changed.
1865 	 * Update the client state and wake up anyone waiting on this client
1866 	 * device.
1867 	 */
1868 	i_mdi_client_update_state(ct);
1869 
1870 	cv_broadcast(&ct->ct_failover_cv);
1871 	MDI_CLIENT_UNLOCK(ct);
1872 	return (rv);
1873 }
1874 
1875 /*
1876  * Load balancing is logical block.
1877  * IOs within the range described by region_size
1878  * would go on the same path. This would improve the
1879  * performance by cache-hit on some of the RAID devices.
1880  * Search only for online paths(At some point we
1881  * may want to balance across target ports).
1882  * If no paths are found then default to round-robin.
1883  */
1884 static int
1885 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1886 {
1887 	int		path_index = -1;
1888 	int		online_path_count = 0;
1889 	int		online_nonpref_path_count = 0;
1890 	int 		region_size = ct->ct_lb_args->region_size;
1891 	mdi_pathinfo_t	*pip;
1892 	mdi_pathinfo_t	*next;
1893 	int		preferred, path_cnt;
1894 
1895 	pip = ct->ct_path_head;
1896 	while (pip) {
1897 		MDI_PI_LOCK(pip);
1898 		if (MDI_PI(pip)->pi_state ==
1899 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1900 			online_path_count++;
1901 		} else if (MDI_PI(pip)->pi_state ==
1902 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1903 			online_nonpref_path_count++;
1904 		}
1905 		next = (mdi_pathinfo_t *)
1906 		    MDI_PI(pip)->pi_client_link;
1907 		MDI_PI_UNLOCK(pip);
1908 		pip = next;
1909 	}
1910 	/* if found any online/preferred then use this type */
1911 	if (online_path_count > 0) {
1912 		path_cnt = online_path_count;
1913 		preferred = 1;
1914 	} else if (online_nonpref_path_count > 0) {
1915 		path_cnt = online_nonpref_path_count;
1916 		preferred = 0;
1917 	} else {
1918 		path_cnt = 0;
1919 	}
1920 	if (path_cnt) {
1921 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1922 		pip = ct->ct_path_head;
1923 		while (pip && path_index != -1) {
1924 			MDI_PI_LOCK(pip);
1925 			if (path_index == 0 &&
1926 			    (MDI_PI(pip)->pi_state ==
1927 			    MDI_PATHINFO_STATE_ONLINE) &&
1928 				MDI_PI(pip)->pi_preferred == preferred) {
1929 				MDI_PI_HOLD(pip);
1930 				MDI_PI_UNLOCK(pip);
1931 				*ret_pip = pip;
1932 				return (MDI_SUCCESS);
1933 			}
1934 			path_index --;
1935 			next = (mdi_pathinfo_t *)
1936 			    MDI_PI(pip)->pi_client_link;
1937 			MDI_PI_UNLOCK(pip);
1938 			pip = next;
1939 		}
1940 		if (pip == NULL) {
1941 			MDI_DEBUG(4, (CE_NOTE, NULL,
1942 			    "!lba %llx, no pip !!\n",
1943 				bp->b_lblkno));
1944 		} else {
1945 			MDI_DEBUG(4, (CE_NOTE, NULL,
1946 			    "!lba %llx, no pip for path_index, "
1947 			    "pip %p\n", bp->b_lblkno, (void *)pip));
1948 		}
1949 	}
1950 	return (MDI_FAILURE);
1951 }
1952 
1953 /*
1954  * mdi_select_path():
1955  *		select a path to access a client device.
1956  *
1957  *		mdi_select_path() function is called by the vHCI drivers to
1958  *		select a path to route the I/O request to.  The caller passes
1959  *		the block I/O data transfer structure ("buf") as one of the
1960  *		parameters.  The mpxio framework uses the buf structure
1961  *		contents to maintain per path statistics (total I/O size /
1962  *		count pending).  If more than one online paths are available to
1963  *		select, the framework automatically selects a suitable path
1964  *		for routing I/O request. If a failover operation is active for
1965  *		this client device the call shall be failed with MDI_BUSY error
1966  *		code.
1967  *
1968  *		By default this function returns a suitable path in online
1969  *		state based on the current load balancing policy.  Currently
1970  *		we support LOAD_BALANCE_NONE (Previously selected online path
1971  *		will continue to be used till the path is usable) and
1972  *		LOAD_BALANCE_RR (Online paths will be selected in a round
1973  *		robin fashion), LOAD_BALANCE_LB(Online paths will be selected
1974  *		based on the logical block).  The load balancing policy is
1975  *		set through the vHCI driver's configuration file (driver.conf).
1976  *
1977  *		vHCI drivers may override this default behavior by specifying
1978  *		appropriate flags.  The meaning of the third argument depends
1979  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
1980  *		then the argument is the "path instance" of the path to select.
1981  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
1982  *		"start_pip". A non NULL "start_pip" is the starting point to
1983  *		walk and find the next appropriate path.  The following values
1984  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
1985  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
1986  *		STANDBY path).
1987  *
1988  *		The non-standard behavior is used by the scsi_vhci driver,
1989  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1990  *		attach of client devices (to avoid an unnecessary failover
1991  *		when the STANDBY path comes up first), during failover
1992  *		(to activate a STANDBY path as ONLINE).
1993  *
1994  *		The selected path is returned in a mdi_hold_path() state
1995  *		(pi_ref_cnt). Caller should release the hold by calling
1996  *		mdi_rele_path().
1997  *
1998  * Return Values:
1999  *		MDI_SUCCESS	- Completed successfully
2000  *		MDI_BUSY 	- Client device is busy failing over
2001  *		MDI_NOPATH	- Client device is online, but no valid path are
2002  *				  available to access this client device
2003  *		MDI_FAILURE	- Invalid client device or state
2004  *		MDI_DEVI_ONLINING
2005  *				- Client device (struct dev_info state) is in
2006  *				  onlining state.
2007  */
2008 
2009 /*ARGSUSED*/
2010 int
2011 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2012     void *arg, mdi_pathinfo_t **ret_pip)
2013 {
2014 	mdi_client_t	*ct;
2015 	mdi_pathinfo_t	*pip;
2016 	mdi_pathinfo_t	*next;
2017 	mdi_pathinfo_t	*head;
2018 	mdi_pathinfo_t	*start;
2019 	client_lb_t	lbp;	/* load balancing policy */
2020 	int		sb = 1;	/* standard behavior */
2021 	int		preferred = 1;	/* preferred path */
2022 	int		cond, cont = 1;
2023 	int		retry = 0;
2024 	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
2025 	int		path_instance;	/* request specific path instance */
2026 
2027 	/* determine type of arg based on flags */
2028 	if (flags & MDI_SELECT_PATH_INSTANCE) {
2029 		flags &= ~MDI_SELECT_PATH_INSTANCE;
2030 		path_instance = (int)(intptr_t)arg;
2031 		start_pip = NULL;
2032 	} else {
2033 		path_instance = 0;
2034 		start_pip = (mdi_pathinfo_t *)arg;
2035 	}
2036 
2037 	if (flags != 0) {
2038 		/*
2039 		 * disable default behavior
2040 		 */
2041 		sb = 0;
2042 	}
2043 
2044 	*ret_pip = NULL;
2045 	ct = i_devi_get_client(cdip);
2046 	if (ct == NULL) {
2047 		/* mdi extensions are NULL, Nothing more to do */
2048 		return (MDI_FAILURE);
2049 	}
2050 
2051 	MDI_CLIENT_LOCK(ct);
2052 
2053 	if (sb) {
2054 		if (MDI_CLIENT_IS_FAILED(ct)) {
2055 			/*
2056 			 * Client is not ready to accept any I/O requests.
2057 			 * Fail this request.
2058 			 */
2059 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
2060 			    "client state offline ct = %p\n", (void *)ct));
2061 			MDI_CLIENT_UNLOCK(ct);
2062 			return (MDI_FAILURE);
2063 		}
2064 
2065 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2066 			/*
2067 			 * Check for Failover is in progress. If so tell the
2068 			 * caller that this device is busy.
2069 			 */
2070 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
2071 			    "client failover in progress ct = %p\n",
2072 			    (void *)ct));
2073 			MDI_CLIENT_UNLOCK(ct);
2074 			return (MDI_BUSY);
2075 		}
2076 
2077 		/*
2078 		 * Check to see whether the client device is attached.
2079 		 * If not so, let the vHCI driver manually select a path
2080 		 * (standby) and let the probe/attach process to continue.
2081 		 */
2082 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2083 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
2084 			    "ct = %p\n", (void *)ct));
2085 			MDI_CLIENT_UNLOCK(ct);
2086 			return (MDI_DEVI_ONLINING);
2087 		}
2088 	}
2089 
2090 	/*
2091 	 * Cache in the client list head.  If head of the list is NULL
2092 	 * return MDI_NOPATH
2093 	 */
2094 	head = ct->ct_path_head;
2095 	if (head == NULL) {
2096 		MDI_CLIENT_UNLOCK(ct);
2097 		return (MDI_NOPATH);
2098 	}
2099 
2100 	/* Caller is specifying a specific pathinfo path by path_instance */
2101 	if (path_instance) {
2102 		/* search for pathinfo with correct path_instance */
2103 		for (pip = head;
2104 		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
2105 		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2106 			;
2107 
2108 		/* If path can't be selected then MDI_FAILURE is returned. */
2109 		if (pip == NULL) {
2110 			MDI_CLIENT_UNLOCK(ct);
2111 			return (MDI_FAILURE);
2112 		}
2113 
2114 		/* verify state of path */
2115 		MDI_PI_LOCK(pip);
2116 		if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) {
2117 			MDI_PI_UNLOCK(pip);
2118 			MDI_CLIENT_UNLOCK(ct);
2119 			return (MDI_FAILURE);
2120 		}
2121 
2122 		/*
2123 		 * Return the path in hold state. Caller should release the
2124 		 * lock by calling mdi_rele_path()
2125 		 */
2126 		MDI_PI_HOLD(pip);
2127 		MDI_PI_UNLOCK(pip);
2128 		ct->ct_path_last = pip;
2129 		*ret_pip = pip;
2130 		MDI_CLIENT_UNLOCK(ct);
2131 		return (MDI_SUCCESS);
2132 	}
2133 
2134 	/*
2135 	 * for non default behavior, bypass current
2136 	 * load balancing policy and always use LOAD_BALANCE_RR
2137 	 * except that the start point will be adjusted based
2138 	 * on the provided start_pip
2139 	 */
2140 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2141 
2142 	switch (lbp) {
2143 	case LOAD_BALANCE_NONE:
2144 		/*
2145 		 * Load balancing is None  or Alternate path mode
2146 		 * Start looking for a online mdi_pathinfo node starting from
2147 		 * last known selected path
2148 		 */
2149 		preferred = 1;
2150 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
2151 		if (pip == NULL) {
2152 			pip = head;
2153 		}
2154 		start = pip;
2155 		do {
2156 			MDI_PI_LOCK(pip);
2157 			/*
2158 			 * No need to explicitly check if the path is disabled.
2159 			 * Since we are checking for state == ONLINE and the
2160 			 * same variable is used for DISABLE/ENABLE information.
2161 			 */
2162 			if ((MDI_PI(pip)->pi_state  ==
2163 				MDI_PATHINFO_STATE_ONLINE) &&
2164 				preferred == MDI_PI(pip)->pi_preferred) {
2165 				/*
2166 				 * Return the path in hold state. Caller should
2167 				 * release the lock by calling mdi_rele_path()
2168 				 */
2169 				MDI_PI_HOLD(pip);
2170 				MDI_PI_UNLOCK(pip);
2171 				ct->ct_path_last = pip;
2172 				*ret_pip = pip;
2173 				MDI_CLIENT_UNLOCK(ct);
2174 				return (MDI_SUCCESS);
2175 			}
2176 
2177 			/*
2178 			 * Path is busy.
2179 			 */
2180 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2181 			    MDI_PI_IS_TRANSIENT(pip))
2182 				retry = 1;
2183 			/*
2184 			 * Keep looking for a next available online path
2185 			 */
2186 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2187 			if (next == NULL) {
2188 				next = head;
2189 			}
2190 			MDI_PI_UNLOCK(pip);
2191 			pip = next;
2192 			if (start == pip && preferred) {
2193 				preferred = 0;
2194 			} else if (start == pip && !preferred) {
2195 				cont = 0;
2196 			}
2197 		} while (cont);
2198 		break;
2199 
2200 	case LOAD_BALANCE_LBA:
2201 		/*
2202 		 * Make sure we are looking
2203 		 * for an online path. Otherwise, if it is for a STANDBY
2204 		 * path request, it will go through and fetch an ONLINE
2205 		 * path which is not desirable.
2206 		 */
2207 		if ((ct->ct_lb_args != NULL) &&
2208 			    (ct->ct_lb_args->region_size) && bp &&
2209 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2210 			if (i_mdi_lba_lb(ct, ret_pip, bp)
2211 				    == MDI_SUCCESS) {
2212 				MDI_CLIENT_UNLOCK(ct);
2213 				return (MDI_SUCCESS);
2214 			}
2215 		}
2216 		/*  FALLTHROUGH */
2217 	case LOAD_BALANCE_RR:
2218 		/*
2219 		 * Load balancing is Round Robin. Start looking for a online
2220 		 * mdi_pathinfo node starting from last known selected path
2221 		 * as the start point.  If override flags are specified,
2222 		 * process accordingly.
2223 		 * If the search is already in effect(start_pip not null),
2224 		 * then lets just use the same path preference to continue the
2225 		 * traversal.
2226 		 */
2227 
2228 		if (start_pip != NULL) {
2229 			preferred = MDI_PI(start_pip)->pi_preferred;
2230 		} else {
2231 			preferred = 1;
2232 		}
2233 
2234 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2235 		if (start == NULL) {
2236 			pip = head;
2237 		} else {
2238 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2239 			if (pip == NULL) {
2240 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2241 					/*
2242 					 * Return since we hit the end of list
2243 					 */
2244 					MDI_CLIENT_UNLOCK(ct);
2245 					return (MDI_NOPATH);
2246 				}
2247 
2248 				if (!sb) {
2249 					if (preferred == 0) {
2250 						/*
2251 						 * Looks like we have completed
2252 						 * the traversal as preferred
2253 						 * value is 0. Time to bail out.
2254 						 */
2255 						*ret_pip = NULL;
2256 						MDI_CLIENT_UNLOCK(ct);
2257 						return (MDI_NOPATH);
2258 					} else {
2259 						/*
2260 						 * Looks like we reached the
2261 						 * end of the list. Lets enable
2262 						 * traversal of non preferred
2263 						 * paths.
2264 						 */
2265 						preferred = 0;
2266 					}
2267 				}
2268 				pip = head;
2269 			}
2270 		}
2271 		start = pip;
2272 		do {
2273 			MDI_PI_LOCK(pip);
2274 			if (sb) {
2275 				cond = ((MDI_PI(pip)->pi_state ==
2276 				    MDI_PATHINFO_STATE_ONLINE &&
2277 					MDI_PI(pip)->pi_preferred ==
2278 						preferred) ? 1 : 0);
2279 			} else {
2280 				if (flags == MDI_SELECT_ONLINE_PATH) {
2281 					cond = ((MDI_PI(pip)->pi_state ==
2282 					    MDI_PATHINFO_STATE_ONLINE &&
2283 						MDI_PI(pip)->pi_preferred ==
2284 						preferred) ? 1 : 0);
2285 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2286 					cond = ((MDI_PI(pip)->pi_state ==
2287 					    MDI_PATHINFO_STATE_STANDBY &&
2288 						MDI_PI(pip)->pi_preferred ==
2289 						preferred) ? 1 : 0);
2290 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2291 				    MDI_SELECT_STANDBY_PATH)) {
2292 					cond = (((MDI_PI(pip)->pi_state ==
2293 					    MDI_PATHINFO_STATE_ONLINE ||
2294 					    (MDI_PI(pip)->pi_state ==
2295 					    MDI_PATHINFO_STATE_STANDBY)) &&
2296 						MDI_PI(pip)->pi_preferred ==
2297 						preferred) ? 1 : 0);
2298 				} else if (flags ==
2299 					(MDI_SELECT_STANDBY_PATH |
2300 					MDI_SELECT_ONLINE_PATH |
2301 					MDI_SELECT_USER_DISABLE_PATH)) {
2302 					cond = (((MDI_PI(pip)->pi_state ==
2303 					    MDI_PATHINFO_STATE_ONLINE ||
2304 					    (MDI_PI(pip)->pi_state ==
2305 					    MDI_PATHINFO_STATE_STANDBY) ||
2306 						(MDI_PI(pip)->pi_state ==
2307 					    (MDI_PATHINFO_STATE_ONLINE|
2308 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2309 						(MDI_PI(pip)->pi_state ==
2310 					    (MDI_PATHINFO_STATE_STANDBY |
2311 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2312 						MDI_PI(pip)->pi_preferred ==
2313 						preferred) ? 1 : 0);
2314 				} else if (flags ==
2315 				    (MDI_SELECT_STANDBY_PATH |
2316 				    MDI_SELECT_ONLINE_PATH |
2317 				    MDI_SELECT_NO_PREFERRED)) {
2318 					cond = (((MDI_PI(pip)->pi_state ==
2319 					    MDI_PATHINFO_STATE_ONLINE) ||
2320 					    (MDI_PI(pip)->pi_state ==
2321 					    MDI_PATHINFO_STATE_STANDBY))
2322 					    ? 1 : 0);
2323 				} else {
2324 					cond = 0;
2325 				}
2326 			}
2327 			/*
2328 			 * No need to explicitly check if the path is disabled.
2329 			 * Since we are checking for state == ONLINE and the
2330 			 * same variable is used for DISABLE/ENABLE information.
2331 			 */
2332 			if (cond) {
2333 				/*
2334 				 * Return the path in hold state. Caller should
2335 				 * release the lock by calling mdi_rele_path()
2336 				 */
2337 				MDI_PI_HOLD(pip);
2338 				MDI_PI_UNLOCK(pip);
2339 				if (sb)
2340 					ct->ct_path_last = pip;
2341 				*ret_pip = pip;
2342 				MDI_CLIENT_UNLOCK(ct);
2343 				return (MDI_SUCCESS);
2344 			}
2345 			/*
2346 			 * Path is busy.
2347 			 */
2348 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2349 			    MDI_PI_IS_TRANSIENT(pip))
2350 				retry = 1;
2351 
2352 			/*
2353 			 * Keep looking for a next available online path
2354 			 */
2355 do_again:
2356 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2357 			if (next == NULL) {
2358 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2359 					/*
2360 					 * Bail out since we hit the end of list
2361 					 */
2362 					MDI_PI_UNLOCK(pip);
2363 					break;
2364 				}
2365 
2366 				if (!sb) {
2367 					if (preferred == 1) {
2368 						/*
2369 						 * Looks like we reached the
2370 						 * end of the list. Lets enable
2371 						 * traversal of non preferred
2372 						 * paths.
2373 						 */
2374 						preferred = 0;
2375 						next = head;
2376 					} else {
2377 						/*
2378 						 * We have done both the passes
2379 						 * Preferred as well as for
2380 						 * Non-preferred. Bail out now.
2381 						 */
2382 						cont = 0;
2383 					}
2384 				} else {
2385 					/*
2386 					 * Standard behavior case.
2387 					 */
2388 					next = head;
2389 				}
2390 			}
2391 			MDI_PI_UNLOCK(pip);
2392 			if (cont == 0) {
2393 				break;
2394 			}
2395 			pip = next;
2396 
2397 			if (!sb) {
2398 				/*
2399 				 * We need to handle the selection of
2400 				 * non-preferred path in the following
2401 				 * case:
2402 				 *
2403 				 * +------+   +------+   +------+   +-----+
2404 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2405 				 * +------+   +------+   +------+   +-----+
2406 				 *
2407 				 * If we start the search with B, we need to
2408 				 * skip beyond B to pick C which is non -
2409 				 * preferred in the second pass. The following
2410 				 * test, if true, will allow us to skip over
2411 				 * the 'start'(B in the example) to select
2412 				 * other non preferred elements.
2413 				 */
2414 				if ((start_pip != NULL) && (start_pip == pip) &&
2415 				    (MDI_PI(start_pip)->pi_preferred
2416 				    != preferred)) {
2417 					/*
2418 					 * try again after going past the start
2419 					 * pip
2420 					 */
2421 					MDI_PI_LOCK(pip);
2422 					goto do_again;
2423 				}
2424 			} else {
2425 				/*
2426 				 * Standard behavior case
2427 				 */
2428 				if (start == pip && preferred) {
2429 					/* look for nonpreferred paths */
2430 					preferred = 0;
2431 				} else if (start == pip && !preferred) {
2432 					/*
2433 					 * Exit condition
2434 					 */
2435 					cont = 0;
2436 				}
2437 			}
2438 		} while (cont);
2439 		break;
2440 	}
2441 
2442 	MDI_CLIENT_UNLOCK(ct);
2443 	if (retry == 1) {
2444 		return (MDI_BUSY);
2445 	} else {
2446 		return (MDI_NOPATH);
2447 	}
2448 }
2449 
2450 /*
2451  * For a client, return the next available path to any phci
2452  *
2453  * Note:
2454  *		Caller should hold the branch's devinfo node to get a consistent
2455  *		snap shot of the mdi_pathinfo nodes.
2456  *
2457  *		Please note that even the list is stable the mdi_pathinfo
2458  *		node state and properties are volatile.  The caller should lock
2459  *		and unlock the nodes by calling mdi_pi_lock() and
2460  *		mdi_pi_unlock() functions to get a stable properties.
2461  *
2462  *		If there is a need to use the nodes beyond the hold of the
2463  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2464  *		need to be held against unexpected removal by calling
2465  *		mdi_hold_path() and should be released by calling
2466  *		mdi_rele_path() on completion.
2467  */
2468 mdi_pathinfo_t *
2469 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2470 {
2471 	mdi_client_t *ct;
2472 
2473 	if (!MDI_CLIENT(ct_dip))
2474 		return (NULL);
2475 
2476 	/*
2477 	 * Walk through client link
2478 	 */
2479 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2480 	ASSERT(ct != NULL);
2481 
2482 	if (pip == NULL)
2483 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2484 
2485 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2486 }
2487 
2488 /*
2489  * For a phci, return the next available path to any client
2490  * Note: ditto mdi_get_next_phci_path()
2491  */
2492 mdi_pathinfo_t *
2493 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2494 {
2495 	mdi_phci_t *ph;
2496 
2497 	if (!MDI_PHCI(ph_dip))
2498 		return (NULL);
2499 
2500 	/*
2501 	 * Walk through pHCI link
2502 	 */
2503 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2504 	ASSERT(ph != NULL);
2505 
2506 	if (pip == NULL)
2507 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2508 
2509 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2510 }
2511 
2512 /*
2513  * mdi_hold_path():
2514  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2515  * Return Values:
2516  *		None
2517  */
2518 void
2519 mdi_hold_path(mdi_pathinfo_t *pip)
2520 {
2521 	if (pip) {
2522 		MDI_PI_LOCK(pip);
2523 		MDI_PI_HOLD(pip);
2524 		MDI_PI_UNLOCK(pip);
2525 	}
2526 }
2527 
2528 
2529 /*
2530  * mdi_rele_path():
2531  *		Release the mdi_pathinfo node which was selected
2532  *		through mdi_select_path() mechanism or manually held by
2533  *		calling mdi_hold_path().
2534  * Return Values:
2535  *		None
2536  */
2537 void
2538 mdi_rele_path(mdi_pathinfo_t *pip)
2539 {
2540 	if (pip) {
2541 		MDI_PI_LOCK(pip);
2542 		MDI_PI_RELE(pip);
2543 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2544 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2545 		}
2546 		MDI_PI_UNLOCK(pip);
2547 	}
2548 }
2549 
2550 /*
2551  * mdi_pi_lock():
2552  * 		Lock the mdi_pathinfo node.
2553  * Note:
2554  *		The caller should release the lock by calling mdi_pi_unlock()
2555  */
2556 void
2557 mdi_pi_lock(mdi_pathinfo_t *pip)
2558 {
2559 	ASSERT(pip != NULL);
2560 	if (pip) {
2561 		MDI_PI_LOCK(pip);
2562 	}
2563 }
2564 
2565 
2566 /*
2567  * mdi_pi_unlock():
2568  * 		Unlock the mdi_pathinfo node.
2569  * Note:
2570  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2571  */
2572 void
2573 mdi_pi_unlock(mdi_pathinfo_t *pip)
2574 {
2575 	ASSERT(pip != NULL);
2576 	if (pip) {
2577 		MDI_PI_UNLOCK(pip);
2578 	}
2579 }
2580 
2581 /*
2582  * mdi_pi_find():
2583  *		Search the list of mdi_pathinfo nodes attached to the
2584  *		pHCI/Client device node whose path address matches "paddr".
2585  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2586  *		found.
2587  * Return Values:
2588  *		mdi_pathinfo node handle
2589  *		NULL
2590  * Notes:
2591  *		Caller need not hold any locks to call this function.
2592  */
2593 mdi_pathinfo_t *
2594 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2595 {
2596 	mdi_phci_t		*ph;
2597 	mdi_vhci_t		*vh;
2598 	mdi_client_t		*ct;
2599 	mdi_pathinfo_t		*pip = NULL;
2600 
2601 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
2602 	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
2603 	if ((pdip == NULL) || (paddr == NULL)) {
2604 		return (NULL);
2605 	}
2606 	ph = i_devi_get_phci(pdip);
2607 	if (ph == NULL) {
2608 		/*
2609 		 * Invalid pHCI device, Nothing more to do.
2610 		 */
2611 		MDI_DEBUG(2, (CE_WARN, pdip,
2612 		    "!mdi_pi_find: invalid phci"));
2613 		return (NULL);
2614 	}
2615 
2616 	vh = ph->ph_vhci;
2617 	if (vh == NULL) {
2618 		/*
2619 		 * Invalid vHCI device, Nothing more to do.
2620 		 */
2621 		MDI_DEBUG(2, (CE_WARN, pdip,
2622 		    "!mdi_pi_find: invalid vhci"));
2623 		return (NULL);
2624 	}
2625 
2626 	/*
2627 	 * Look for pathinfo node identified by paddr.
2628 	 */
2629 	if (caddr == NULL) {
2630 		/*
2631 		 * Find a mdi_pathinfo node under pHCI list for a matching
2632 		 * unit address.
2633 		 */
2634 		MDI_PHCI_LOCK(ph);
2635 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2636 			MDI_DEBUG(2, (CE_WARN, pdip,
2637 			    "!mdi_pi_find: offline phci %p", (void *)ph));
2638 			MDI_PHCI_UNLOCK(ph);
2639 			return (NULL);
2640 		}
2641 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2642 
2643 		while (pip != NULL) {
2644 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2645 				break;
2646 			}
2647 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2648 		}
2649 		MDI_PHCI_UNLOCK(ph);
2650 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
2651 		    (void *)pip));
2652 		return (pip);
2653 	}
2654 
2655 	/*
2656 	 * XXX - Is the rest of the code in this function really necessary?
2657 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2658 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2659 	 * whether the search is based on the pathinfo nodes attached to
2660 	 * the pHCI or the client node, the result will be the same.
2661 	 */
2662 
2663 	/*
2664 	 * Find the client device corresponding to 'caddr'
2665 	 */
2666 	MDI_VHCI_CLIENT_LOCK(vh);
2667 
2668 	/*
2669 	 * XXX - Passing NULL to the following function works as long as the
2670 	 * the client addresses (caddr) are unique per vhci basis.
2671 	 */
2672 	ct = i_mdi_client_find(vh, NULL, caddr);
2673 	if (ct == NULL) {
2674 		/*
2675 		 * Client not found, Obviously mdi_pathinfo node has not been
2676 		 * created yet.
2677 		 */
2678 		MDI_VHCI_CLIENT_UNLOCK(vh);
2679 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
2680 		    "found for caddr %s", caddr ? caddr : "NULL"));
2681 		return (NULL);
2682 	}
2683 
2684 	/*
2685 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2686 	 * pHCI and paddr
2687 	 */
2688 	MDI_CLIENT_LOCK(ct);
2689 
2690 	/*
2691 	 * Release the global mutex as it is no more needed. Note: We always
2692 	 * respect the locking order while acquiring.
2693 	 */
2694 	MDI_VHCI_CLIENT_UNLOCK(vh);
2695 
2696 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2697 	while (pip != NULL) {
2698 		/*
2699 		 * Compare the unit address
2700 		 */
2701 		if ((MDI_PI(pip)->pi_phci == ph) &&
2702 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2703 			break;
2704 		}
2705 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2706 	}
2707 	MDI_CLIENT_UNLOCK(ct);
2708 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
2709 	return (pip);
2710 }
2711 
2712 /*
2713  * mdi_pi_alloc():
2714  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2715  *		The mdi_pathinfo node returned by this function identifies a
2716  *		unique device path is capable of having properties attached
2717  *		and passed to mdi_pi_online() to fully attach and online the
2718  *		path and client device node.
2719  *		The mdi_pathinfo node returned by this function must be
2720  *		destroyed using mdi_pi_free() if the path is no longer
2721  *		operational or if the caller fails to attach a client device
2722  *		node when calling mdi_pi_online(). The framework will not free
2723  *		the resources allocated.
2724  *		This function can be called from both interrupt and kernel
2725  *		contexts.  DDI_NOSLEEP flag should be used while calling
2726  *		from interrupt contexts.
2727  * Return Values:
2728  *		MDI_SUCCESS
2729  *		MDI_FAILURE
2730  *		MDI_NOMEM
2731  */
2732 /*ARGSUSED*/
2733 int
2734 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2735     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2736 {
2737 	mdi_vhci_t	*vh;
2738 	mdi_phci_t	*ph;
2739 	mdi_client_t	*ct;
2740 	mdi_pathinfo_t	*pip = NULL;
2741 	dev_info_t	*cdip;
2742 	int		rv = MDI_NOMEM;
2743 	int		path_allocated = 0;
2744 
2745 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
2746 	    cname ? cname : "NULL", caddr ? caddr : "NULL",
2747 	    paddr ? paddr : "NULL"));
2748 
2749 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2750 	    ret_pip == NULL) {
2751 		/* Nothing more to do */
2752 		return (MDI_FAILURE);
2753 	}
2754 
2755 	*ret_pip = NULL;
2756 
2757 	/* No allocations on detaching pHCI */
2758 	if (DEVI_IS_DETACHING(pdip)) {
2759 		/* Invalid pHCI device, return failure */
2760 		MDI_DEBUG(1, (CE_WARN, pdip,
2761 		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
2762 		return (MDI_FAILURE);
2763 	}
2764 
2765 	ph = i_devi_get_phci(pdip);
2766 	ASSERT(ph != NULL);
2767 	if (ph == NULL) {
2768 		/* Invalid pHCI device, return failure */
2769 		MDI_DEBUG(1, (CE_WARN, pdip,
2770 		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
2771 		return (MDI_FAILURE);
2772 	}
2773 
2774 	MDI_PHCI_LOCK(ph);
2775 	vh = ph->ph_vhci;
2776 	if (vh == NULL) {
2777 		/* Invalid vHCI device, return failure */
2778 		MDI_DEBUG(1, (CE_WARN, pdip,
2779 		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
2780 		MDI_PHCI_UNLOCK(ph);
2781 		return (MDI_FAILURE);
2782 	}
2783 
2784 	if (MDI_PHCI_IS_READY(ph) == 0) {
2785 		/*
2786 		 * Do not allow new node creation when pHCI is in
2787 		 * offline/suspended states
2788 		 */
2789 		MDI_DEBUG(1, (CE_WARN, pdip,
2790 		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
2791 		MDI_PHCI_UNLOCK(ph);
2792 		return (MDI_BUSY);
2793 	}
2794 	MDI_PHCI_UNSTABLE(ph);
2795 	MDI_PHCI_UNLOCK(ph);
2796 
2797 	/* look for a matching client, create one if not found */
2798 	MDI_VHCI_CLIENT_LOCK(vh);
2799 	ct = i_mdi_client_find(vh, cname, caddr);
2800 	if (ct == NULL) {
2801 		ct = i_mdi_client_alloc(vh, cname, caddr);
2802 		ASSERT(ct != NULL);
2803 	}
2804 
2805 	if (ct->ct_dip == NULL) {
2806 		/*
2807 		 * Allocate a devinfo node
2808 		 */
2809 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2810 		    compatible, ncompatible);
2811 		if (ct->ct_dip == NULL) {
2812 			(void) i_mdi_client_free(vh, ct);
2813 			goto fail;
2814 		}
2815 	}
2816 	cdip = ct->ct_dip;
2817 
2818 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2819 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2820 
2821 	MDI_CLIENT_LOCK(ct);
2822 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2823 	while (pip != NULL) {
2824 		/*
2825 		 * Compare the unit address
2826 		 */
2827 		if ((MDI_PI(pip)->pi_phci == ph) &&
2828 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2829 			break;
2830 		}
2831 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2832 	}
2833 	MDI_CLIENT_UNLOCK(ct);
2834 
2835 	if (pip == NULL) {
2836 		/*
2837 		 * This is a new path for this client device.  Allocate and
2838 		 * initialize a new pathinfo node
2839 		 */
2840 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2841 		ASSERT(pip != NULL);
2842 		path_allocated = 1;
2843 	}
2844 	rv = MDI_SUCCESS;
2845 
2846 fail:
2847 	/*
2848 	 * Release the global mutex.
2849 	 */
2850 	MDI_VHCI_CLIENT_UNLOCK(vh);
2851 
2852 	/*
2853 	 * Mark the pHCI as stable
2854 	 */
2855 	MDI_PHCI_LOCK(ph);
2856 	MDI_PHCI_STABLE(ph);
2857 	MDI_PHCI_UNLOCK(ph);
2858 	*ret_pip = pip;
2859 
2860 	MDI_DEBUG(2, (CE_NOTE, pdip,
2861 	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));
2862 
2863 	if (path_allocated)
2864 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2865 
2866 	return (rv);
2867 }
2868 
2869 /*ARGSUSED*/
2870 int
2871 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2872     int flags, mdi_pathinfo_t **ret_pip)
2873 {
2874 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2875 	    flags, ret_pip));
2876 }
2877 
2878 /*
2879  * i_mdi_pi_alloc():
2880  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2881  * Return Values:
2882  *		mdi_pathinfo
2883  */
2884 /*ARGSUSED*/
2885 static mdi_pathinfo_t *
2886 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2887 {
2888 	mdi_pathinfo_t	*pip;
2889 	int		ct_circular;
2890 	int		ph_circular;
2891 	static char	path[MAXPATHLEN];
2892 	char		*path_persistent;
2893 	int		path_instance;
2894 	mod_hash_val_t	hv;
2895 
2896 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2897 
2898 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2899 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2900 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2901 	    MDI_PATHINFO_STATE_TRANSIENT;
2902 
2903 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2904 		MDI_PI_SET_USER_DISABLE(pip);
2905 
2906 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2907 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2908 
2909 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2910 		MDI_PI_SET_DRV_DISABLE(pip);
2911 
2912 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2913 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2914 	MDI_PI(pip)->pi_client = ct;
2915 	MDI_PI(pip)->pi_phci = ph;
2916 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2917 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2918 
2919         /*
2920 	 * We form the "path" to the pathinfo node, and see if we have
2921 	 * already allocated a 'path_instance' for that "path".  If so,
2922 	 * we use the already allocated 'path_instance'.  If not, we
2923 	 * allocate a new 'path_instance' and associate it with a copy of
2924 	 * the "path" string (which is never freed). The association
2925 	 * between a 'path_instance' this "path" string persists until
2926 	 * reboot.
2927 	 */
2928         mutex_enter(&mdi_pathmap_mutex);
2929 	(void) ddi_pathname(ph->ph_dip, path);
2930 	(void) sprintf(path + strlen(path), "/%s@%s",
2931 	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2932         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2933                 path_instance = (uint_t)(intptr_t)hv;
2934         } else {
2935 		/* allocate a new 'path_instance' and persistent "path" */
2936 		path_instance = mdi_pathmap_instance++;
2937 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2938                 (void) mod_hash_insert(mdi_pathmap_bypath,
2939                     (mod_hash_key_t)path_persistent,
2940                     (mod_hash_val_t)(intptr_t)path_instance);
2941 		(void) mod_hash_insert(mdi_pathmap_byinstance,
2942 		    (mod_hash_key_t)(intptr_t)path_instance,
2943 		    (mod_hash_val_t)path_persistent);
2944         }
2945         mutex_exit(&mdi_pathmap_mutex);
2946 	MDI_PI(pip)->pi_path_instance = path_instance;
2947 
2948 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2949 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2950 	MDI_PI(pip)->pi_pprivate = NULL;
2951 	MDI_PI(pip)->pi_cprivate = NULL;
2952 	MDI_PI(pip)->pi_vprivate = NULL;
2953 	MDI_PI(pip)->pi_client_link = NULL;
2954 	MDI_PI(pip)->pi_phci_link = NULL;
2955 	MDI_PI(pip)->pi_ref_cnt = 0;
2956 	MDI_PI(pip)->pi_kstats = NULL;
2957 	MDI_PI(pip)->pi_preferred = 1;
2958 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2959 
2960 	/*
2961 	 * Lock both dev_info nodes against changes in parallel.
2962 	 *
2963 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
2964 	 * This atypical operation is done to synchronize pathinfo nodes
2965 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
2966 	 * the pathinfo nodes are children of the Client.
2967 	 */
2968 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2969 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2970 
2971 	i_mdi_phci_add_path(ph, pip);
2972 	i_mdi_client_add_path(ct, pip);
2973 
2974 	ndi_devi_exit(ph->ph_dip, ph_circular);
2975 	ndi_devi_exit(ct->ct_dip, ct_circular);
2976 
2977 	return (pip);
2978 }
2979 
2980 /*
2981  * mdi_pi_pathname_by_instance():
2982  *	Lookup of "path" by 'path_instance'. Return "path".
2983  *	NOTE: returned "path" remains valid forever (until reboot).
2984  */
2985 char *
2986 mdi_pi_pathname_by_instance(int path_instance)
2987 {
2988 	char		*path;
2989 	mod_hash_val_t	hv;
2990 
2991 	/* mdi_pathmap lookup of "path" by 'path_instance' */
2992 	mutex_enter(&mdi_pathmap_mutex);
2993 	if (mod_hash_find(mdi_pathmap_byinstance,
2994 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
2995 		path = (char *)hv;
2996 	else
2997 		path = NULL;
2998 	mutex_exit(&mdi_pathmap_mutex);
2999 	return (path);
3000 }
3001 
3002 /*
3003  * i_mdi_phci_add_path():
3004  * 		Add a mdi_pathinfo node to pHCI list.
3005  * Notes:
3006  *		Caller should per-pHCI mutex
3007  */
3008 static void
3009 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3010 {
3011 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3012 
3013 	MDI_PHCI_LOCK(ph);
3014 	if (ph->ph_path_head == NULL) {
3015 		ph->ph_path_head = pip;
3016 	} else {
3017 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3018 	}
3019 	ph->ph_path_tail = pip;
3020 	ph->ph_path_count++;
3021 	MDI_PHCI_UNLOCK(ph);
3022 }
3023 
3024 /*
3025  * i_mdi_client_add_path():
3026  *		Add mdi_pathinfo node to client list
3027  */
3028 static void
3029 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3030 {
3031 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3032 
3033 	MDI_CLIENT_LOCK(ct);
3034 	if (ct->ct_path_head == NULL) {
3035 		ct->ct_path_head = pip;
3036 	} else {
3037 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3038 	}
3039 	ct->ct_path_tail = pip;
3040 	ct->ct_path_count++;
3041 	MDI_CLIENT_UNLOCK(ct);
3042 }
3043 
3044 /*
3045  * mdi_pi_free():
3046  *		Free the mdi_pathinfo node and also client device node if this
3047  *		is the last path to the device
3048  * Return Values:
3049  *		MDI_SUCCESS
3050  *		MDI_FAILURE
3051  *		MDI_BUSY
3052  */
3053 /*ARGSUSED*/
3054 int
3055 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3056 {
3057 	int		rv = MDI_FAILURE;
3058 	mdi_vhci_t	*vh;
3059 	mdi_phci_t	*ph;
3060 	mdi_client_t	*ct;
3061 	int		(*f)();
3062 	int		client_held = 0;
3063 
3064 	MDI_PI_LOCK(pip);
3065 	ph = MDI_PI(pip)->pi_phci;
3066 	ASSERT(ph != NULL);
3067 	if (ph == NULL) {
3068 		/*
3069 		 * Invalid pHCI device, return failure
3070 		 */
3071 		MDI_DEBUG(1, (CE_WARN, NULL,
3072 		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
3073 		MDI_PI_UNLOCK(pip);
3074 		return (MDI_FAILURE);
3075 	}
3076 
3077 	vh = ph->ph_vhci;
3078 	ASSERT(vh != NULL);
3079 	if (vh == NULL) {
3080 		/* Invalid pHCI device, return failure */
3081 		MDI_DEBUG(1, (CE_WARN, NULL,
3082 		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
3083 		MDI_PI_UNLOCK(pip);
3084 		return (MDI_FAILURE);
3085 	}
3086 
3087 	ct = MDI_PI(pip)->pi_client;
3088 	ASSERT(ct != NULL);
3089 	if (ct == NULL) {
3090 		/*
3091 		 * Invalid Client device, return failure
3092 		 */
3093 		MDI_DEBUG(1, (CE_WARN, NULL,
3094 		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
3095 		MDI_PI_UNLOCK(pip);
3096 		return (MDI_FAILURE);
3097 	}
3098 
3099 	/*
3100 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
3101 	 * if the node state is either offline or init and the reference count
3102 	 * is zero.
3103 	 */
3104 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3105 	    MDI_PI_IS_INITING(pip))) {
3106 		/*
3107 		 * Node is busy
3108 		 */
3109 		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3110 		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
3111 		MDI_PI_UNLOCK(pip);
3112 		return (MDI_BUSY);
3113 	}
3114 
3115 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3116 		/*
3117 		 * Give a chance for pending I/Os to complete.
3118 		 */
3119 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
3120 		    "%d cmds still pending on path: %p\n",
3121 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3122 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3123 		    &MDI_PI(pip)->pi_mutex,
3124 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3125 			/*
3126 			 * The timeout time reached without ref_cnt being zero
3127 			 * being signaled.
3128 			 */
3129 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
3130 			    "!mdi_pi_free: "
3131 			    "Timeout reached on path %p without the cond\n",
3132 			    (void *)pip));
3133 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
3134 			    "!mdi_pi_free: "
3135 			    "%d cmds still pending on path: %p\n",
3136 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3137 			MDI_PI_UNLOCK(pip);
3138 			return (MDI_BUSY);
3139 		}
3140 	}
3141 	if (MDI_PI(pip)->pi_pm_held) {
3142 		client_held = 1;
3143 	}
3144 	MDI_PI_UNLOCK(pip);
3145 
3146 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3147 
3148 	MDI_CLIENT_LOCK(ct);
3149 
3150 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3151 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3152 
3153 	/*
3154 	 * Wait till failover is complete before removing this node.
3155 	 */
3156 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3157 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3158 
3159 	MDI_CLIENT_UNLOCK(ct);
3160 	MDI_VHCI_CLIENT_LOCK(vh);
3161 	MDI_CLIENT_LOCK(ct);
3162 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3163 
3164 	if (!MDI_PI_IS_INITING(pip)) {
3165 		f = vh->vh_ops->vo_pi_uninit;
3166 		if (f != NULL) {
3167 			rv = (*f)(vh->vh_dip, pip, 0);
3168 		}
3169 	}
3170 	/*
3171 	 * If vo_pi_uninit() completed successfully.
3172 	 */
3173 	if (rv == MDI_SUCCESS) {
3174 		if (client_held) {
3175 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
3176 			    "i_mdi_pm_rele_client\n"));
3177 			i_mdi_pm_rele_client(ct, 1);
3178 		}
3179 		i_mdi_pi_free(ph, pip, ct);
3180 		if (ct->ct_path_count == 0) {
3181 			/*
3182 			 * Client lost its last path.
3183 			 * Clean up the client device
3184 			 */
3185 			MDI_CLIENT_UNLOCK(ct);
3186 			(void) i_mdi_client_free(ct->ct_vhci, ct);
3187 			MDI_VHCI_CLIENT_UNLOCK(vh);
3188 			return (rv);
3189 		}
3190 	}
3191 	MDI_CLIENT_UNLOCK(ct);
3192 	MDI_VHCI_CLIENT_UNLOCK(vh);
3193 
3194 	if (rv == MDI_FAILURE)
3195 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3196 
3197 	return (rv);
3198 }
3199 
3200 /*
3201  * i_mdi_pi_free():
3202  *		Free the mdi_pathinfo node
3203  */
3204 static void
3205 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3206 {
3207 	int	ct_circular;
3208 	int	ph_circular;
3209 
3210 	ASSERT(MDI_CLIENT_LOCKED(ct));
3211 
3212 	/*
3213 	 * remove any per-path kstats
3214 	 */
3215 	i_mdi_pi_kstat_destroy(pip);
3216 
3217 	/* See comments in i_mdi_pi_alloc() */
3218 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3219 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3220 
3221 	i_mdi_client_remove_path(ct, pip);
3222 	i_mdi_phci_remove_path(ph, pip);
3223 
3224 	ndi_devi_exit(ph->ph_dip, ph_circular);
3225 	ndi_devi_exit(ct->ct_dip, ct_circular);
3226 
3227 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3228 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3229 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3230 	if (MDI_PI(pip)->pi_addr) {
3231 		kmem_free(MDI_PI(pip)->pi_addr,
3232 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3233 		MDI_PI(pip)->pi_addr = NULL;
3234 	}
3235 
3236 	if (MDI_PI(pip)->pi_prop) {
3237 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3238 		MDI_PI(pip)->pi_prop = NULL;
3239 	}
3240 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3241 }
3242 
3243 
3244 /*
3245  * i_mdi_phci_remove_path():
3246  * 		Remove a mdi_pathinfo node from pHCI list.
3247  * Notes:
3248  *		Caller should hold per-pHCI mutex
3249  */
3250 static void
3251 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3252 {
3253 	mdi_pathinfo_t	*prev = NULL;
3254 	mdi_pathinfo_t	*path = NULL;
3255 
3256 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3257 
3258 	MDI_PHCI_LOCK(ph);
3259 	path = ph->ph_path_head;
3260 	while (path != NULL) {
3261 		if (path == pip) {
3262 			break;
3263 		}
3264 		prev = path;
3265 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3266 	}
3267 
3268 	if (path) {
3269 		ph->ph_path_count--;
3270 		if (prev) {
3271 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3272 		} else {
3273 			ph->ph_path_head =
3274 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3275 		}
3276 		if (ph->ph_path_tail == path) {
3277 			ph->ph_path_tail = prev;
3278 		}
3279 	}
3280 
3281 	/*
3282 	 * Clear the pHCI link
3283 	 */
3284 	MDI_PI(pip)->pi_phci_link = NULL;
3285 	MDI_PI(pip)->pi_phci = NULL;
3286 	MDI_PHCI_UNLOCK(ph);
3287 }
3288 
3289 /*
3290  * i_mdi_client_remove_path():
3291  * 		Remove a mdi_pathinfo node from client path list.
3292  */
3293 static void
3294 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3295 {
3296 	mdi_pathinfo_t	*prev = NULL;
3297 	mdi_pathinfo_t	*path;
3298 
3299 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3300 
3301 	ASSERT(MDI_CLIENT_LOCKED(ct));
3302 	path = ct->ct_path_head;
3303 	while (path != NULL) {
3304 		if (path == pip) {
3305 			break;
3306 		}
3307 		prev = path;
3308 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3309 	}
3310 
3311 	if (path) {
3312 		ct->ct_path_count--;
3313 		if (prev) {
3314 			MDI_PI(prev)->pi_client_link =
3315 			    MDI_PI(path)->pi_client_link;
3316 		} else {
3317 			ct->ct_path_head =
3318 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3319 		}
3320 		if (ct->ct_path_tail == path) {
3321 			ct->ct_path_tail = prev;
3322 		}
3323 		if (ct->ct_path_last == path) {
3324 			ct->ct_path_last = ct->ct_path_head;
3325 		}
3326 	}
3327 	MDI_PI(pip)->pi_client_link = NULL;
3328 	MDI_PI(pip)->pi_client = NULL;
3329 }
3330 
/*
 * i_mdi_pi_state_change():
 *		Move a mdi_pathinfo node to the requested state (online,
 *		standby, fault or offline) and notify the vHCI driver through
 *		its vo_pi_state_change() entry point, if one is registered.
 *
 *		pip	pathinfo node to transition
 *		state	target MDI_PATHINFO_STATE_* value
 *		flag	NDI flags; NDI_DEVI_REMOVE is examined here and the
 *			value is passed through to the vHCI callback
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_BUSY
 *		MDI_FAILURE
 *		A non-success status from the vHCI vo_pi_state_change()
 *		callback is returned to the caller unchanged.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	/*
	 * Each back pointer below is both asserted and checked explicitly,
	 * so a build where ASSERT() compiles away still fails the request.
	 */
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* The pathinfo lock is not held across the vHCI callback. */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				/*
				 * No locks are held here and the pHCI has
				 * not been marked unstable yet, so a plain
				 * return is sufficient.
				 */
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/*
	 * From here on every exit must pass through state_change_exit so
	 * that the pHCI is marked stable again.
	 */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 *
	 * The switch has no default case: for any other state value no
	 * transient state is set before the vHCI callback is made.
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Don't offline the client dev_info node unless we have
		 * no available paths left at all.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (ct->ct_path_count == 1)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the pip nor the client lock is
				 * held at this point.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI state change entry point with no MDI locks held.
	 * When no callback is registered rv keeps its current value.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	/*
	 * Leave the transient state: on success the transient marking is
	 * cleared, on failure the previous state is restored.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 *
			 * Within this block rv holds NDI_* codes; it is
			 * converted back to an MDI_* code at the end.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3650 
3651 /*
3652  * mdi_pi_online():
3653  *		Place the path_info node in the online state.  The path is
3654  *		now available to be selected by mdi_select_path() for
3655  *		transporting I/O requests to client devices.
3656  * Return Values:
3657  *		MDI_SUCCESS
3658  *		MDI_FAILURE
3659  */
3660 int
3661 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3662 {
3663 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3664 	int		client_held = 0;
3665 	int		rv;
3666 	int		se_flag;
3667 	int		kmem_flag;
3668 
3669 	ASSERT(ct != NULL);
3670 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3671 	if (rv != MDI_SUCCESS)
3672 		return (rv);
3673 
3674 	MDI_PI_LOCK(pip);
3675 	if (MDI_PI(pip)->pi_pm_held == 0) {
3676 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3677 		    "i_mdi_pm_hold_pip %p\n", (void *)pip));
3678 		i_mdi_pm_hold_pip(pip);
3679 		client_held = 1;
3680 	}
3681 	MDI_PI_UNLOCK(pip);
3682 
3683 	if (client_held) {
3684 		MDI_CLIENT_LOCK(ct);
3685 		if (ct->ct_power_cnt == 0) {
3686 			rv = i_mdi_power_all_phci(ct);
3687 		}
3688 
3689 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3690 		    "i_mdi_pm_hold_client %p\n", (void *)ct));
3691 		i_mdi_pm_hold_client(ct, 1);
3692 		MDI_CLIENT_UNLOCK(ct);
3693 	}
3694 
3695 	/* determine interrupt context */
3696 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3697 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3698 
3699 	/* A new path is online.  Invalidate DINFOCACHE snap shot. */
3700 	i_ddi_di_cache_invalidate(kmem_flag);
3701 
3702 	return (rv);
3703 }
3704 
3705 /*
3706  * mdi_pi_standby():
3707  *		Place the mdi_pathinfo node in standby state
3708  *
3709  * Return Values:
3710  *		MDI_SUCCESS
3711  *		MDI_FAILURE
3712  */
3713 int
3714 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3715 {
3716 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3717 }
3718 
3719 /*
3720  * mdi_pi_fault():
3721  *		Place the mdi_pathinfo node in fault'ed state
3722  * Return Values:
3723  *		MDI_SUCCESS
3724  *		MDI_FAILURE
3725  */
3726 int
3727 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3728 {
3729 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3730 }
3731 
3732 /*
3733  * mdi_pi_offline():
3734  *		Offline a mdi_pathinfo node.
3735  * Return Values:
3736  *		MDI_SUCCESS
3737  *		MDI_FAILURE
3738  */
3739 int
3740 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3741 {
3742 	int	ret, client_held = 0;
3743 	mdi_client_t	*ct;
3744 	int		se_flag;
3745 	int		kmem_flag;
3746 
3747 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3748 
3749 	if (ret == MDI_SUCCESS) {
3750 		MDI_PI_LOCK(pip);
3751 		if (MDI_PI(pip)->pi_pm_held) {
3752 			client_held = 1;
3753 		}
3754 		MDI_PI_UNLOCK(pip);
3755 
3756 		if (client_held) {
3757 			ct = MDI_PI(pip)->pi_client;
3758 			MDI_CLIENT_LOCK(ct);
3759 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3760 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3761 			i_mdi_pm_rele_client(ct, 1);
3762 			MDI_CLIENT_UNLOCK(ct);
3763 		}
3764 
3765 		/* determine interrupt context */
3766 		se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3767 		kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3768 
3769 		/* pathinfo is offlined. update DINFOCACHE. */
3770 		i_ddi_di_cache_invalidate(kmem_flag);
3771 	}
3772 
3773 	return (ret);
3774 }
3775 
3776 /*
3777  * i_mdi_pi_offline():
3778  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3779  */
3780 static int
3781 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3782 {
3783 	dev_info_t	*vdip = NULL;
3784 	mdi_vhci_t	*vh = NULL;
3785 	mdi_client_t	*ct = NULL;
3786 	int		(*f)();
3787 	int		rv;
3788 
3789 	MDI_PI_LOCK(pip);
3790 	ct = MDI_PI(pip)->pi_client;
3791 	ASSERT(ct != NULL);
3792 
3793 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3794 		/*
3795 		 * Give a chance for pending I/Os to complete.
3796 		 */
3797 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3798 		    "%d cmds still pending on path: %p\n",
3799 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3800 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3801 		    &MDI_PI(pip)->pi_mutex,
3802 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3803 			/*
3804 			 * The timeout time reached without ref_cnt being zero
3805 			 * being signaled.
3806 			 */
3807 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3808 			    "Timeout reached on path %p without the cond\n",
3809 			    (void *)pip));
3810 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3811 			    "%d cmds still pending on path: %p\n",
3812 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3813 		}
3814 	}
3815 	vh = ct->ct_vhci;
3816 	vdip = vh->vh_dip;
3817 
3818 	/*
3819 	 * Notify vHCI that has registered this event
3820 	 */
3821 	ASSERT(vh->vh_ops);
3822 	f = vh->vh_ops->vo_pi_state_change;
3823 
3824 	if (f != NULL) {
3825 		MDI_PI_UNLOCK(pip);
3826 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3827 		    flags)) != MDI_SUCCESS) {
3828 			MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3829 			    "!vo_path_offline failed "
3830 			    "vdip %p, pip %p", (void *)vdip, (void *)pip));
3831 		}
3832 		MDI_PI_LOCK(pip);
3833 	}
3834 
3835 	/*
3836 	 * Set the mdi_pathinfo node state and clear the transient condition
3837 	 */
3838 	MDI_PI_SET_OFFLINE(pip);
3839 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3840 	MDI_PI_UNLOCK(pip);
3841 
3842 	MDI_CLIENT_LOCK(ct);
3843 	if (rv == MDI_SUCCESS) {
3844 		if (ct->ct_unstable == 0) {
3845 			dev_info_t	*cdip = ct->ct_dip;
3846 
3847 			/*
3848 			 * Onlining the mdi_pathinfo node will impact the
3849 			 * client state Update the client and dev_info node
3850 			 * state accordingly
3851 			 */
3852 			i_mdi_client_update_state(ct);
3853 			rv = NDI_SUCCESS;
3854 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3855 				if (cdip &&
3856 				    (i_ddi_node_state(cdip) >=
3857 				    DS_INITIALIZED)) {
3858 					MDI_CLIENT_UNLOCK(ct);
3859 					rv = ndi_devi_offline(cdip, 0);
3860 					MDI_CLIENT_LOCK(ct);
3861 					if (rv != NDI_SUCCESS) {
3862 						/*
3863 						 * ndi_devi_offline failed.
3864 						 * Reset client flags to
3865 						 * online.
3866 						 */
3867 						MDI_DEBUG(4, (CE_WARN, cdip,
3868 						    "!ndi_devi_offline: failed "
3869 						    " Error: %x", rv));
3870 						MDI_CLIENT_SET_ONLINE(ct);
3871 					}
3872 				}
3873 			}
3874 			/*
3875 			 * Convert to MDI error code
3876 			 */
3877 			switch (rv) {
3878 			case NDI_SUCCESS:
3879 				rv = MDI_SUCCESS;
3880 				break;
3881 			case NDI_BUSY:
3882 				rv = MDI_BUSY;
3883 				break;
3884 			default:
3885 				rv = MDI_FAILURE;
3886 				break;
3887 			}
3888 		}
3889 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3890 		i_mdi_report_path_state(ct, pip);
3891 	}
3892 
3893 	MDI_CLIENT_UNLOCK(ct);
3894 
3895 	/*
3896 	 * Change in the mdi_pathinfo node state will impact the client state
3897 	 */
3898 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3899 	    (void *)ct, (void *)pip));
3900 	return (rv);
3901 }
3902 
3903 /*
3904  * mdi_pi_get_node_name():
3905  *              Get the name associated with a mdi_pathinfo node.
3906  *              Since pathinfo nodes are not directly named, we
3907  *              return the node_name of the client.
3908  *
3909  * Return Values:
3910  *              char *
3911  */
3912 char *
3913 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
3914 {
3915 	mdi_client_t    *ct;
3916 
3917 	if (pip == NULL)
3918 		return (NULL);
3919 	ct = MDI_PI(pip)->pi_client;
3920 	if ((ct == NULL) || (ct->ct_dip == NULL))
3921 		return (NULL);
3922 	return (ddi_node_name(ct->ct_dip));
3923 }
3924 
3925 /*
3926  * mdi_pi_get_addr():
3927  *		Get the unit address associated with a mdi_pathinfo node
3928  *
3929  * Return Values:
3930  *		char *
3931  */
3932 char *
3933 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3934 {
3935 	if (pip == NULL)
3936 		return (NULL);
3937 
3938 	return (MDI_PI(pip)->pi_addr);
3939 }
3940 
3941 /*
3942  * mdi_pi_get_path_instance():
3943  *		Get the 'path_instance' of a mdi_pathinfo node
3944  *
3945  * Return Values:
3946  *		path_instance
3947  */
3948 int
3949 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
3950 {
3951 	if (pip == NULL)
3952 		return (0);
3953 
3954 	return (MDI_PI(pip)->pi_path_instance);
3955 }
3956 
3957 /*
3958  * mdi_pi_pathname():
3959  *		Return pointer to path to pathinfo node.
3960  */
3961 char *
3962 mdi_pi_pathname(mdi_pathinfo_t *pip)
3963 {
3964 	if (pip == NULL)
3965 		return (NULL);
3966 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
3967 }
3968 
3969 char *
3970 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
3971 {
3972 	char *obp_path = NULL;
3973 	if ((pip == NULL) || (path == NULL))
3974 		return (NULL);
3975 
3976 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
3977 		(void) strcpy(path, obp_path);
3978 		(void) mdi_prop_free(obp_path);
3979 	} else {
3980 		path = NULL;
3981 	}
3982 	return (path);
3983 }
3984 
3985 int
3986 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
3987 {
3988 	dev_info_t *pdip;
3989 	char *obp_path = NULL;
3990 	int rc = MDI_FAILURE;
3991 
3992 	if (pip == NULL)
3993 		return (MDI_FAILURE);
3994 
3995 	pdip = mdi_pi_get_phci(pip);
3996 	if (pdip == NULL)
3997 		return (MDI_FAILURE);
3998 
3999 	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4000 
4001 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4002 		(void) ddi_pathname(pdip, obp_path);
4003 	}
4004 
4005 	if (component) {
4006 		(void) strncat(obp_path, "/", MAXPATHLEN);
4007 		(void) strncat(obp_path, component, MAXPATHLEN);
4008 	}
4009 	rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4010 
4011 	if (obp_path)
4012 		kmem_free(obp_path, MAXPATHLEN);
4013 	return (rc);
4014 }
4015 
4016 /*
4017  * mdi_pi_get_client():
4018  *		Get the client devinfo associated with a mdi_pathinfo node
4019  *
4020  * Return Values:
4021  *		Handle to client device dev_info node
4022  */
4023 dev_info_t *
4024 mdi_pi_get_client(mdi_pathinfo_t *pip)
4025 {
4026 	dev_info_t	*dip = NULL;
4027 	if (pip) {
4028 		dip = MDI_PI(pip)->pi_client->ct_dip;
4029 	}
4030 	return (dip);
4031 }
4032 
4033 /*
4034  * mdi_pi_get_phci():
4035  *		Get the pHCI devinfo associated with the mdi_pathinfo node
4036  * Return Values:
4037  *		Handle to dev_info node
4038  */
4039 dev_info_t *
4040 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4041 {
4042 	dev_info_t	*dip = NULL;
4043 	if (pip) {
4044 		dip = MDI_PI(pip)->pi_phci->ph_dip;
4045 	}
4046 	return (dip);
4047 }
4048 
4049 /*
4050  * mdi_pi_get_client_private():
4051  *		Get the client private information associated with the
4052  *		mdi_pathinfo node
4053  */
4054 void *
4055 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4056 {
4057 	void *cprivate = NULL;
4058 	if (pip) {
4059 		cprivate = MDI_PI(pip)->pi_cprivate;
4060 	}
4061 	return (cprivate);
4062 }
4063 
4064 /*
4065  * mdi_pi_set_client_private():
4066  *		Set the client private information in the mdi_pathinfo node
4067  */
4068 void
4069 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4070 {
4071 	if (pip) {
4072 		MDI_PI(pip)->pi_cprivate = priv;
4073 	}
4074 }
4075 
4076 /*
4077  * mdi_pi_get_phci_private():
4078  *		Get the pHCI private information associated with the
4079  *		mdi_pathinfo node
4080  */
4081 caddr_t
4082 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4083 {
4084 	caddr_t	pprivate = NULL;
4085 	if (pip) {
4086 		pprivate = MDI_PI(pip)->pi_pprivate;
4087 	}
4088 	return (pprivate);
4089 }
4090 
4091 /*
4092  * mdi_pi_set_phci_private():
4093  *		Set the pHCI private information in the mdi_pathinfo node
4094  */
4095 void
4096 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4097 {
4098 	if (pip) {
4099 		MDI_PI(pip)->pi_pprivate = priv;
4100 	}
4101 }
4102 
4103 /*
4104  * mdi_pi_get_state():
4105  *		Get the mdi_pathinfo node state. Transient states are internal
4106  *		and not provided to the users
4107  */
4108 mdi_pathinfo_state_t
4109 mdi_pi_get_state(mdi_pathinfo_t *pip)
4110 {
4111 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4112 
4113 	if (pip) {
4114 		if (MDI_PI_IS_TRANSIENT(pip)) {
4115 			/*
4116 			 * mdi_pathinfo is in state transition.  Return the
4117 			 * last good state.
4118 			 */
4119 			state = MDI_PI_OLD_STATE(pip);
4120 		} else {
4121 			state = MDI_PI_STATE(pip);
4122 		}
4123 	}
4124 	return (state);
4125 }
4126 
4127 /*
4128  * Note that the following function needs to be the new interface for
4129  * mdi_pi_get_state when mpxio gets integrated to ON.
4130  */
4131 int
4132 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4133 		uint32_t *ext_state)
4134 {
4135 	*state = MDI_PATHINFO_STATE_INIT;
4136 
4137 	if (pip) {
4138 		if (MDI_PI_IS_TRANSIENT(pip)) {
4139 			/*
4140 			 * mdi_pathinfo is in state transition.  Return the
4141 			 * last good state.
4142 			 */
4143 			*state = MDI_PI_OLD_STATE(pip);
4144 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
4145 		} else {
4146 			*state = MDI_PI_STATE(pip);
4147 			*ext_state = MDI_PI_EXT_STATE(pip);
4148 		}
4149 	}
4150 	return (MDI_SUCCESS);
4151 }
4152 
4153 /*
4154  * mdi_pi_get_preferred:
4155  *	Get the preferred path flag
4156  */
4157 int
4158 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4159 {
4160 	if (pip) {
4161 		return (MDI_PI(pip)->pi_preferred);
4162 	}
4163 	return (0);
4164 }
4165 
4166 /*
4167  * mdi_pi_set_preferred:
4168  *	Set the preferred path flag
4169  */
4170 void
4171 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4172 {
4173 	if (pip) {
4174 		MDI_PI(pip)->pi_preferred = preferred;
4175 	}
4176 }
4177 
4178 /*
4179  * mdi_pi_set_state():
4180  *		Set the mdi_pathinfo node state
4181  */
4182 void
4183 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4184 {
4185 	uint32_t	ext_state;
4186 
4187 	if (pip) {
4188 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4189 		MDI_PI(pip)->pi_state = state;
4190 		MDI_PI(pip)->pi_state |= ext_state;
4191 	}
4192 }
4193 
4194 /*
4195  * Property functions:
4196  */
4197 int
4198 i_map_nvlist_error_to_mdi(int val)
4199 {
4200 	int rv;
4201 
4202 	switch (val) {
4203 	case 0:
4204 		rv = DDI_PROP_SUCCESS;
4205 		break;
4206 	case EINVAL:
4207 	case ENOTSUP:
4208 		rv = DDI_PROP_INVAL_ARG;
4209 		break;
4210 	case ENOMEM:
4211 		rv = DDI_PROP_NO_MEMORY;
4212 		break;
4213 	default:
4214 		rv = DDI_PROP_NOT_FOUND;
4215 		break;
4216 	}
4217 	return (rv);
4218 }
4219 
4220 /*
4221  * mdi_pi_get_next_prop():
4222  * 		Property walk function.  The caller should hold mdi_pi_lock()
4223  *		and release by calling mdi_pi_unlock() at the end of walk to
4224  *		get a consistent value.
4225  */
4226 nvpair_t *
4227 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4228 {
4229 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4230 		return (NULL);
4231 	}
4232 	ASSERT(MDI_PI_LOCKED(pip));
4233 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4234 }
4235 
4236 /*
4237  * mdi_prop_remove():
4238  * 		Remove the named property from the named list.
4239  */
4240 int
4241 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4242 {
4243 	if (pip == NULL) {
4244 		return (DDI_PROP_NOT_FOUND);
4245 	}
4246 	ASSERT(!MDI_PI_LOCKED(pip));
4247 	MDI_PI_LOCK(pip);
4248 	if (MDI_PI(pip)->pi_prop == NULL) {
4249 		MDI_PI_UNLOCK(pip);
4250 		return (DDI_PROP_NOT_FOUND);
4251 	}
4252 	if (name) {
4253 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4254 	} else {
4255 		char		nvp_name[MAXNAMELEN];
4256 		nvpair_t	*nvp;
4257 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4258 		while (nvp) {
4259 			nvpair_t	*next;
4260 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4261 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
4262 			    nvpair_name(nvp));
4263 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4264 			    nvp_name);
4265 			nvp = next;
4266 		}
4267 	}
4268 	MDI_PI_UNLOCK(pip);
4269 	return (DDI_PROP_SUCCESS);
4270 }
4271 
4272 /*
4273  * mdi_prop_size():
4274  * 		Get buffer size needed to pack the property data.
4275  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
4276  *		buffer size.
4277  */
4278 int
4279 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4280 {
4281 	int	rv;
4282 	size_t	bufsize;
4283 
4284 	*buflenp = 0;
4285 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4286 		return (DDI_PROP_NOT_FOUND);
4287 	}
4288 	ASSERT(MDI_PI_LOCKED(pip));
4289 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4290 	    &bufsize, NV_ENCODE_NATIVE);
4291 	*buflenp = bufsize;
4292 	return (i_map_nvlist_error_to_mdi(rv));
4293 }
4294 
4295 /*
4296  * mdi_prop_pack():
4297  * 		pack the property list.  The caller should hold the
4298  *		mdi_pathinfo_t node to get a consistent data
4299  */
4300 int
4301 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4302 {
4303 	int	rv;
4304 	size_t	bufsize;
4305 
4306 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4307 		return (DDI_PROP_NOT_FOUND);
4308 	}
4309 
4310 	ASSERT(MDI_PI_LOCKED(pip));
4311 
4312 	bufsize = buflen;
4313 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4314 	    NV_ENCODE_NATIVE, KM_SLEEP);
4315 
4316 	return (i_map_nvlist_error_to_mdi(rv));
4317 }
4318 
4319 /*
4320  * mdi_prop_update_byte():
4321  *		Create/Update a byte property
4322  */
4323 int
4324 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4325 {
4326 	int rv;
4327 
4328 	if (pip == NULL) {
4329 		return (DDI_PROP_INVAL_ARG);
4330 	}
4331 	ASSERT(!MDI_PI_LOCKED(pip));
4332 	MDI_PI_LOCK(pip);
4333 	if (MDI_PI(pip)->pi_prop == NULL) {
4334 		MDI_PI_UNLOCK(pip);
4335 		return (DDI_PROP_NOT_FOUND);
4336 	}
4337 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4338 	MDI_PI_UNLOCK(pip);
4339 	return (i_map_nvlist_error_to_mdi(rv));
4340 }
4341 
4342 /*
4343  * mdi_prop_update_byte_array():
4344  *		Create/Update a byte array property
4345  */
4346 int
4347 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4348     uint_t nelements)
4349 {
4350 	int rv;
4351 
4352 	if (pip == NULL) {
4353 		return (DDI_PROP_INVAL_ARG);
4354 	}
4355 	ASSERT(!MDI_PI_LOCKED(pip));
4356 	MDI_PI_LOCK(pip);
4357 	if (MDI_PI(pip)->pi_prop == NULL) {
4358 		MDI_PI_UNLOCK(pip);
4359 		return (DDI_PROP_NOT_FOUND);
4360 	}
4361 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4362 	MDI_PI_UNLOCK(pip);
4363 	return (i_map_nvlist_error_to_mdi(rv));
4364 }
4365 
4366 /*
4367  * mdi_prop_update_int():
4368  *		Create/Update a 32 bit integer property
4369  */
4370 int
4371 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4372 {
4373 	int rv;
4374 
4375 	if (pip == NULL) {
4376 		return (DDI_PROP_INVAL_ARG);
4377 	}
4378 	ASSERT(!MDI_PI_LOCKED(pip));
4379 	MDI_PI_LOCK(pip);
4380 	if (MDI_PI(pip)->pi_prop == NULL) {
4381 		MDI_PI_UNLOCK(pip);
4382 		return (DDI_PROP_NOT_FOUND);
4383 	}
4384 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4385 	MDI_PI_UNLOCK(pip);
4386 	return (i_map_nvlist_error_to_mdi(rv));
4387 }
4388 
4389 /*
4390  * mdi_prop_update_int64():
4391  *		Create/Update a 64 bit integer property
4392  */
4393 int
4394 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4395 {
4396 	int rv;
4397 
4398 	if (pip == NULL) {
4399 		return (DDI_PROP_INVAL_ARG);
4400 	}
4401 	ASSERT(!MDI_PI_LOCKED(pip));
4402 	MDI_PI_LOCK(pip);
4403 	if (MDI_PI(pip)->pi_prop == NULL) {
4404 		MDI_PI_UNLOCK(pip);
4405 		return (DDI_PROP_NOT_FOUND);
4406 	}
4407 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4408 	MDI_PI_UNLOCK(pip);
4409 	return (i_map_nvlist_error_to_mdi(rv));
4410 }
4411 
4412 /*
4413  * mdi_prop_update_int_array():
4414  *		Create/Update a int array property
4415  */
4416 int
4417 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4418 	    uint_t nelements)
4419 {
4420 	int rv;
4421 
4422 	if (pip == NULL) {
4423 		return (DDI_PROP_INVAL_ARG);
4424 	}
4425 	ASSERT(!MDI_PI_LOCKED(pip));
4426 	MDI_PI_LOCK(pip);
4427 	if (MDI_PI(pip)->pi_prop == NULL) {
4428 		MDI_PI_UNLOCK(pip);
4429 		return (DDI_PROP_NOT_FOUND);
4430 	}
4431 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4432 	    nelements);
4433 	MDI_PI_UNLOCK(pip);
4434 	return (i_map_nvlist_error_to_mdi(rv));
4435 }
4436 
4437 /*
4438  * mdi_prop_update_string():
4439  *		Create/Update a string property
4440  */
4441 int
4442 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4443 {
4444 	int rv;
4445 
4446 	if (pip == NULL) {
4447 		return (DDI_PROP_INVAL_ARG);
4448 	}
4449 	ASSERT(!MDI_PI_LOCKED(pip));
4450 	MDI_PI_LOCK(pip);
4451 	if (MDI_PI(pip)->pi_prop == NULL) {
4452 		MDI_PI_UNLOCK(pip);
4453 		return (DDI_PROP_NOT_FOUND);
4454 	}
4455 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4456 	MDI_PI_UNLOCK(pip);
4457 	return (i_map_nvlist_error_to_mdi(rv));
4458 }
4459 
4460 /*
4461  * mdi_prop_update_string_array():
4462  *		Create/Update a string array property
4463  */
4464 int
4465 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4466     uint_t nelements)
4467 {
4468 	int rv;
4469 
4470 	if (pip == NULL) {
4471 		return (DDI_PROP_INVAL_ARG);
4472 	}
4473 	ASSERT(!MDI_PI_LOCKED(pip));
4474 	MDI_PI_LOCK(pip);
4475 	if (MDI_PI(pip)->pi_prop == NULL) {
4476 		MDI_PI_UNLOCK(pip);
4477 		return (DDI_PROP_NOT_FOUND);
4478 	}
4479 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4480 	    nelements);
4481 	MDI_PI_UNLOCK(pip);
4482 	return (i_map_nvlist_error_to_mdi(rv));
4483 }
4484 
4485 /*
4486  * mdi_prop_lookup_byte():
4487  * 		Look for byte property identified by name.  The data returned
4488  *		is the actual property and valid as long as mdi_pathinfo_t node
4489  *		is alive.
4490  */
4491 int
4492 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4493 {
4494 	int rv;
4495 
4496 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4497 		return (DDI_PROP_NOT_FOUND);
4498 	}
4499 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4500 	return (i_map_nvlist_error_to_mdi(rv));
4501 }
4502 
4503 
4504 /*
4505  * mdi_prop_lookup_byte_array():
4506  * 		Look for byte array property identified by name.  The data
4507  *		returned is the actual property and valid as long as
4508  *		mdi_pathinfo_t node is alive.
4509  */
4510 int
4511 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4512     uint_t *nelements)
4513 {
4514 	int rv;
4515 
4516 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4517 		return (DDI_PROP_NOT_FOUND);
4518 	}
4519 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4520 	    nelements);
4521 	return (i_map_nvlist_error_to_mdi(rv));
4522 }
4523 
4524 /*
4525  * mdi_prop_lookup_int():
4526  * 		Look for int property identified by name.  The data returned
4527  *		is the actual property and valid as long as mdi_pathinfo_t
4528  *		node is alive.
4529  */
4530 int
4531 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4532 {
4533 	int rv;
4534 
4535 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4536 		return (DDI_PROP_NOT_FOUND);
4537 	}
4538 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4539 	return (i_map_nvlist_error_to_mdi(rv));
4540 }
4541 
4542 /*
4543  * mdi_prop_lookup_int64():
4544  * 		Look for int64 property identified by name.  The data returned
4545  *		is the actual property and valid as long as mdi_pathinfo_t node
4546  *		is alive.
4547  */
4548 int
4549 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4550 {
4551 	int rv;
4552 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4553 		return (DDI_PROP_NOT_FOUND);
4554 	}
4555 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4556 	return (i_map_nvlist_error_to_mdi(rv));
4557 }
4558 
4559 /*
4560  * mdi_prop_lookup_int_array():
4561  * 		Look for int array property identified by name.  The data
4562  *		returned is the actual property and valid as long as
4563  *		mdi_pathinfo_t node is alive.
4564  */
4565 int
4566 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4567     uint_t *nelements)
4568 {
4569 	int rv;
4570 
4571 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4572 		return (DDI_PROP_NOT_FOUND);
4573 	}
4574 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4575 	    (int32_t **)data, nelements);
4576 	return (i_map_nvlist_error_to_mdi(rv));
4577 }
4578 
4579 /*
4580  * mdi_prop_lookup_string():
4581  * 		Look for string property identified by name.  The data
4582  *		returned is the actual property and valid as long as
4583  *		mdi_pathinfo_t node is alive.
4584  */
4585 int
4586 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4587 {
4588 	int rv;
4589 
4590 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4591 		return (DDI_PROP_NOT_FOUND);
4592 	}
4593 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4594 	return (i_map_nvlist_error_to_mdi(rv));
4595 }
4596 
4597 /*
4598  * mdi_prop_lookup_string_array():
4599  * 		Look for string array property identified by name.  The data
4600  *		returned is the actual property and valid as long as
4601  *		mdi_pathinfo_t node is alive.
4602  */
4603 int
4604 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4605     uint_t *nelements)
4606 {
4607 	int rv;
4608 
4609 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4610 		return (DDI_PROP_NOT_FOUND);
4611 	}
4612 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4613 	    nelements);
4614 	return (i_map_nvlist_error_to_mdi(rv));
4615 }
4616 
4617 /*
4618  * mdi_prop_free():
4619  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4620  *		functions return the pointer to actual property data and not a
4621  *		copy of it.  So the data returned is valid as long as
4622  *		mdi_pathinfo_t node is valid.
4623  */
4624 /*ARGSUSED*/
4625 int
4626 mdi_prop_free(void *data)
4627 {
4628 	return (DDI_PROP_SUCCESS);
4629 }
4630 
4631 /*ARGSUSED*/
4632 static void
4633 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4634 {
4635 	char		*phci_path, *ct_path;
4636 	char		*ct_status;
4637 	char		*status;
4638 	dev_info_t	*dip = ct->ct_dip;
4639 	char		lb_buf[64];
4640 
4641 	ASSERT(MDI_CLIENT_LOCKED(ct));
4642 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4643 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4644 		return;
4645 	}
4646 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4647 		ct_status = "optimal";
4648 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4649 		ct_status = "degraded";
4650 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4651 		ct_status = "failed";
4652 	} else {
4653 		ct_status = "unknown";
4654 	}
4655 
4656 	if (MDI_PI_IS_OFFLINE(pip)) {
4657 		status = "offline";
4658 	} else if (MDI_PI_IS_ONLINE(pip)) {
4659 		status = "online";
4660 	} else if (MDI_PI_IS_STANDBY(pip)) {
4661 		status = "standby";
4662 	} else if (MDI_PI_IS_FAULT(pip)) {
4663 		status = "faulted";
4664 	} else {
4665 		status = "unknown";
4666 	}
4667 
4668 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4669 		(void) snprintf(lb_buf, sizeof (lb_buf),
4670 		    "%s, region-size: %d", mdi_load_balance_lba,
4671 			ct->ct_lb_args->region_size);
4672 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4673 		(void) snprintf(lb_buf, sizeof (lb_buf),
4674 		    "%s", mdi_load_balance_none);
4675 	} else {
4676 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4677 		    mdi_load_balance_rr);
4678 	}
4679 
4680 	if (dip) {
4681 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4682 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4683 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4684 		    "path %s (%s%d) to target address: %s is %s"
4685 		    " Load balancing: %s\n",
4686 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4687 		    ddi_get_instance(dip), ct_status,
4688 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4689 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4690 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4691 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4692 		kmem_free(phci_path, MAXPATHLEN);
4693 		kmem_free(ct_path, MAXPATHLEN);
4694 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4695 	}
4696 }
4697 
4698 #ifdef	DEBUG
4699 /*
4700  * i_mdi_log():
4701  *		Utility function for error message management
4702  *
4703  */
4704 /*PRINTFLIKE3*/
4705 static void
4706 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
4707 {
4708 	char		name[MAXNAMELEN];
4709 	char		buf[MAXNAMELEN];
4710 	char		*bp;
4711 	va_list		ap;
4712 	int		log_only = 0;
4713 	int		boot_only = 0;
4714 	int		console_only = 0;
4715 
4716 	if (dip) {
4717 		(void) snprintf(name, MAXNAMELEN, "%s%d: ",
4718 		    ddi_driver_name(dip), ddi_get_instance(dip));
4719 	} else {
4720 		name[0] = 0;
4721 	}
4722 
4723 	va_start(ap, fmt);
4724 	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
4725 	va_end(ap);
4726 
4727 	switch (buf[0]) {
4728 	case '!':
4729 		bp = &buf[1];
4730 		log_only = 1;
4731 		break;
4732 	case '?':
4733 		bp = &buf[1];
4734 		boot_only = 1;
4735 		break;
4736 	case '^':
4737 		bp = &buf[1];
4738 		console_only = 1;
4739 		break;
4740 	default:
4741 		bp = buf;
4742 		break;
4743 	}
4744 	if (mdi_debug_logonly) {
4745 		log_only = 1;
4746 		boot_only = 0;
4747 		console_only = 0;
4748 	}
4749 
4750 	switch (level) {
4751 	case CE_NOTE:
4752 		level = CE_CONT;
4753 		/* FALLTHROUGH */
4754 	case CE_CONT:
4755 	case CE_WARN:
4756 	case CE_PANIC:
4757 		if (boot_only) {
4758 			cmn_err(level, "?mdi: %s%s", name, bp);
4759 		} else if (console_only) {
4760 			cmn_err(level, "^mdi: %s%s", name, bp);
4761 		} else if (log_only) {
4762 			cmn_err(level, "!mdi: %s%s", name, bp);
4763 		} else {
4764 			cmn_err(level, "mdi: %s%s", name, bp);
4765 		}
4766 		break;
4767 	default:
4768 		cmn_err(level, "mdi: %s%s", name, bp);
4769 		break;
4770 	}
4771 }
4772 #endif	/* DEBUG */
4773 
4774 void
4775 i_mdi_client_online(dev_info_t *ct_dip)
4776 {
4777 	mdi_client_t	*ct;
4778 
4779 	/*
4780 	 * Client online notification. Mark client state as online
4781 	 * restore our binding with dev_info node
4782 	 */
4783 	ct = i_devi_get_client(ct_dip);
4784 	ASSERT(ct != NULL);
4785 	MDI_CLIENT_LOCK(ct);
4786 	MDI_CLIENT_SET_ONLINE(ct);
4787 	/* catch for any memory leaks */
4788 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4789 	ct->ct_dip = ct_dip;
4790 
4791 	if (ct->ct_power_cnt == 0)
4792 		(void) i_mdi_power_all_phci(ct);
4793 
4794 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4795 	    "i_mdi_pm_hold_client %p\n", (void *)ct));
4796 	i_mdi_pm_hold_client(ct, 1);
4797 
4798 	MDI_CLIENT_UNLOCK(ct);
4799 }
4800 
4801 void
4802 i_mdi_phci_online(dev_info_t *ph_dip)
4803 {
4804 	mdi_phci_t	*ph;
4805 
4806 	/* pHCI online notification. Mark state accordingly */
4807 	ph = i_devi_get_phci(ph_dip);
4808 	ASSERT(ph != NULL);
4809 	MDI_PHCI_LOCK(ph);
4810 	MDI_PHCI_SET_ONLINE(ph);
4811 	MDI_PHCI_UNLOCK(ph);
4812 }
4813 
4814 /*
4815  * mdi_devi_online():
4816  * 		Online notification from NDI framework on pHCI/client
4817  *		device online.
4818  * Return Values:
4819  *		NDI_SUCCESS
4820  *		MDI_FAILURE
4821  */
4822 /*ARGSUSED*/
4823 int
4824 mdi_devi_online(dev_info_t *dip, uint_t flags)
4825 {
4826 	if (MDI_PHCI(dip)) {
4827 		i_mdi_phci_online(dip);
4828 	}
4829 
4830 	if (MDI_CLIENT(dip)) {
4831 		i_mdi_client_online(dip);
4832 	}
4833 	return (NDI_SUCCESS);
4834 }
4835 
4836 /*
4837  * mdi_devi_offline():
4838  * 		Offline notification from NDI framework on pHCI/Client device
4839  *		offline.
4840  *
4841  * Return Values:
4842  *		NDI_SUCCESS
4843  *		NDI_FAILURE
4844  */
4845 /*ARGSUSED*/
4846 int
4847 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4848 {
4849 	int		rv = NDI_SUCCESS;
4850 
4851 	if (MDI_CLIENT(dip)) {
4852 		rv = i_mdi_client_offline(dip, flags);
4853 		if (rv != NDI_SUCCESS)
4854 			return (rv);
4855 	}
4856 
4857 	if (MDI_PHCI(dip)) {
4858 		rv = i_mdi_phci_offline(dip, flags);
4859 
4860 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4861 			/* set client back online */
4862 			i_mdi_client_online(dip);
4863 		}
4864 	}
4865 
4866 	return (rv);
4867 }
4868 
4869 /*ARGSUSED*/
4870 static int
4871 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4872 {
4873 	int		rv = NDI_SUCCESS;
4874 	mdi_phci_t	*ph;
4875 	mdi_client_t	*ct;
4876 	mdi_pathinfo_t	*pip;
4877 	mdi_pathinfo_t	*next;
4878 	mdi_pathinfo_t	*failed_pip = NULL;
4879 	dev_info_t	*cdip;
4880 
4881 	/*
4882 	 * pHCI component offline notification
4883 	 * Make sure that this pHCI instance is free to be offlined.
4884 	 * If it is OK to proceed, Offline and remove all the child
4885 	 * mdi_pathinfo nodes.  This process automatically offlines
4886 	 * corresponding client devices, for which this pHCI provides
4887 	 * critical services.
4888 	 */
4889 	ph = i_devi_get_phci(dip);
4890 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
4891 	    (void *)dip, (void *)ph));
4892 	if (ph == NULL) {
4893 		return (rv);
4894 	}
4895 
4896 	MDI_PHCI_LOCK(ph);
4897 
4898 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4899 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
4900 		    (void *)ph));
4901 		MDI_PHCI_UNLOCK(ph);
4902 		return (NDI_SUCCESS);
4903 	}
4904 
4905 	/*
4906 	 * Check to see if the pHCI can be offlined
4907 	 */
4908 	if (ph->ph_unstable) {
4909 		MDI_DEBUG(1, (CE_WARN, dip,
4910 		    "!One or more target devices are in transient "
4911 		    "state. This device can not be removed at "
4912 		    "this moment. Please try again later."));
4913 		MDI_PHCI_UNLOCK(ph);
4914 		return (NDI_BUSY);
4915 	}
4916 
4917 	pip = ph->ph_path_head;
4918 	while (pip != NULL) {
4919 		MDI_PI_LOCK(pip);
4920 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4921 
4922 		/*
4923 		 * The mdi_pathinfo state is OK. Check the client state.
4924 		 * If failover in progress fail the pHCI from offlining
4925 		 */
4926 		ct = MDI_PI(pip)->pi_client;
4927 		i_mdi_client_lock(ct, pip);
4928 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4929 		    (ct->ct_unstable)) {
4930 			/*
4931 			 * Failover is in progress, Fail the DR
4932 			 */
4933 			MDI_DEBUG(1, (CE_WARN, dip,
4934 			    "!pHCI device (%s%d) is Busy. %s",
4935 			    ddi_driver_name(dip), ddi_get_instance(dip),
4936 			    "This device can not be removed at "
4937 			    "this moment. Please try again later."));
4938 			MDI_PI_UNLOCK(pip);
4939 			i_mdi_client_unlock(ct);
4940 			MDI_PHCI_UNLOCK(ph);
4941 			return (NDI_BUSY);
4942 		}
4943 		MDI_PI_UNLOCK(pip);
4944 
4945 		/*
4946 		 * Check to see of we are removing the last path of this
4947 		 * client device...
4948 		 */
4949 		cdip = ct->ct_dip;
4950 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4951 		    (i_mdi_client_compute_state(ct, ph) ==
4952 		    MDI_CLIENT_STATE_FAILED)) {
4953 			i_mdi_client_unlock(ct);
4954 			MDI_PHCI_UNLOCK(ph);
4955 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4956 				/*
4957 				 * ndi_devi_offline() failed.
4958 				 * This pHCI provides the critical path
4959 				 * to one or more client devices.
4960 				 * Return busy.
4961 				 */
4962 				MDI_PHCI_LOCK(ph);
4963 				MDI_DEBUG(1, (CE_WARN, dip,
4964 				    "!pHCI device (%s%d) is Busy. %s",
4965 				    ddi_driver_name(dip), ddi_get_instance(dip),
4966 				    "This device can not be removed at "
4967 				    "this moment. Please try again later."));
4968 				failed_pip = pip;
4969 				break;
4970 			} else {
4971 				MDI_PHCI_LOCK(ph);
4972 				pip = next;
4973 			}
4974 		} else {
4975 			i_mdi_client_unlock(ct);
4976 			pip = next;
4977 		}
4978 	}
4979 
4980 	if (failed_pip) {
4981 		pip = ph->ph_path_head;
4982 		while (pip != failed_pip) {
4983 			MDI_PI_LOCK(pip);
4984 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4985 			ct = MDI_PI(pip)->pi_client;
4986 			i_mdi_client_lock(ct, pip);
4987 			cdip = ct->ct_dip;
4988 			switch (MDI_CLIENT_STATE(ct)) {
4989 			case MDI_CLIENT_STATE_OPTIMAL:
4990 			case MDI_CLIENT_STATE_DEGRADED:
4991 				if (cdip) {
4992 					MDI_PI_UNLOCK(pip);
4993 					i_mdi_client_unlock(ct);
4994 					MDI_PHCI_UNLOCK(ph);
4995 					(void) ndi_devi_online(cdip, 0);
4996 					MDI_PHCI_LOCK(ph);
4997 					pip = next;
4998 					continue;
4999 				}
5000 				break;
5001 
5002 			case MDI_CLIENT_STATE_FAILED:
5003 				if (cdip) {
5004 					MDI_PI_UNLOCK(pip);
5005 					i_mdi_client_unlock(ct);
5006 					MDI_PHCI_UNLOCK(ph);
5007 					(void) ndi_devi_offline(cdip, 0);
5008 					MDI_PHCI_LOCK(ph);
5009 					pip = next;
5010 					continue;
5011 				}
5012 				break;
5013 			}
5014 			MDI_PI_UNLOCK(pip);
5015 			i_mdi_client_unlock(ct);
5016 			pip = next;
5017 		}
5018 		MDI_PHCI_UNLOCK(ph);
5019 		return (NDI_BUSY);
5020 	}
5021 
5022 	/*
5023 	 * Mark the pHCI as offline
5024 	 */
5025 	MDI_PHCI_SET_OFFLINE(ph);
5026 
5027 	/*
5028 	 * Mark the child mdi_pathinfo nodes as transient
5029 	 */
5030 	pip = ph->ph_path_head;
5031 	while (pip != NULL) {
5032 		MDI_PI_LOCK(pip);
5033 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5034 		MDI_PI_SET_OFFLINING(pip);
5035 		MDI_PI_UNLOCK(pip);
5036 		pip = next;
5037 	}
5038 	MDI_PHCI_UNLOCK(ph);
5039 	/*
5040 	 * Give a chance for any pending commands to execute
5041 	 */
5042 	delay(1);
5043 	MDI_PHCI_LOCK(ph);
5044 	pip = ph->ph_path_head;
5045 	while (pip != NULL) {
5046 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5047 		(void) i_mdi_pi_offline(pip, flags);
5048 		MDI_PI_LOCK(pip);
5049 		ct = MDI_PI(pip)->pi_client;
5050 		if (!MDI_PI_IS_OFFLINE(pip)) {
5051 			MDI_DEBUG(1, (CE_WARN, dip,
5052 			    "!pHCI device (%s%d) is Busy. %s",
5053 			    ddi_driver_name(dip), ddi_get_instance(dip),
5054 			    "This device can not be removed at "
5055 			    "this moment. Please try again later."));
5056 			MDI_PI_UNLOCK(pip);
5057 			MDI_PHCI_SET_ONLINE(ph);
5058 			MDI_PHCI_UNLOCK(ph);
5059 			return (NDI_BUSY);
5060 		}
5061 		MDI_PI_UNLOCK(pip);
5062 		pip = next;
5063 	}
5064 	MDI_PHCI_UNLOCK(ph);
5065 
5066 	return (rv);
5067 }
5068 
5069 void
5070 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5071 {
5072 	mdi_phci_t	*ph;
5073 	mdi_client_t	*ct;
5074 	mdi_pathinfo_t	*pip;
5075 	mdi_pathinfo_t	*next;
5076 	dev_info_t	*cdip;
5077 
5078 	if (!MDI_PHCI(dip))
5079 		return;
5080 
5081 	ph = i_devi_get_phci(dip);
5082 	if (ph == NULL) {
5083 		return;
5084 	}
5085 
5086 	MDI_PHCI_LOCK(ph);
5087 
5088 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5089 		/* has no last path */
5090 		MDI_PHCI_UNLOCK(ph);
5091 		return;
5092 	}
5093 
5094 	pip = ph->ph_path_head;
5095 	while (pip != NULL) {
5096 		MDI_PI_LOCK(pip);
5097 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5098 
5099 		ct = MDI_PI(pip)->pi_client;
5100 		i_mdi_client_lock(ct, pip);
5101 		MDI_PI_UNLOCK(pip);
5102 
5103 		cdip = ct->ct_dip;
5104 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5105 		    (i_mdi_client_compute_state(ct, ph) ==
5106 		    MDI_CLIENT_STATE_FAILED)) {
5107 			/* Last path. Mark client dip as retiring */
5108 			i_mdi_client_unlock(ct);
5109 			MDI_PHCI_UNLOCK(ph);
5110 			(void) e_ddi_mark_retiring(cdip, cons_array);
5111 			MDI_PHCI_LOCK(ph);
5112 			pip = next;
5113 		} else {
5114 			i_mdi_client_unlock(ct);
5115 			pip = next;
5116 		}
5117 	}
5118 
5119 	MDI_PHCI_UNLOCK(ph);
5120 
5121 	return;
5122 }
5123 
5124 void
5125 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5126 {
5127 	mdi_phci_t	*ph;
5128 	mdi_client_t	*ct;
5129 	mdi_pathinfo_t	*pip;
5130 	mdi_pathinfo_t	*next;
5131 	dev_info_t	*cdip;
5132 
5133 	if (!MDI_PHCI(dip))
5134 		return;
5135 
5136 	ph = i_devi_get_phci(dip);
5137 	if (ph == NULL)
5138 		return;
5139 
5140 	MDI_PHCI_LOCK(ph);
5141 
5142 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5143 		MDI_PHCI_UNLOCK(ph);
5144 		/* not last path */
5145 		return;
5146 	}
5147 
5148 	if (ph->ph_unstable) {
5149 		MDI_PHCI_UNLOCK(ph);
5150 		/* can't check for constraints */
5151 		*constraint = 0;
5152 		return;
5153 	}
5154 
5155 	pip = ph->ph_path_head;
5156 	while (pip != NULL) {
5157 		MDI_PI_LOCK(pip);
5158 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5159 
5160 		/*
5161 		 * The mdi_pathinfo state is OK. Check the client state.
5162 		 * If failover in progress fail the pHCI from offlining
5163 		 */
5164 		ct = MDI_PI(pip)->pi_client;
5165 		i_mdi_client_lock(ct, pip);
5166 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5167 		    (ct->ct_unstable)) {
5168 			/*
5169 			 * Failover is in progress, can't check for constraints
5170 			 */
5171 			MDI_PI_UNLOCK(pip);
5172 			i_mdi_client_unlock(ct);
5173 			MDI_PHCI_UNLOCK(ph);
5174 			*constraint = 0;
5175 			return;
5176 		}
5177 		MDI_PI_UNLOCK(pip);
5178 
5179 		/*
5180 		 * Check to see of we are retiring the last path of this
5181 		 * client device...
5182 		 */
5183 		cdip = ct->ct_dip;
5184 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5185 		    (i_mdi_client_compute_state(ct, ph) ==
5186 		    MDI_CLIENT_STATE_FAILED)) {
5187 			i_mdi_client_unlock(ct);
5188 			MDI_PHCI_UNLOCK(ph);
5189 			(void) e_ddi_retire_notify(cdip, constraint);
5190 			MDI_PHCI_LOCK(ph);
5191 			pip = next;
5192 		} else {
5193 			i_mdi_client_unlock(ct);
5194 			pip = next;
5195 		}
5196 	}
5197 
5198 	MDI_PHCI_UNLOCK(ph);
5199 
5200 	return;
5201 }
5202 
5203 /*
5204  * offline the path(s) hanging off the PHCI. If the
5205  * last path to any client, check that constraints
5206  * have been applied.
5207  */
5208 void
5209 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
5210 {
5211 	mdi_phci_t	*ph;
5212 	mdi_client_t	*ct;
5213 	mdi_pathinfo_t	*pip;
5214 	mdi_pathinfo_t	*next;
5215 	dev_info_t	*cdip;
5216 	int		unstable = 0;
5217 	int		constraint;
5218 
5219 	if (!MDI_PHCI(dip))
5220 		return;
5221 
5222 	ph = i_devi_get_phci(dip);
5223 	if (ph == NULL) {
5224 		/* no last path and no pips */
5225 		return;
5226 	}
5227 
5228 	MDI_PHCI_LOCK(ph);
5229 
5230 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5231 		MDI_PHCI_UNLOCK(ph);
5232 		/* no last path and no pips */
5233 		return;
5234 	}
5235 
5236 	/*
5237 	 * Check to see if the pHCI can be offlined
5238 	 */
5239 	if (ph->ph_unstable) {
5240 		unstable = 1;
5241 	}
5242 
5243 	pip = ph->ph_path_head;
5244 	while (pip != NULL) {
5245 		MDI_PI_LOCK(pip);
5246 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5247 
5248 		/*
5249 		 * if failover in progress fail the pHCI from offlining
5250 		 */
5251 		ct = MDI_PI(pip)->pi_client;
5252 		i_mdi_client_lock(ct, pip);
5253 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5254 		    (ct->ct_unstable)) {
5255 			unstable = 1;
5256 		}
5257 		MDI_PI_UNLOCK(pip);
5258 
5259 		/*
5260 		 * Check to see of we are removing the last path of this
5261 		 * client device...
5262 		 */
5263 		cdip = ct->ct_dip;
5264 		if (!phci_only && cdip &&
5265 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5266 		    (i_mdi_client_compute_state(ct, ph) ==
5267 		    MDI_CLIENT_STATE_FAILED)) {
5268 			i_mdi_client_unlock(ct);
5269 			MDI_PHCI_UNLOCK(ph);
5270 			/*
5271 			 * We don't retire clients we just retire the
5272 			 * path to a client. If it is the last path
5273 			 * to a client, constraints are checked and
5274 			 * if we pass the last path is offlined. MPXIO will
5275 			 * then fail all I/Os to the client. Since we don't
5276 			 * want to retire the client on a path error
5277 			 * set constraint = 0 so that the client dip
5278 			 * is not retired.
5279 			 */
5280 			constraint = 0;
5281 			(void) e_ddi_retire_finalize(cdip, &constraint);
5282 			MDI_PHCI_LOCK(ph);
5283 			pip = next;
5284 		} else {
5285 			i_mdi_client_unlock(ct);
5286 			pip = next;
5287 		}
5288 	}
5289 
5290 	/*
5291 	 * Cannot offline pip(s)
5292 	 */
5293 	if (unstable) {
5294 		cmn_err(CE_WARN, "PHCI in transient state, cannot "
5295 		    "retire, dip = %p", (void *)dip);
5296 		MDI_PHCI_UNLOCK(ph);
5297 		return;
5298 	}
5299 
5300 	/*
5301 	 * Mark the pHCI as offline
5302 	 */
5303 	MDI_PHCI_SET_OFFLINE(ph);
5304 
5305 	/*
5306 	 * Mark the child mdi_pathinfo nodes as transient
5307 	 */
5308 	pip = ph->ph_path_head;
5309 	while (pip != NULL) {
5310 		MDI_PI_LOCK(pip);
5311 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5312 		MDI_PI_SET_OFFLINING(pip);
5313 		MDI_PI_UNLOCK(pip);
5314 		pip = next;
5315 	}
5316 	MDI_PHCI_UNLOCK(ph);
5317 	/*
5318 	 * Give a chance for any pending commands to execute
5319 	 */
5320 	delay(1);
5321 	MDI_PHCI_LOCK(ph);
5322 	pip = ph->ph_path_head;
5323 	while (pip != NULL) {
5324 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5325 		(void) i_mdi_pi_offline(pip, 0);
5326 		MDI_PI_LOCK(pip);
5327 		ct = MDI_PI(pip)->pi_client;
5328 		if (!MDI_PI_IS_OFFLINE(pip)) {
5329 			cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
5330 			    "PHCI dip = %p", (void *)dip);
5331 			MDI_PI_UNLOCK(pip);
5332 			MDI_PHCI_SET_ONLINE(ph);
5333 			MDI_PHCI_UNLOCK(ph);
5334 			return;
5335 		}
5336 		MDI_PI_UNLOCK(pip);
5337 		pip = next;
5338 	}
5339 	MDI_PHCI_UNLOCK(ph);
5340 
5341 	return;
5342 }
5343 
5344 void
5345 mdi_phci_unretire(dev_info_t *dip)
5346 {
5347 	ASSERT(MDI_PHCI(dip));
5348 
5349 	/*
5350 	 * Online the phci
5351 	 */
5352 	i_mdi_phci_online(dip);
5353 }
5354 
5355 /*ARGSUSED*/
5356 static int
5357 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5358 {
5359 	int		rv = NDI_SUCCESS;
5360 	mdi_client_t	*ct;
5361 
5362 	/*
5363 	 * Client component to go offline.  Make sure that we are
5364 	 * not in failing over state and update client state
5365 	 * accordingly
5366 	 */
5367 	ct = i_devi_get_client(dip);
5368 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n",
5369 	    (void *)dip, (void *)ct));
5370 	if (ct != NULL) {
5371 		MDI_CLIENT_LOCK(ct);
5372 		if (ct->ct_unstable) {
5373 			/*
5374 			 * One or more paths are in transient state,
5375 			 * Dont allow offline of a client device
5376 			 */
5377 			MDI_DEBUG(1, (CE_WARN, dip,
5378 			    "!One or more paths to this device is "
5379 			    "in transient state. This device can not "
5380 			    "be removed at this moment. "
5381 			    "Please try again later."));
5382 			MDI_CLIENT_UNLOCK(ct);
5383 			return (NDI_BUSY);
5384 		}
5385 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5386 			/*
5387 			 * Failover is in progress, Dont allow DR of
5388 			 * a client device
5389 			 */
5390 			MDI_DEBUG(1, (CE_WARN, dip,
5391 			    "!Client device (%s%d) is Busy. %s",
5392 			    ddi_driver_name(dip), ddi_get_instance(dip),
5393 			    "This device can not be removed at "
5394 			    "this moment. Please try again later."));
5395 			MDI_CLIENT_UNLOCK(ct);
5396 			return (NDI_BUSY);
5397 		}
5398 		MDI_CLIENT_SET_OFFLINE(ct);
5399 
5400 		/*
5401 		 * Unbind our relationship with the dev_info node
5402 		 */
5403 		if (flags & NDI_DEVI_REMOVE) {
5404 			ct->ct_dip = NULL;
5405 		}
5406 		MDI_CLIENT_UNLOCK(ct);
5407 	}
5408 	return (rv);
5409 }
5410 
5411 /*
5412  * mdi_pre_attach():
5413  *		Pre attach() notification handler
5414  */
5415 /*ARGSUSED*/
5416 int
5417 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5418 {
5419 	/* don't support old DDI_PM_RESUME */
5420 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5421 	    (cmd == DDI_PM_RESUME))
5422 		return (DDI_FAILURE);
5423 
5424 	return (DDI_SUCCESS);
5425 }
5426 
5427 /*
5428  * mdi_post_attach():
5429  *		Post attach() notification handler
5430  */
5431 /*ARGSUSED*/
5432 void
5433 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5434 {
5435 	mdi_phci_t	*ph;
5436 	mdi_client_t	*ct;
5437 	mdi_vhci_t	*vh;
5438 
5439 	if (MDI_PHCI(dip)) {
5440 		ph = i_devi_get_phci(dip);
5441 		ASSERT(ph != NULL);
5442 
5443 		MDI_PHCI_LOCK(ph);
5444 		switch (cmd) {
5445 		case DDI_ATTACH:
5446 			MDI_DEBUG(2, (CE_NOTE, dip,
5447 			    "!pHCI post_attach: called %p\n", (void *)ph));
5448 			if (error == DDI_SUCCESS) {
5449 				MDI_PHCI_SET_ATTACH(ph);
5450 			} else {
5451 				MDI_DEBUG(1, (CE_NOTE, dip,
5452 				    "!pHCI post_attach: failed error=%d\n",
5453 				    error));
5454 				MDI_PHCI_SET_DETACH(ph);
5455 			}
5456 			break;
5457 
5458 		case DDI_RESUME:
5459 			MDI_DEBUG(2, (CE_NOTE, dip,
5460 			    "!pHCI post_resume: called %p\n", (void *)ph));
5461 			if (error == DDI_SUCCESS) {
5462 				MDI_PHCI_SET_RESUME(ph);
5463 			} else {
5464 				MDI_DEBUG(1, (CE_NOTE, dip,
5465 				    "!pHCI post_resume: failed error=%d\n",
5466 				    error));
5467 				MDI_PHCI_SET_SUSPEND(ph);
5468 			}
5469 			break;
5470 		}
5471 		MDI_PHCI_UNLOCK(ph);
5472 	}
5473 
5474 	if (MDI_CLIENT(dip)) {
5475 		ct = i_devi_get_client(dip);
5476 		ASSERT(ct != NULL);
5477 
5478 		MDI_CLIENT_LOCK(ct);
5479 		switch (cmd) {
5480 		case DDI_ATTACH:
5481 			MDI_DEBUG(2, (CE_NOTE, dip,
5482 			    "!Client post_attach: called %p\n", (void *)ct));
5483 			if (error != DDI_SUCCESS) {
5484 				MDI_DEBUG(1, (CE_NOTE, dip,
5485 				    "!Client post_attach: failed error=%d\n",
5486 				    error));
5487 				MDI_CLIENT_SET_DETACH(ct);
5488 				MDI_DEBUG(4, (CE_WARN, dip,
5489 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
5490 				i_mdi_pm_reset_client(ct);
5491 				break;
5492 			}
5493 
5494 			/*
5495 			 * Client device has successfully attached, inform
5496 			 * the vhci.
5497 			 */
5498 			vh = ct->ct_vhci;
5499 			if (vh->vh_ops->vo_client_attached)
5500 				(*vh->vh_ops->vo_client_attached)(dip);
5501 
5502 			MDI_CLIENT_SET_ATTACH(ct);
5503 			break;
5504 
5505 		case DDI_RESUME:
5506 			MDI_DEBUG(2, (CE_NOTE, dip,
5507 			    "!Client post_attach: called %p\n", (void *)ct));
5508 			if (error == DDI_SUCCESS) {
5509 				MDI_CLIENT_SET_RESUME(ct);
5510 			} else {
5511 				MDI_DEBUG(1, (CE_NOTE, dip,
5512 				    "!Client post_resume: failed error=%d\n",
5513 				    error));
5514 				MDI_CLIENT_SET_SUSPEND(ct);
5515 			}
5516 			break;
5517 		}
5518 		MDI_CLIENT_UNLOCK(ct);
5519 	}
5520 }
5521 
5522 /*
5523  * mdi_pre_detach():
5524  *		Pre detach notification handler
5525  */
5526 /*ARGSUSED*/
5527 int
5528 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5529 {
5530 	int rv = DDI_SUCCESS;
5531 
5532 	if (MDI_CLIENT(dip)) {
5533 		(void) i_mdi_client_pre_detach(dip, cmd);
5534 	}
5535 
5536 	if (MDI_PHCI(dip)) {
5537 		rv = i_mdi_phci_pre_detach(dip, cmd);
5538 	}
5539 
5540 	return (rv);
5541 }
5542 
5543 /*ARGSUSED*/
5544 static int
5545 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5546 {
5547 	int		rv = DDI_SUCCESS;
5548 	mdi_phci_t	*ph;
5549 	mdi_client_t	*ct;
5550 	mdi_pathinfo_t	*pip;
5551 	mdi_pathinfo_t	*failed_pip = NULL;
5552 	mdi_pathinfo_t	*next;
5553 
5554 	ph = i_devi_get_phci(dip);
5555 	if (ph == NULL) {
5556 		return (rv);
5557 	}
5558 
5559 	MDI_PHCI_LOCK(ph);
5560 	switch (cmd) {
5561 	case DDI_DETACH:
5562 		MDI_DEBUG(2, (CE_NOTE, dip,
5563 		    "!pHCI pre_detach: called %p\n", (void *)ph));
5564 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
5565 			/*
5566 			 * mdi_pathinfo nodes are still attached to
5567 			 * this pHCI. Fail the detach for this pHCI.
5568 			 */
5569 			MDI_DEBUG(2, (CE_WARN, dip,
5570 			    "!pHCI pre_detach: "
5571 			    "mdi_pathinfo nodes are still attached "
5572 			    "%p\n", (void *)ph));
5573 			rv = DDI_FAILURE;
5574 			break;
5575 		}
5576 		MDI_PHCI_SET_DETACH(ph);
5577 		break;
5578 
5579 	case DDI_SUSPEND:
5580 		/*
5581 		 * pHCI is getting suspended.  Since mpxio client
5582 		 * devices may not be suspended at this point, to avoid
5583 		 * a potential stack overflow, it is important to suspend
5584 		 * client devices before pHCI can be suspended.
5585 		 */
5586 
5587 		MDI_DEBUG(2, (CE_NOTE, dip,
5588 		    "!pHCI pre_suspend: called %p\n", (void *)ph));
5589 		/*
5590 		 * Suspend all the client devices accessible through this pHCI
5591 		 */
5592 		pip = ph->ph_path_head;
5593 		while (pip != NULL && rv == DDI_SUCCESS) {
5594 			dev_info_t *cdip;
5595 			MDI_PI_LOCK(pip);
5596 			next =
5597 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5598 			ct = MDI_PI(pip)->pi_client;
5599 			i_mdi_client_lock(ct, pip);
5600 			cdip = ct->ct_dip;
5601 			MDI_PI_UNLOCK(pip);
5602 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5603 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5604 				i_mdi_client_unlock(ct);
5605 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5606 				    DDI_SUCCESS) {
5607 					/*
5608 					 * Suspend of one of the client
5609 					 * device has failed.
5610 					 */
5611 					MDI_DEBUG(1, (CE_WARN, dip,
5612 					    "!Suspend of device (%s%d) failed.",
5613 					    ddi_driver_name(cdip),
5614 					    ddi_get_instance(cdip)));
5615 					failed_pip = pip;
5616 					break;
5617 				}
5618 			} else {
5619 				i_mdi_client_unlock(ct);
5620 			}
5621 			pip = next;
5622 		}
5623 
5624 		if (rv == DDI_SUCCESS) {
5625 			/*
5626 			 * Suspend of client devices is complete. Proceed
5627 			 * with pHCI suspend.
5628 			 */
5629 			MDI_PHCI_SET_SUSPEND(ph);
5630 		} else {
5631 			/*
5632 			 * Revert back all the suspended client device states
5633 			 * to converse.
5634 			 */
5635 			pip = ph->ph_path_head;
5636 			while (pip != failed_pip) {
5637 				dev_info_t *cdip;
5638 				MDI_PI_LOCK(pip);
5639 				next =
5640 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5641 				ct = MDI_PI(pip)->pi_client;
5642 				i_mdi_client_lock(ct, pip);
5643 				cdip = ct->ct_dip;
5644 				MDI_PI_UNLOCK(pip);
5645 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5646 					i_mdi_client_unlock(ct);
5647 					(void) devi_attach(cdip, DDI_RESUME);
5648 				} else {
5649 					i_mdi_client_unlock(ct);
5650 				}
5651 				pip = next;
5652 			}
5653 		}
5654 		break;
5655 
5656 	default:
5657 		rv = DDI_FAILURE;
5658 		break;
5659 	}
5660 	MDI_PHCI_UNLOCK(ph);
5661 	return (rv);
5662 }
5663 
5664 /*ARGSUSED*/
5665 static int
5666 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5667 {
5668 	int		rv = DDI_SUCCESS;
5669 	mdi_client_t	*ct;
5670 
5671 	ct = i_devi_get_client(dip);
5672 	if (ct == NULL) {
5673 		return (rv);
5674 	}
5675 
5676 	MDI_CLIENT_LOCK(ct);
5677 	switch (cmd) {
5678 	case DDI_DETACH:
5679 		MDI_DEBUG(2, (CE_NOTE, dip,
5680 		    "!Client pre_detach: called %p\n", (void *)ct));
5681 		MDI_CLIENT_SET_DETACH(ct);
5682 		break;
5683 
5684 	case DDI_SUSPEND:
5685 		MDI_DEBUG(2, (CE_NOTE, dip,
5686 		    "!Client pre_suspend: called %p\n", (void *)ct));
5687 		MDI_CLIENT_SET_SUSPEND(ct);
5688 		break;
5689 
5690 	default:
5691 		rv = DDI_FAILURE;
5692 		break;
5693 	}
5694 	MDI_CLIENT_UNLOCK(ct);
5695 	return (rv);
5696 }
5697 
5698 /*
5699  * mdi_post_detach():
5700  *		Post detach notification handler
5701  */
5702 /*ARGSUSED*/
5703 void
5704 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5705 {
5706 	/*
5707 	 * Detach/Suspend of mpxio component failed. Update our state
5708 	 * too
5709 	 */
5710 	if (MDI_PHCI(dip))
5711 		i_mdi_phci_post_detach(dip, cmd, error);
5712 
5713 	if (MDI_CLIENT(dip))
5714 		i_mdi_client_post_detach(dip, cmd, error);
5715 }
5716 
5717 /*ARGSUSED*/
5718 static void
5719 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5720 {
5721 	mdi_phci_t	*ph;
5722 
5723 	/*
5724 	 * Detach/Suspend of phci component failed. Update our state
5725 	 * too
5726 	 */
5727 	ph = i_devi_get_phci(dip);
5728 	if (ph == NULL) {
5729 		return;
5730 	}
5731 
5732 	MDI_PHCI_LOCK(ph);
5733 	/*
5734 	 * Detach of pHCI failed. Restore back converse
5735 	 * state
5736 	 */
5737 	switch (cmd) {
5738 	case DDI_DETACH:
5739 		MDI_DEBUG(2, (CE_NOTE, dip,
5740 		    "!pHCI post_detach: called %p\n", (void *)ph));
5741 		if (error != DDI_SUCCESS)
5742 			MDI_PHCI_SET_ATTACH(ph);
5743 		break;
5744 
5745 	case DDI_SUSPEND:
5746 		MDI_DEBUG(2, (CE_NOTE, dip,
5747 		    "!pHCI post_suspend: called %p\n", (void *)ph));
5748 		if (error != DDI_SUCCESS)
5749 			MDI_PHCI_SET_RESUME(ph);
5750 		break;
5751 	}
5752 	MDI_PHCI_UNLOCK(ph);
5753 }
5754 
5755 /*ARGSUSED*/
5756 static void
5757 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5758 {
5759 	mdi_client_t	*ct;
5760 
5761 	ct = i_devi_get_client(dip);
5762 	if (ct == NULL) {
5763 		return;
5764 	}
5765 	MDI_CLIENT_LOCK(ct);
5766 	/*
5767 	 * Detach of Client failed. Restore back converse
5768 	 * state
5769 	 */
5770 	switch (cmd) {
5771 	case DDI_DETACH:
5772 		MDI_DEBUG(2, (CE_NOTE, dip,
5773 		    "!Client post_detach: called %p\n", (void *)ct));
5774 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5775 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5776 			    "i_mdi_pm_rele_client\n"));
5777 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5778 		} else {
5779 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5780 			    "i_mdi_pm_reset_client\n"));
5781 			i_mdi_pm_reset_client(ct);
5782 		}
5783 		if (error != DDI_SUCCESS)
5784 			MDI_CLIENT_SET_ATTACH(ct);
5785 		break;
5786 
5787 	case DDI_SUSPEND:
5788 		MDI_DEBUG(2, (CE_NOTE, dip,
5789 		    "!Client post_suspend: called %p\n", (void *)ct));
5790 		if (error != DDI_SUCCESS)
5791 			MDI_CLIENT_SET_RESUME(ct);
5792 		break;
5793 	}
5794 	MDI_CLIENT_UNLOCK(ct);
5795 }
5796 
5797 int
5798 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
5799 {
5800 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
5801 }
5802 
5803 /*
5804  * create and install per-path (client - pHCI) statistics
5805  * I/O stats supported: nread, nwritten, reads, and writes
5806  * Error stats - hard errors, soft errors, & transport errors
5807  */
5808 int
5809 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
5810 {
5811 	kstat_t			*kiosp, *kerrsp;
5812 	struct pi_errs		*nsp;
5813 	struct mdi_pi_kstats	*mdi_statp;
5814 
5815 	if (MDI_PI(pip)->pi_kstats != NULL)
5816 		return (MDI_SUCCESS);
5817 
5818 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5819 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
5820 		return (MDI_FAILURE);
5821 	}
5822 
5823 	(void) strcat(ksname, ",err");
5824 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5825 	    KSTAT_TYPE_NAMED,
5826 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5827 	if (kerrsp == NULL) {
5828 		kstat_delete(kiosp);
5829 		return (MDI_FAILURE);
5830 	}
5831 
5832 	nsp = (struct pi_errs *)kerrsp->ks_data;
5833 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5834 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5835 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5836 	    KSTAT_DATA_UINT32);
5837 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5838 	    KSTAT_DATA_UINT32);
5839 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5840 	    KSTAT_DATA_UINT32);
5841 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5842 	    KSTAT_DATA_UINT32);
5843 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5844 	    KSTAT_DATA_UINT32);
5845 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5846 	    KSTAT_DATA_UINT32);
5847 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5848 	    KSTAT_DATA_UINT32);
5849 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5850 
5851 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5852 	mdi_statp->pi_kstat_ref = 1;
5853 	mdi_statp->pi_kstat_iostats = kiosp;
5854 	mdi_statp->pi_kstat_errstats = kerrsp;
5855 	kstat_install(kiosp);
5856 	kstat_install(kerrsp);
5857 	MDI_PI(pip)->pi_kstats = mdi_statp;
5858 	return (MDI_SUCCESS);
5859 }
5860 
5861 /*
5862  * destroy per-path properties
5863  */
5864 static void
5865 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5866 {
5867 
5868 	struct mdi_pi_kstats *mdi_statp;
5869 
5870 	if (MDI_PI(pip)->pi_kstats == NULL)
5871 		return;
5872 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5873 		return;
5874 
5875 	MDI_PI(pip)->pi_kstats = NULL;
5876 
5877 	/*
5878 	 * the kstat may be shared between multiple pathinfo nodes
5879 	 * decrement this pathinfo's usage, removing the kstats
5880 	 * themselves when the last pathinfo reference is removed.
5881 	 */
5882 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5883 	if (--mdi_statp->pi_kstat_ref != 0)
5884 		return;
5885 
5886 	kstat_delete(mdi_statp->pi_kstat_iostats);
5887 	kstat_delete(mdi_statp->pi_kstat_errstats);
5888 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5889 }
5890 
5891 /*
5892  * update I/O paths KSTATS
5893  */
5894 void
5895 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5896 {
5897 	kstat_t *iostatp;
5898 	size_t xfer_cnt;
5899 
5900 	ASSERT(pip != NULL);
5901 
5902 	/*
5903 	 * I/O can be driven across a path prior to having path
5904 	 * statistics available, i.e. probe(9e).
5905 	 */
5906 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5907 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5908 		xfer_cnt = bp->b_bcount - bp->b_resid;
5909 		if (bp->b_flags & B_READ) {
5910 			KSTAT_IO_PTR(iostatp)->reads++;
5911 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5912 		} else {
5913 			KSTAT_IO_PTR(iostatp)->writes++;
5914 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5915 		}
5916 	}
5917 }
5918 
5919 /*
5920  * Enable the path(specific client/target/initiator)
5921  * Enabling a path means that MPxIO may select the enabled path for routing
5922  * future I/O requests, subject to other path state constraints.
5923  */
5924 int
5925 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
5926 {
5927 	mdi_phci_t	*ph;
5928 
5929 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5930 	if (ph == NULL) {
5931 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5932 			" failed. pip: %p ph = NULL\n", (void *)pip));
5933 		return (MDI_FAILURE);
5934 	}
5935 
5936 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
5937 		MDI_ENABLE_OP);
5938 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5939 		" Returning success pip = %p. ph = %p\n",
5940 		(void *)pip, (void *)ph));
5941 	return (MDI_SUCCESS);
5942 
5943 }
5944 
5945 /*
5946  * Disable the path (specific client/target/initiator)
5947  * Disabling a path means that MPxIO will not select the disabled path for
5948  * routing any new I/O requests.
5949  */
5950 int
5951 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
5952 {
5953 	mdi_phci_t	*ph;
5954 
5955 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5956 	if (ph == NULL) {
5957 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5958 			" failed. pip: %p ph = NULL\n", (void *)pip));
5959 		return (MDI_FAILURE);
5960 	}
5961 
5962 	(void) i_mdi_enable_disable_path(pip,
5963 			ph->ph_vhci, flags, MDI_DISABLE_OP);
5964 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5965 		"Returning success pip = %p. ph = %p",
5966 		(void *)pip, (void *)ph));
5967 	return (MDI_SUCCESS);
5968 }
5969 
5970 /*
5971  * disable the path to a particular pHCI (pHCI specified in the phci_path
5972  * argument) for a particular client (specified in the client_path argument).
5973  * Disabling a path means that MPxIO will not select the disabled path for
5974  * routing any new I/O requests.
5975  * NOTE: this will be removed once the NWS files are changed to use the new
5976  * mdi_{enable,disable}_path interfaces
5977  */
5978 int
5979 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5980 {
5981 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5982 }
5983 
5984 /*
5985  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5986  * argument) for a particular client (specified in the client_path argument).
5987  * Enabling a path means that MPxIO may select the enabled path for routing
5988  * future I/O requests, subject to other path state constraints.
5989  * NOTE: this will be removed once the NWS files are changed to use the new
5990  * mdi_{enable,disable}_path interfaces
5991  */
5992 
5993 int
5994 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5995 {
5996 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5997 }
5998 
5999 /*
6000  * Common routine for doing enable/disable.
6001  */
6002 static mdi_pathinfo_t *
6003 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6004 		int op)
6005 {
6006 	int		sync_flag = 0;
6007 	int		rv;
6008 	mdi_pathinfo_t 	*next;
6009 	int		(*f)() = NULL;
6010 
6011 	/*
6012 	 * Check to make sure the path is not already in the
6013 	 * requested state. If it is just return the next path
6014 	 * as we have nothing to do here.
6015 	 */
6016 	if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6017 	    (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6018 		MDI_PI_LOCK(pip);
6019 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6020 		MDI_PI_UNLOCK(pip);
6021 		return (next);
6022 	}
6023 
6024 	f = vh->vh_ops->vo_pi_state_change;
6025 
6026 	sync_flag = (flags << 8) & 0xf00;
6027 
6028 	/*
6029 	 * Do a callback into the mdi consumer to let it
6030 	 * know that path is about to get enabled/disabled.
6031 	 */
6032 	if (f != NULL) {
6033 		rv = (*f)(vh->vh_dip, pip, 0,
6034 			MDI_PI_EXT_STATE(pip),
6035 			MDI_EXT_STATE_CHANGE | sync_flag |
6036 			op | MDI_BEFORE_STATE_CHANGE);
6037 		if (rv != MDI_SUCCESS) {
6038 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
6039 			"!vo_pi_state_change: failed rv = %x", rv));
6040 		}
6041 	}
6042 	MDI_PI_LOCK(pip);
6043 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6044 
6045 	switch (flags) {
6046 		case USER_DISABLE:
6047 			if (op == MDI_DISABLE_OP) {
6048 				MDI_PI_SET_USER_DISABLE(pip);
6049 			} else {
6050 				MDI_PI_SET_USER_ENABLE(pip);
6051 			}
6052 			break;
6053 		case DRIVER_DISABLE:
6054 			if (op == MDI_DISABLE_OP) {
6055 				MDI_PI_SET_DRV_DISABLE(pip);
6056 			} else {
6057 				MDI_PI_SET_DRV_ENABLE(pip);
6058 			}
6059 			break;
6060 		case DRIVER_DISABLE_TRANSIENT:
6061 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6062 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6063 			} else {
6064 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6065 			}
6066 			break;
6067 	}
6068 	MDI_PI_UNLOCK(pip);
6069 	/*
6070 	 * Do a callback into the mdi consumer to let it
6071 	 * know that path is now enabled/disabled.
6072 	 */
6073 	if (f != NULL) {
6074 		rv = (*f)(vh->vh_dip, pip, 0,
6075 			MDI_PI_EXT_STATE(pip),
6076 			MDI_EXT_STATE_CHANGE | sync_flag |
6077 			op | MDI_AFTER_STATE_CHANGE);
6078 		if (rv != MDI_SUCCESS) {
6079 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
6080 			"!vo_pi_state_change: failed rv = %x", rv));
6081 		}
6082 	}
6083 	return (next);
6084 }
6085 
6086 /*
6087  * Common routine for doing enable/disable.
6088  * NOTE: this will be removed once the NWS files are changed to use the new
6089  * mdi_{enable,disable}_path has been putback
6090  */
6091 int
6092 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6093 {
6094 
6095 	mdi_phci_t	*ph;
6096 	mdi_vhci_t	*vh = NULL;
6097 	mdi_client_t	*ct;
6098 	mdi_pathinfo_t	*next, *pip;
6099 	int		found_it;
6100 
6101 	ph = i_devi_get_phci(pdip);
6102 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6103 		"Op = %d pdip = %p cdip = %p\n", op, (void *)pdip,
6104 		(void *)cdip));
6105 	if (ph == NULL) {
6106 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
6107 			"Op %d failed. ph = NULL\n", op));
6108 		return (MDI_FAILURE);
6109 	}
6110 
6111 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6112 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6113 			"Op Invalid operation = %d\n", op));
6114 		return (MDI_FAILURE);
6115 	}
6116 
6117 	vh = ph->ph_vhci;
6118 
6119 	if (cdip == NULL) {
6120 		/*
6121 		 * Need to mark the Phci as enabled/disabled.
6122 		 */
6123 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6124 		"Op %d for the phci\n", op));
6125 		MDI_PHCI_LOCK(ph);
6126 		switch (flags) {
6127 			case USER_DISABLE:
6128 				if (op == MDI_DISABLE_OP) {
6129 					MDI_PHCI_SET_USER_DISABLE(ph);
6130 				} else {
6131 					MDI_PHCI_SET_USER_ENABLE(ph);
6132 				}
6133 				break;
6134 			case DRIVER_DISABLE:
6135 				if (op == MDI_DISABLE_OP) {
6136 					MDI_PHCI_SET_DRV_DISABLE(ph);
6137 				} else {
6138 					MDI_PHCI_SET_DRV_ENABLE(ph);
6139 				}
6140 				break;
6141 			case DRIVER_DISABLE_TRANSIENT:
6142 				if (op == MDI_DISABLE_OP) {
6143 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6144 				} else {
6145 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6146 				}
6147 				break;
6148 			default:
6149 				MDI_PHCI_UNLOCK(ph);
6150 				MDI_DEBUG(1, (CE_NOTE, NULL,
6151 				"!i_mdi_pi_enable_disable:"
6152 				" Invalid flag argument= %d\n", flags));
6153 		}
6154 
6155 		/*
6156 		 * Phci has been disabled. Now try to enable/disable
6157 		 * path info's to each client.
6158 		 */
6159 		pip = ph->ph_path_head;
6160 		while (pip != NULL) {
6161 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6162 		}
6163 		MDI_PHCI_UNLOCK(ph);
6164 	} else {
6165 
6166 		/*
6167 		 * Disable a specific client.
6168 		 */
6169 		ct = i_devi_get_client(cdip);
6170 		if (ct == NULL) {
6171 			MDI_DEBUG(1, (CE_NOTE, NULL,
6172 			"!i_mdi_pi_enable_disable:"
6173 			" failed. ct = NULL operation = %d\n", op));
6174 			return (MDI_FAILURE);
6175 		}
6176 
6177 		MDI_CLIENT_LOCK(ct);
6178 		pip = ct->ct_path_head;
6179 		found_it = 0;
6180 		while (pip != NULL) {
6181 			MDI_PI_LOCK(pip);
6182 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6183 			if (MDI_PI(pip)->pi_phci == ph) {
6184 				MDI_PI_UNLOCK(pip);
6185 				found_it = 1;
6186 				break;
6187 			}
6188 			MDI_PI_UNLOCK(pip);
6189 			pip = next;
6190 		}
6191 
6192 
6193 		MDI_CLIENT_UNLOCK(ct);
6194 		if (found_it == 0) {
6195 			MDI_DEBUG(1, (CE_NOTE, NULL,
6196 			"!i_mdi_pi_enable_disable:"
6197 			" failed. Could not find corresponding pip\n"));
6198 			return (MDI_FAILURE);
6199 		}
6200 
6201 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
6202 	}
6203 
6204 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6205 		"Op %d Returning success pdip = %p cdip = %p\n",
6206 		op, (void *)pdip, (void *)cdip));
6207 	return (MDI_SUCCESS);
6208 }
6209 
6210 /*
6211  * Ensure phci powered up
6212  */
6213 static void
6214 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6215 {
6216 	dev_info_t	*ph_dip;
6217 
6218 	ASSERT(pip != NULL);
6219 	ASSERT(MDI_PI_LOCKED(pip));
6220 
6221 	if (MDI_PI(pip)->pi_pm_held) {
6222 		return;
6223 	}
6224 
6225 	ph_dip = mdi_pi_get_phci(pip);
6226 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n",
6227 	    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
6228 	if (ph_dip == NULL) {
6229 		return;
6230 	}
6231 
6232 	MDI_PI_UNLOCK(pip);
6233 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
6234 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6235 
6236 	pm_hold_power(ph_dip);
6237 
6238 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
6239 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6240 	MDI_PI_LOCK(pip);
6241 
6242 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6243 	if (DEVI(ph_dip)->devi_pm_info)
6244 		MDI_PI(pip)->pi_pm_held = 1;
6245 }
6246 
6247 /*
6248  * Allow phci powered down
6249  */
6250 static void
6251 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6252 {
6253 	dev_info_t	*ph_dip = NULL;
6254 
6255 	ASSERT(pip != NULL);
6256 	ASSERT(MDI_PI_LOCKED(pip));
6257 
6258 	if (MDI_PI(pip)->pi_pm_held == 0) {
6259 		return;
6260 	}
6261 
6262 	ph_dip = mdi_pi_get_phci(pip);
6263 	ASSERT(ph_dip != NULL);
6264 
6265 	MDI_PI_UNLOCK(pip);
6266 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n",
6267 	    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
6268 
6269 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
6270 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6271 	pm_rele_power(ph_dip);
6272 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
6273 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6274 
6275 	MDI_PI_LOCK(pip);
6276 	MDI_PI(pip)->pi_pm_held = 0;
6277 }
6278 
6279 static void
6280 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6281 {
6282 	ASSERT(MDI_CLIENT_LOCKED(ct));
6283 
6284 	ct->ct_power_cnt += incr;
6285 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p "
6286 	    "ct_power_cnt = %d incr = %d\n", (void *)ct,
6287 	    ct->ct_power_cnt, incr));
6288 	ASSERT(ct->ct_power_cnt >= 0);
6289 }
6290 
6291 static void
6292 i_mdi_rele_all_phci(mdi_client_t *ct)
6293 {
6294 	mdi_pathinfo_t  *pip;
6295 
6296 	ASSERT(MDI_CLIENT_LOCKED(ct));
6297 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6298 	while (pip != NULL) {
6299 		mdi_hold_path(pip);
6300 		MDI_PI_LOCK(pip);
6301 		i_mdi_pm_rele_pip(pip);
6302 		MDI_PI_UNLOCK(pip);
6303 		mdi_rele_path(pip);
6304 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6305 	}
6306 }
6307 
6308 static void
6309 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6310 {
6311 	ASSERT(MDI_CLIENT_LOCKED(ct));
6312 
6313 	if (i_ddi_devi_attached(ct->ct_dip)) {
6314 		ct->ct_power_cnt -= decr;
6315 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
6316 		    "ct_power_cnt = %d decr = %d\n",
6317 		    (void *)ct, ct->ct_power_cnt, decr));
6318 	}
6319 
6320 	ASSERT(ct->ct_power_cnt >= 0);
6321 	if (ct->ct_power_cnt == 0) {
6322 		i_mdi_rele_all_phci(ct);
6323 		return;
6324 	}
6325 }
6326 
6327 static void
6328 i_mdi_pm_reset_client(mdi_client_t *ct)
6329 {
6330 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
6331 	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
6332 	ASSERT(MDI_CLIENT_LOCKED(ct));
6333 	ct->ct_power_cnt = 0;
6334 	i_mdi_rele_all_phci(ct);
6335 	ct->ct_powercnt_config = 0;
6336 	ct->ct_powercnt_unconfig = 0;
6337 	ct->ct_powercnt_reset = 1;
6338 }
6339 
6340 static int
6341 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6342 {
6343 	int		ret;
6344 	dev_info_t	*ph_dip;
6345 
6346 	MDI_PI_LOCK(pip);
6347 	i_mdi_pm_hold_pip(pip);
6348 
6349 	ph_dip = mdi_pi_get_phci(pip);
6350 	MDI_PI_UNLOCK(pip);
6351 
6352 	/* bring all components of phci to full power */
6353 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6354 	    "pm_powerup for %s%d %p\n", ddi_driver_name(ph_dip),
6355 	    ddi_get_instance(ph_dip), (void *)pip));
6356 
6357 	ret = pm_powerup(ph_dip);
6358 
6359 	if (ret == DDI_FAILURE) {
6360 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6361 		    "pm_powerup FAILED for %s%d %p\n",
6362 		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6363 		    (void *)pip));
6364 
6365 		MDI_PI_LOCK(pip);
6366 		i_mdi_pm_rele_pip(pip);
6367 		MDI_PI_UNLOCK(pip);
6368 		return (MDI_FAILURE);
6369 	}
6370 
6371 	return (MDI_SUCCESS);
6372 }
6373 
6374 static int
6375 i_mdi_power_all_phci(mdi_client_t *ct)
6376 {
6377 	mdi_pathinfo_t  *pip;
6378 	int		succeeded = 0;
6379 
6380 	ASSERT(MDI_CLIENT_LOCKED(ct));
6381 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6382 	while (pip != NULL) {
6383 		/*
6384 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6385 		 * or MDI_PATHINFO_STATE_OFFLINE.
6386 		 */
6387 		if (MDI_PI_IS_INIT(pip) ||
6388 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6389 			mdi_hold_path(pip);
6390 			MDI_CLIENT_UNLOCK(ct);
6391 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6392 				succeeded = 1;
6393 
6394 			ASSERT(ct == MDI_PI(pip)->pi_client);
6395 			MDI_CLIENT_LOCK(ct);
6396 			mdi_rele_path(pip);
6397 		}
6398 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6399 	}
6400 
6401 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6402 }
6403 
6404 /*
6405  * mdi_bus_power():
6406  *		1. Place the phci(s) into powered up state so that
6407  *		   client can do power management
6408  *		2. Ensure phci powered up as client power managing
6409  * Return Values:
6410  *		MDI_SUCCESS
6411  *		MDI_FAILURE
6412  */
6413 int
6414 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6415     void *arg, void *result)
6416 {
6417 	int			ret = MDI_SUCCESS;
6418 	pm_bp_child_pwrchg_t	*bpc;
6419 	mdi_client_t		*ct;
6420 	dev_info_t		*cdip;
6421 	pm_bp_has_changed_t	*bphc;
6422 
6423 	/*
6424 	 * BUS_POWER_NOINVOL not supported
6425 	 */
6426 	if (op == BUS_POWER_NOINVOL)
6427 		return (MDI_FAILURE);
6428 
6429 	/*
6430 	 * ignore other OPs.
6431 	 * return quickly to save cou cycles on the ct processing
6432 	 */
6433 	switch (op) {
6434 	case BUS_POWER_PRE_NOTIFICATION:
6435 	case BUS_POWER_POST_NOTIFICATION:
6436 		bpc = (pm_bp_child_pwrchg_t *)arg;
6437 		cdip = bpc->bpc_dip;
6438 		break;
6439 	case BUS_POWER_HAS_CHANGED:
6440 		bphc = (pm_bp_has_changed_t *)arg;
6441 		cdip = bphc->bphc_dip;
6442 		break;
6443 	default:
6444 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6445 	}
6446 
6447 	ASSERT(MDI_CLIENT(cdip));
6448 
6449 	ct = i_devi_get_client(cdip);
6450 	if (ct == NULL)
6451 		return (MDI_FAILURE);
6452 
6453 	/*
6454 	 * wait till the mdi_pathinfo node state change are processed
6455 	 */
6456 	MDI_CLIENT_LOCK(ct);
6457 	switch (op) {
6458 	case BUS_POWER_PRE_NOTIFICATION:
6459 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
6460 		    "BUS_POWER_PRE_NOTIFICATION:"
6461 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
6462 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6463 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6464 
6465 		/* serialize power level change per client */
6466 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6467 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6468 
6469 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
6470 
6471 		if (ct->ct_power_cnt == 0) {
6472 			ret = i_mdi_power_all_phci(ct);
6473 		}
6474 
6475 		/*
6476 		 * if new_level > 0:
6477 		 *	- hold phci(s)
6478 		 *	- power up phci(s) if not already
6479 		 * ignore power down
6480 		 */
6481 		if (bpc->bpc_nlevel > 0) {
6482 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6483 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6484 				    "mdi_bus_power i_mdi_pm_hold_client\n"));
6485 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6486 			}
6487 		}
6488 		break;
6489 	case BUS_POWER_POST_NOTIFICATION:
6490 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
6491 		    "BUS_POWER_POST_NOTIFICATION:"
6492 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
6493 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6494 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6495 		    *(int *)result));
6496 
6497 		if (*(int *)result == DDI_SUCCESS) {
6498 			if (bpc->bpc_nlevel > 0) {
6499 				MDI_CLIENT_SET_POWER_UP(ct);
6500 			} else {
6501 				MDI_CLIENT_SET_POWER_DOWN(ct);
6502 			}
6503 		}
6504 
6505 		/* release the hold we did in pre-notification */
6506 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6507 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
6508 			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6509 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
6510 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6511 		}
6512 
6513 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6514 			/* another thread might started attaching */
6515 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6516 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6517 				    "mdi_bus_power i_mdi_pm_rele_client\n"));
6518 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6519 			/* detaching has been taken care in pm_post_unconfig */
6520 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6521 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6522 				    "mdi_bus_power i_mdi_pm_reset_client\n"));
6523 				i_mdi_pm_reset_client(ct);
6524 			}
6525 		}
6526 
6527 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6528 		cv_broadcast(&ct->ct_powerchange_cv);
6529 
6530 		break;
6531 
6532 	/* need to do more */
6533 	case BUS_POWER_HAS_CHANGED:
6534 		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
6535 		    "BUS_POWER_HAS_CHANGED:"
6536 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
6537 		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6538 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6539 
6540 		if (bphc->bphc_nlevel > 0 &&
6541 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
6542 			if (ct->ct_power_cnt == 0) {
6543 				ret = i_mdi_power_all_phci(ct);
6544 			}
6545 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
6546 			    "mdi_bus_power i_mdi_pm_hold_client\n"));
6547 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
6548 		}
6549 
6550 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6551 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
6552 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
6553 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6554 		}
6555 		break;
6556 	}
6557 
6558 	MDI_CLIENT_UNLOCK(ct);
6559 	return (ret);
6560 }
6561 
6562 static int
6563 i_mdi_pm_pre_config_one(dev_info_t *child)
6564 {
6565 	int		ret = MDI_SUCCESS;
6566 	mdi_client_t	*ct;
6567 
6568 	ct = i_devi_get_client(child);
6569 	if (ct == NULL)
6570 		return (MDI_FAILURE);
6571 
6572 	MDI_CLIENT_LOCK(ct);
6573 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6574 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6575 
6576 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6577 		MDI_CLIENT_UNLOCK(ct);
6578 		MDI_DEBUG(4, (CE_NOTE, child,
6579 		    "i_mdi_pm_pre_config_one already configured\n"));
6580 		return (MDI_SUCCESS);
6581 	}
6582 
6583 	if (ct->ct_powercnt_config) {
6584 		MDI_CLIENT_UNLOCK(ct);
6585 		MDI_DEBUG(4, (CE_NOTE, child,
6586 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
6587 		return (MDI_SUCCESS);
6588 	}
6589 
6590 	if (ct->ct_power_cnt == 0) {
6591 		ret = i_mdi_power_all_phci(ct);
6592 	}
6593 	MDI_DEBUG(4, (CE_NOTE, child,
6594 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
6595 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6596 	ct->ct_powercnt_config = 1;
6597 	ct->ct_powercnt_reset = 0;
6598 	MDI_CLIENT_UNLOCK(ct);
6599 	return (ret);
6600 }
6601 
6602 static int
6603 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6604 {
6605 	int			ret = MDI_SUCCESS;
6606 	dev_info_t		*cdip;
6607 	int			circ;
6608 
6609 	ASSERT(MDI_VHCI(vdip));
6610 
6611 	/* ndi_devi_config_one */
6612 	if (child) {
6613 		ASSERT(DEVI_BUSY_OWNED(vdip));
6614 		return (i_mdi_pm_pre_config_one(child));
6615 	}
6616 
6617 	/* devi_config_common */
6618 	ndi_devi_enter(vdip, &circ);
6619 	cdip = ddi_get_child(vdip);
6620 	while (cdip) {
6621 		dev_info_t *next = ddi_get_next_sibling(cdip);
6622 
6623 		ret = i_mdi_pm_pre_config_one(cdip);
6624 		if (ret != MDI_SUCCESS)
6625 			break;
6626 		cdip = next;
6627 	}
6628 	ndi_devi_exit(vdip, circ);
6629 	return (ret);
6630 }
6631 
6632 static int
6633 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6634 {
6635 	int		ret = MDI_SUCCESS;
6636 	mdi_client_t	*ct;
6637 
6638 	ct = i_devi_get_client(child);
6639 	if (ct == NULL)
6640 		return (MDI_FAILURE);
6641 
6642 	MDI_CLIENT_LOCK(ct);
6643 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6644 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6645 
6646 	if (!i_ddi_devi_attached(ct->ct_dip)) {
6647 		MDI_DEBUG(4, (CE_NOTE, child,
6648 		    "i_mdi_pm_pre_unconfig node detached already\n"));
6649 		MDI_CLIENT_UNLOCK(ct);
6650 		return (MDI_SUCCESS);
6651 	}
6652 
6653 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6654 	    (flags & NDI_AUTODETACH)) {
6655 		MDI_DEBUG(4, (CE_NOTE, child,
6656 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
6657 		MDI_CLIENT_UNLOCK(ct);
6658 		return (MDI_FAILURE);
6659 	}
6660 
6661 	if (ct->ct_powercnt_unconfig) {
6662 		MDI_DEBUG(4, (CE_NOTE, child,
6663 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
6664 		MDI_CLIENT_UNLOCK(ct);
6665 		*held = 1;
6666 		return (MDI_SUCCESS);
6667 	}
6668 
6669 	if (ct->ct_power_cnt == 0) {
6670 		ret = i_mdi_power_all_phci(ct);
6671 	}
6672 	MDI_DEBUG(4, (CE_NOTE, child,
6673 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
6674 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6675 	ct->ct_powercnt_unconfig = 1;
6676 	ct->ct_powercnt_reset = 0;
6677 	MDI_CLIENT_UNLOCK(ct);
6678 	if (ret == MDI_SUCCESS)
6679 		*held = 1;
6680 	return (ret);
6681 }
6682 
6683 static int
6684 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6685     int flags)
6686 {
6687 	int			ret = MDI_SUCCESS;
6688 	dev_info_t		*cdip;
6689 	int			circ;
6690 
6691 	ASSERT(MDI_VHCI(vdip));
6692 	*held = 0;
6693 
6694 	/* ndi_devi_unconfig_one */
6695 	if (child) {
6696 		ASSERT(DEVI_BUSY_OWNED(vdip));
6697 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6698 	}
6699 
6700 	/* devi_unconfig_common */
6701 	ndi_devi_enter(vdip, &circ);
6702 	cdip = ddi_get_child(vdip);
6703 	while (cdip) {
6704 		dev_info_t *next = ddi_get_next_sibling(cdip);
6705 
6706 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6707 		cdip = next;
6708 	}
6709 	ndi_devi_exit(vdip, circ);
6710 
6711 	if (*held)
6712 		ret = MDI_SUCCESS;
6713 
6714 	return (ret);
6715 }
6716 
6717 static void
6718 i_mdi_pm_post_config_one(dev_info_t *child)
6719 {
6720 	mdi_client_t	*ct;
6721 
6722 	ct = i_devi_get_client(child);
6723 	if (ct == NULL)
6724 		return;
6725 
6726 	MDI_CLIENT_LOCK(ct);
6727 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6728 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6729 
6730 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6731 		MDI_DEBUG(4, (CE_NOTE, child,
6732 		    "i_mdi_pm_post_config_one NOT configured\n"));
6733 		MDI_CLIENT_UNLOCK(ct);
6734 		return;
6735 	}
6736 
6737 	/* client has not been updated */
6738 	if (MDI_CLIENT_IS_FAILED(ct)) {
6739 		MDI_DEBUG(4, (CE_NOTE, child,
6740 		    "i_mdi_pm_post_config_one NOT configured\n"));
6741 		MDI_CLIENT_UNLOCK(ct);
6742 		return;
6743 	}
6744 
6745 	/* another thread might have powered it down or detached it */
6746 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6747 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6748 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6749 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6750 		MDI_DEBUG(4, (CE_NOTE, child,
6751 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6752 		i_mdi_pm_reset_client(ct);
6753 	} else {
6754 		mdi_pathinfo_t  *pip, *next;
6755 		int	valid_path_count = 0;
6756 
6757 		MDI_DEBUG(4, (CE_NOTE, child,
6758 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6759 		pip = ct->ct_path_head;
6760 		while (pip != NULL) {
6761 			MDI_PI_LOCK(pip);
6762 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6763 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6764 				valid_path_count ++;
6765 			MDI_PI_UNLOCK(pip);
6766 			pip = next;
6767 		}
6768 		i_mdi_pm_rele_client(ct, valid_path_count);
6769 	}
6770 	ct->ct_powercnt_config = 0;
6771 	MDI_CLIENT_UNLOCK(ct);
6772 }
6773 
6774 static void
6775 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
6776 {
6777 	int		circ;
6778 	dev_info_t	*cdip;
6779 
6780 	ASSERT(MDI_VHCI(vdip));
6781 
6782 	/* ndi_devi_config_one */
6783 	if (child) {
6784 		ASSERT(DEVI_BUSY_OWNED(vdip));
6785 		i_mdi_pm_post_config_one(child);
6786 		return;
6787 	}
6788 
6789 	/* devi_config_common */
6790 	ndi_devi_enter(vdip, &circ);
6791 	cdip = ddi_get_child(vdip);
6792 	while (cdip) {
6793 		dev_info_t *next = ddi_get_next_sibling(cdip);
6794 
6795 		i_mdi_pm_post_config_one(cdip);
6796 		cdip = next;
6797 	}
6798 	ndi_devi_exit(vdip, circ);
6799 }
6800 
6801 static void
6802 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6803 {
6804 	mdi_client_t	*ct;
6805 
6806 	ct = i_devi_get_client(child);
6807 	if (ct == NULL)
6808 		return;
6809 
6810 	MDI_CLIENT_LOCK(ct);
6811 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6812 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6813 
6814 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6815 		MDI_DEBUG(4, (CE_NOTE, child,
6816 		    "i_mdi_pm_post_unconfig NOT held\n"));
6817 		MDI_CLIENT_UNLOCK(ct);
6818 		return;
6819 	}
6820 
6821 	/* failure detaching or another thread just attached it */
6822 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6823 	    i_ddi_devi_attached(ct->ct_dip)) ||
6824 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6825 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6826 		MDI_DEBUG(4, (CE_NOTE, child,
6827 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6828 		i_mdi_pm_reset_client(ct);
6829 	} else {
6830 		mdi_pathinfo_t  *pip, *next;
6831 		int	valid_path_count = 0;
6832 
6833 		MDI_DEBUG(4, (CE_NOTE, child,
6834 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6835 		pip = ct->ct_path_head;
6836 		while (pip != NULL) {
6837 			MDI_PI_LOCK(pip);
6838 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6839 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6840 				valid_path_count ++;
6841 			MDI_PI_UNLOCK(pip);
6842 			pip = next;
6843 		}
6844 		i_mdi_pm_rele_client(ct, valid_path_count);
6845 		ct->ct_powercnt_unconfig = 0;
6846 	}
6847 
6848 	MDI_CLIENT_UNLOCK(ct);
6849 }
6850 
6851 static void
6852 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
6853 {
6854 	int			circ;
6855 	dev_info_t		*cdip;
6856 
6857 	ASSERT(MDI_VHCI(vdip));
6858 
6859 	if (!held) {
6860 		MDI_DEBUG(4, (CE_NOTE, vdip,
6861 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6862 		return;
6863 	}
6864 
6865 	if (child) {
6866 		ASSERT(DEVI_BUSY_OWNED(vdip));
6867 		i_mdi_pm_post_unconfig_one(child);
6868 		return;
6869 	}
6870 
6871 	ndi_devi_enter(vdip, &circ);
6872 	cdip = ddi_get_child(vdip);
6873 	while (cdip) {
6874 		dev_info_t *next = ddi_get_next_sibling(cdip);
6875 
6876 		i_mdi_pm_post_unconfig_one(cdip);
6877 		cdip = next;
6878 	}
6879 	ndi_devi_exit(vdip, circ);
6880 }
6881 
6882 int
6883 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6884 {
6885 	int			circ, ret = MDI_SUCCESS;
6886 	dev_info_t		*client_dip = NULL;
6887 	mdi_client_t		*ct;
6888 
6889 	/*
6890 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6891 	 * Power up pHCI for the named client device.
6892 	 * Note: Before the client is enumerated under vhci by phci,
6893 	 * client_dip can be NULL. Then proceed to power up all the
6894 	 * pHCIs.
6895 	 */
6896 	if (devnm != NULL) {
6897 		ndi_devi_enter(vdip, &circ);
6898 		client_dip = ndi_devi_findchild(vdip, devnm);
6899 	}
6900 
6901 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
6902 	    op, devnm ? devnm : "NULL", (void *)client_dip));
6903 
6904 	switch (op) {
6905 	case MDI_PM_PRE_CONFIG:
6906 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6907 		break;
6908 
6909 	case MDI_PM_PRE_UNCONFIG:
6910 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6911 		    flags);
6912 		break;
6913 
6914 	case MDI_PM_POST_CONFIG:
6915 		i_mdi_pm_post_config(vdip, client_dip);
6916 		break;
6917 
6918 	case MDI_PM_POST_UNCONFIG:
6919 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6920 		break;
6921 
6922 	case MDI_PM_HOLD_POWER:
6923 	case MDI_PM_RELE_POWER:
6924 		ASSERT(args);
6925 
6926 		client_dip = (dev_info_t *)args;
6927 		ASSERT(MDI_CLIENT(client_dip));
6928 
6929 		ct = i_devi_get_client(client_dip);
6930 		MDI_CLIENT_LOCK(ct);
6931 
6932 		if (op == MDI_PM_HOLD_POWER) {
6933 			if (ct->ct_power_cnt == 0) {
6934 				(void) i_mdi_power_all_phci(ct);
6935 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6936 				    "mdi_power i_mdi_pm_hold_client\n"));
6937 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6938 			}
6939 		} else {
6940 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6941 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6942 				    "mdi_power i_mdi_pm_rele_client\n"));
6943 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6944 			} else {
6945 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6946 				    "mdi_power i_mdi_pm_reset_client\n"));
6947 				i_mdi_pm_reset_client(ct);
6948 			}
6949 		}
6950 
6951 		MDI_CLIENT_UNLOCK(ct);
6952 		break;
6953 
6954 	default:
6955 		break;
6956 	}
6957 
6958 	if (devnm)
6959 		ndi_devi_exit(vdip, circ);
6960 
6961 	return (ret);
6962 }
6963 
6964 int
6965 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6966 {
6967 	mdi_vhci_t *vhci;
6968 
6969 	if (!MDI_VHCI(dip))
6970 		return (MDI_FAILURE);
6971 
6972 	if (mdi_class) {
6973 		vhci = DEVI(dip)->devi_mdi_xhci;
6974 		ASSERT(vhci);
6975 		*mdi_class = vhci->vh_class;
6976 	}
6977 
6978 	return (MDI_SUCCESS);
6979 }
6980 
6981 int
6982 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6983 {
6984 	mdi_phci_t *phci;
6985 
6986 	if (!MDI_PHCI(dip))
6987 		return (MDI_FAILURE);
6988 
6989 	if (mdi_class) {
6990 		phci = DEVI(dip)->devi_mdi_xhci;
6991 		ASSERT(phci);
6992 		*mdi_class = phci->ph_vhci->vh_class;
6993 	}
6994 
6995 	return (MDI_SUCCESS);
6996 }
6997 
6998 int
6999 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7000 {
7001 	mdi_client_t *client;
7002 
7003 	if (!MDI_CLIENT(dip))
7004 		return (MDI_FAILURE);
7005 
7006 	if (mdi_class) {
7007 		client = DEVI(dip)->devi_mdi_client;
7008 		ASSERT(client);
7009 		*mdi_class = client->ct_vhci->vh_class;
7010 	}
7011 
7012 	return (MDI_SUCCESS);
7013 }
7014 
7015 void *
7016 mdi_client_get_vhci_private(dev_info_t *dip)
7017 {
7018 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7019 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7020 		mdi_client_t	*ct;
7021 		ct = i_devi_get_client(dip);
7022 		return (ct->ct_vprivate);
7023 	}
7024 	return (NULL);
7025 }
7026 
7027 void
7028 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7029 {
7030 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7031 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7032 		mdi_client_t	*ct;
7033 		ct = i_devi_get_client(dip);
7034 		ct->ct_vprivate = data;
7035 	}
7036 }
7037 /*
7038  * mdi_pi_get_vhci_private():
7039  *		Get the vhci private information associated with the
7040  *		mdi_pathinfo node
7041  */
7042 void *
7043 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7044 {
7045 	caddr_t	vprivate = NULL;
7046 	if (pip) {
7047 		vprivate = MDI_PI(pip)->pi_vprivate;
7048 	}
7049 	return (vprivate);
7050 }
7051 
7052 /*
7053  * mdi_pi_set_vhci_private():
7054  *		Set the vhci private information in the mdi_pathinfo node
7055  */
7056 void
7057 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7058 {
7059 	if (pip) {
7060 		MDI_PI(pip)->pi_vprivate = priv;
7061 	}
7062 }
7063 
7064 /*
7065  * mdi_phci_get_vhci_private():
7066  *		Get the vhci private information associated with the
7067  *		mdi_phci node
7068  */
7069 void *
7070 mdi_phci_get_vhci_private(dev_info_t *dip)
7071 {
7072 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7073 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7074 		mdi_phci_t	*ph;
7075 		ph = i_devi_get_phci(dip);
7076 		return (ph->ph_vprivate);
7077 	}
7078 	return (NULL);
7079 }
7080 
7081 /*
7082  * mdi_phci_set_vhci_private():
7083  *		Set the vhci private information in the mdi_phci node
7084  */
7085 void
7086 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7087 {
7088 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7089 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7090 		mdi_phci_t	*ph;
7091 		ph = i_devi_get_phci(dip);
7092 		ph->ph_vprivate = priv;
7093 	}
7094 }
7095 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
/* number of entries in vhci_class_list */
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * The array is indexed in parallel with vhci_class_list;
 * setup_vhci_cache() takes the entry for its class and clears the slot.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
/* cache format version; mainnvl_to_vhcache() rejects any other value */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
7116 
7117 /*
7118  * Given vhci class name, return its on-disk vhci cache filename.
7119  * Memory for the returned filename which includes the full path is allocated
7120  * by this function.
7121  */
7122 static char *
7123 vhclass2vhcache_filename(char *vhclass)
7124 {
7125 	char *filename;
7126 	int len;
7127 	static char *fmt = "/etc/devices/mdi_%s_cache";
7128 
7129 	/*
7130 	 * fmt contains the on-disk vhci cache file name format;
7131 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7132 	 */
7133 
7134 	/* the -1 below is to account for "%s" in the format string */
7135 	len = strlen(fmt) + strlen(vhclass) - 1;
7136 	filename = kmem_alloc(len, KM_SLEEP);
7137 	(void) snprintf(filename, len, fmt, vhclass);
7138 	ASSERT(len == (strlen(filename) + 1));
7139 	return (filename);
7140 }
7141 
7142 /*
7143  * initialize the vhci cache related data structures and read the on-disk
7144  * vhci cached data into memory.
7145  */
7146 static void
7147 setup_vhci_cache(mdi_vhci_t *vh)
7148 {
7149 	mdi_vhci_config_t *vhc;
7150 	mdi_vhci_cache_t *vhcache;
7151 	int i;
7152 	nvlist_t *nvl = NULL;
7153 
7154 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7155 	vh->vh_config = vhc;
7156 	vhcache = &vhc->vhc_vhcache;
7157 
7158 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7159 
7160 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7161 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7162 
7163 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7164 
7165 	/*
7166 	 * Create string hash; same as mod_hash_create_strhash() except that
7167 	 * we use NULL key destructor.
7168 	 */
7169 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7170 	    mdi_bus_config_cache_hash_size,
7171 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
7172 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7173 
7174 	/*
7175 	 * The on-disk vhci cache is read during booting prior to the
7176 	 * lights-out period by mdi_read_devices_files().
7177 	 */
7178 	for (i = 0; i < N_VHCI_CLASSES; i++) {
7179 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7180 			nvl = vhcache_nvl[i];
7181 			vhcache_nvl[i] = NULL;
7182 			break;
7183 		}
7184 	}
7185 
7186 	/*
7187 	 * this is to cover the case of some one manually causing unloading
7188 	 * (or detaching) and reloading (or attaching) of a vhci driver.
7189 	 */
7190 	if (nvl == NULL && modrootloaded)
7191 		nvl = read_on_disk_vhci_cache(vh->vh_class);
7192 
7193 	if (nvl != NULL) {
7194 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7195 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7196 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7197 		else  {
7198 			cmn_err(CE_WARN,
7199 			    "%s: data file corrupted, will recreate\n",
7200 			    vhc->vhc_vhcache_filename);
7201 		}
7202 		rw_exit(&vhcache->vhcache_lock);
7203 		nvlist_free(nvl);
7204 	}
7205 
7206 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7207 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7208 
7209 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7210 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7211 }
7212 
7213 /*
7214  * free all vhci cache related resources
7215  */
7216 static int
7217 destroy_vhci_cache(mdi_vhci_t *vh)
7218 {
7219 	mdi_vhci_config_t *vhc = vh->vh_config;
7220 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7221 	mdi_vhcache_phci_t *cphci, *cphci_next;
7222 	mdi_vhcache_client_t *cct, *cct_next;
7223 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7224 
7225 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7226 		return (MDI_FAILURE);
7227 
7228 	kmem_free(vhc->vhc_vhcache_filename,
7229 	    strlen(vhc->vhc_vhcache_filename) + 1);
7230 
7231 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7232 
7233 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7234 	    cphci = cphci_next) {
7235 		cphci_next = cphci->cphci_next;
7236 		free_vhcache_phci(cphci);
7237 	}
7238 
7239 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7240 		cct_next = cct->cct_next;
7241 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7242 			cpi_next = cpi->cpi_next;
7243 			free_vhcache_pathinfo(cpi);
7244 		}
7245 		free_vhcache_client(cct);
7246 	}
7247 
7248 	rw_destroy(&vhcache->vhcache_lock);
7249 
7250 	mutex_destroy(&vhc->vhc_lock);
7251 	cv_destroy(&vhc->vhc_cv);
7252 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
7253 	return (MDI_SUCCESS);
7254 }
7255 
7256 /*
7257  * Stop all vhci cache related async threads and free their resources.
7258  */
7259 static int
7260 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7261 {
7262 	mdi_async_client_config_t *acc, *acc_next;
7263 
7264 	mutex_enter(&vhc->vhc_lock);
7265 	vhc->vhc_flags |= MDI_VHC_EXIT;
7266 	ASSERT(vhc->vhc_acc_thrcount >= 0);
7267 	cv_broadcast(&vhc->vhc_cv);
7268 
7269 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7270 	    vhc->vhc_acc_thrcount != 0) {
7271 		mutex_exit(&vhc->vhc_lock);
7272 		delay(1);
7273 		mutex_enter(&vhc->vhc_lock);
7274 	}
7275 
7276 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
7277 
7278 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7279 		acc_next = acc->acc_next;
7280 		free_async_client_config(acc);
7281 	}
7282 	vhc->vhc_acc_list_head = NULL;
7283 	vhc->vhc_acc_list_tail = NULL;
7284 	vhc->vhc_acc_count = 0;
7285 
7286 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7287 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7288 		mutex_exit(&vhc->vhc_lock);
7289 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7290 			vhcache_dirty(vhc);
7291 			return (MDI_FAILURE);
7292 		}
7293 	} else
7294 		mutex_exit(&vhc->vhc_lock);
7295 
7296 	if (callb_delete(vhc->vhc_cbid) != 0)
7297 		return (MDI_FAILURE);
7298 
7299 	return (MDI_SUCCESS);
7300 }
7301 
7302 /*
7303  * Stop vhci cache flush thread
7304  */
7305 /* ARGSUSED */
7306 static boolean_t
7307 stop_vhcache_flush_thread(void *arg, int code)
7308 {
7309 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7310 
7311 	mutex_enter(&vhc->vhc_lock);
7312 	vhc->vhc_flags |= MDI_VHC_EXIT;
7313 	cv_broadcast(&vhc->vhc_cv);
7314 
7315 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7316 		mutex_exit(&vhc->vhc_lock);
7317 		delay(1);
7318 		mutex_enter(&vhc->vhc_lock);
7319 	}
7320 
7321 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7322 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7323 		mutex_exit(&vhc->vhc_lock);
7324 		(void) flush_vhcache(vhc, 1);
7325 	} else
7326 		mutex_exit(&vhc->vhc_lock);
7327 
7328 	return (B_TRUE);
7329 }
7330 
7331 /*
7332  * Enqueue the vhcache phci (cphci) at the tail of the list
7333  */
7334 static void
7335 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7336 {
7337 	cphci->cphci_next = NULL;
7338 	if (vhcache->vhcache_phci_head == NULL)
7339 		vhcache->vhcache_phci_head = cphci;
7340 	else
7341 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7342 	vhcache->vhcache_phci_tail = cphci;
7343 }
7344 
7345 /*
7346  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7347  */
7348 static void
7349 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7350     mdi_vhcache_pathinfo_t *cpi)
7351 {
7352 	cpi->cpi_next = NULL;
7353 	if (cct->cct_cpi_head == NULL)
7354 		cct->cct_cpi_head = cpi;
7355 	else
7356 		cct->cct_cpi_tail->cpi_next = cpi;
7357 	cct->cct_cpi_tail = cpi;
7358 }
7359 
7360 /*
7361  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7362  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7363  * flag set come at the beginning of the list. All cpis which have this
7364  * flag set come at the end of the list.
7365  */
7366 static void
7367 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7368     mdi_vhcache_pathinfo_t *newcpi)
7369 {
7370 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7371 
7372 	if (cct->cct_cpi_head == NULL ||
7373 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7374 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7375 	else {
7376 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7377 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7378 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7379 			;
7380 
7381 		if (prev_cpi == NULL)
7382 			cct->cct_cpi_head = newcpi;
7383 		else
7384 			prev_cpi->cpi_next = newcpi;
7385 
7386 		newcpi->cpi_next = cpi;
7387 
7388 		if (cpi == NULL)
7389 			cct->cct_cpi_tail = newcpi;
7390 	}
7391 }
7392 
7393 /*
7394  * Enqueue the vhcache client (cct) at the tail of the list
7395  */
7396 static void
7397 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7398     mdi_vhcache_client_t *cct)
7399 {
7400 	cct->cct_next = NULL;
7401 	if (vhcache->vhcache_client_head == NULL)
7402 		vhcache->vhcache_client_head = cct;
7403 	else
7404 		vhcache->vhcache_client_tail->cct_next = cct;
7405 	vhcache->vhcache_client_tail = cct;
7406 }
7407 
/*
 * Free an array of nelem kmem-allocated strings together with the array
 * itself.  NULL slots are skipped; a NULL array is a no-op.  Each string is
 * freed with a size of strlen() + 1.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] != NULL)
			kmem_free(str[idx], strlen(str[idx]) + 1);
	}
	kmem_free(str, sizeof (char *) * nelem);
}
7421 
7422 static void
7423 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7424 {
7425 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7426 	kmem_free(cphci, sizeof (*cphci));
7427 }
7428 
7429 static void
7430 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7431 {
7432 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7433 	kmem_free(cpi, sizeof (*cpi));
7434 }
7435 
7436 static void
7437 free_vhcache_client(mdi_vhcache_client_t *cct)
7438 {
7439 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7440 	kmem_free(cct, sizeof (*cct));
7441 }
7442 
7443 static char *
7444 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7445 {
7446 	char *name_addr;
7447 	int len;
7448 
7449 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7450 	name_addr = kmem_alloc(len, KM_SLEEP);
7451 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7452 
7453 	if (ret_len)
7454 		*ret_len = len;
7455 	return (name_addr);
7456 }
7457 
7458 /*
7459  * Copy the contents of paddrnvl to vhci cache.
7460  * paddrnvl nvlist contains path information for a vhci client.
7461  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7462  */
7463 static void
7464 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7465     mdi_vhcache_client_t *cct)
7466 {
7467 	nvpair_t *nvp = NULL;
7468 	mdi_vhcache_pathinfo_t *cpi;
7469 	uint_t nelem;
7470 	uint32_t *val;
7471 
7472 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7473 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7474 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7475 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7476 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7477 		ASSERT(nelem == 2);
7478 		cpi->cpi_cphci = cphci_list[val[0]];
7479 		cpi->cpi_flags = val[1];
7480 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7481 	}
7482 }
7483 
7484 /*
7485  * Copy the contents of caddrmapnvl to vhci cache.
7486  * caddrmapnvl nvlist contains vhci client address to phci client address
7487  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7488  * this nvlist.
7489  */
7490 static void
7491 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7492     mdi_vhcache_phci_t *cphci_list[])
7493 {
7494 	nvpair_t *nvp = NULL;
7495 	nvlist_t *paddrnvl;
7496 	mdi_vhcache_client_t *cct;
7497 
7498 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7499 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7500 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7501 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7502 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7503 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7504 		/* the client must contain at least one path */
7505 		ASSERT(cct->cct_cpi_head != NULL);
7506 
7507 		enqueue_vhcache_client(vhcache, cct);
7508 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7509 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7510 	}
7511 }
7512 
7513 /*
7514  * Copy the contents of the main nvlist to vhci cache.
7515  *
7516  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7517  * The nvlist contains the mappings between the vhci client addresses and
7518  * their corresponding phci client addresses.
7519  *
7520  * The structure of the nvlist is as follows:
7521  *
7522  * Main nvlist:
7523  *	NAME		TYPE		DATA
7524  *	version		int32		version number
7525  *	phcis		string array	array of phci paths
7526  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7527  *
7528  * structure of c2paddrs_nvl:
7529  *	NAME		TYPE		DATA
7530  *	caddr1		nvlist_t	paddrs_nvl1
7531  *	caddr2		nvlist_t	paddrs_nvl2
7532  *	...
7533  * where caddr1, caddr2, ... are vhci client name and addresses in the
7534  * form of "<clientname>@<clientaddress>".
7535  * (for example: "ssd@2000002037cd9f72");
7536  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7537  *
7538  * structure of paddrs_nvl:
7539  *	NAME		TYPE		DATA
7540  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7541  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7542  *	...
7543  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7544  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7545  * phci-ids are integers that identify PHCIs to which the
7546  * the bus specific address belongs to. These integers are used as an index
7547  * into to the phcis string array in the main nvlist to get the PHCI path.
7548  */
7549 static int
7550 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7551 {
7552 	char **phcis, **phci_namep;
7553 	uint_t nphcis;
7554 	mdi_vhcache_phci_t *cphci, **cphci_list;
7555 	nvlist_t *caddrmapnvl;
7556 	int32_t ver;
7557 	int i;
7558 	size_t cphci_list_size;
7559 
7560 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7561 
7562 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7563 	    ver != MDI_VHCI_CACHE_VERSION)
7564 		return (MDI_FAILURE);
7565 
7566 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7567 	    &nphcis) != 0)
7568 		return (MDI_SUCCESS);
7569 
7570 	ASSERT(nphcis > 0);
7571 
7572 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7573 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7574 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7575 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7576 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7577 		enqueue_vhcache_phci(vhcache, cphci);
7578 		cphci_list[i] = cphci;
7579 	}
7580 
7581 	ASSERT(vhcache->vhcache_phci_head != NULL);
7582 
7583 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7584 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7585 
7586 	kmem_free(cphci_list, cphci_list_size);
7587 	return (MDI_SUCCESS);
7588 }
7589 
7590 /*
7591  * Build paddrnvl for the specified client using the information in the
7592  * vhci cache and add it to the caddrmapnnvl.
7593  * Returns 0 on success, errno on failure.
7594  */
7595 static int
7596 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7597     nvlist_t *caddrmapnvl)
7598 {
7599 	mdi_vhcache_pathinfo_t *cpi;
7600 	nvlist_t *nvl;
7601 	int err;
7602 	uint32_t val[2];
7603 
7604 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7605 
7606 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7607 		return (err);
7608 
7609 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7610 		val[0] = cpi->cpi_cphci->cphci_id;
7611 		val[1] = cpi->cpi_flags;
7612 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7613 		    != 0)
7614 			goto out;
7615 	}
7616 
7617 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7618 out:
7619 	nvlist_free(nvl);
7620 	return (err);
7621 }
7622 
7623 /*
7624  * Build caddrmapnvl using the information in the vhci cache
7625  * and add it to the mainnvl.
7626  * Returns 0 on success, errno on failure.
7627  */
7628 static int
7629 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7630 {
7631 	mdi_vhcache_client_t *cct;
7632 	nvlist_t *nvl;
7633 	int err;
7634 
7635 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7636 
7637 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7638 		return (err);
7639 
7640 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7641 	    cct = cct->cct_next) {
7642 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7643 			goto out;
7644 	}
7645 
7646 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7647 out:
7648 	nvlist_free(nvl);
7649 	return (err);
7650 }
7651 
7652 /*
7653  * Build nvlist using the information in the vhci cache.
7654  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7655  * Returns nvl on success, NULL on failure.
7656  */
7657 static nvlist_t *
7658 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7659 {
7660 	mdi_vhcache_phci_t *cphci;
7661 	uint_t phci_count;
7662 	char **phcis;
7663 	nvlist_t *nvl;
7664 	int err, i;
7665 
7666 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7667 		nvl = NULL;
7668 		goto out;
7669 	}
7670 
7671 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7672 	    MDI_VHCI_CACHE_VERSION)) != 0)
7673 		goto out;
7674 
7675 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7676 	if (vhcache->vhcache_phci_head == NULL) {
7677 		rw_exit(&vhcache->vhcache_lock);
7678 		return (nvl);
7679 	}
7680 
7681 	phci_count = 0;
7682 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7683 	    cphci = cphci->cphci_next)
7684 		cphci->cphci_id = phci_count++;
7685 
7686 	/* build phci pathname list */
7687 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7688 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7689 	    cphci = cphci->cphci_next, i++)
7690 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7691 
7692 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7693 	    phci_count);
7694 	free_string_array(phcis, phci_count);
7695 
7696 	if (err == 0 &&
7697 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7698 		rw_exit(&vhcache->vhcache_lock);
7699 		return (nvl);
7700 	}
7701 
7702 	rw_exit(&vhcache->vhcache_lock);
7703 out:
7704 	if (nvl)
7705 		nvlist_free(nvl);
7706 	return (NULL);
7707 }
7708 
7709 /*
7710  * Lookup vhcache phci structure for the specified phci path.
7711  */
7712 static mdi_vhcache_phci_t *
7713 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7714 {
7715 	mdi_vhcache_phci_t *cphci;
7716 
7717 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7718 
7719 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7720 	    cphci = cphci->cphci_next) {
7721 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7722 			return (cphci);
7723 	}
7724 
7725 	return (NULL);
7726 }
7727 
7728 /*
7729  * Lookup vhcache phci structure for the specified phci.
7730  */
7731 static mdi_vhcache_phci_t *
7732 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7733 {
7734 	mdi_vhcache_phci_t *cphci;
7735 
7736 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7737 
7738 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7739 	    cphci = cphci->cphci_next) {
7740 		if (cphci->cphci_phci == ph)
7741 			return (cphci);
7742 	}
7743 
7744 	return (NULL);
7745 }
7746 
7747 /*
7748  * Add the specified phci to the vhci cache if not already present.
7749  */
7750 static void
7751 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7752 {
7753 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7754 	mdi_vhcache_phci_t *cphci;
7755 	char *pathname;
7756 	int cache_updated;
7757 
7758 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7759 
7760 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7761 	(void) ddi_pathname(ph->ph_dip, pathname);
7762 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7763 	    != NULL) {
7764 		cphci->cphci_phci = ph;
7765 		cache_updated = 0;
7766 	} else {
7767 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7768 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7769 		cphci->cphci_phci = ph;
7770 		enqueue_vhcache_phci(vhcache, cphci);
7771 		cache_updated = 1;
7772 	}
7773 
7774 	rw_exit(&vhcache->vhcache_lock);
7775 
7776 	/*
7777 	 * Since a new phci has been added, reset
7778 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7779 	 * during next vhcache_discover_paths().
7780 	 */
7781 	mutex_enter(&vhc->vhc_lock);
7782 	vhc->vhc_path_discovery_cutoff_time = 0;
7783 	mutex_exit(&vhc->vhc_lock);
7784 
7785 	kmem_free(pathname, MAXPATHLEN);
7786 	if (cache_updated)
7787 		vhcache_dirty(vhc);
7788 }
7789 
7790 /*
7791  * Remove the reference to the specified phci from the vhci cache.
7792  */
7793 static void
7794 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7795 {
7796 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7797 	mdi_vhcache_phci_t *cphci;
7798 
7799 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7800 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7801 		/* do not remove the actual mdi_vhcache_phci structure */
7802 		cphci->cphci_phci = NULL;
7803 	}
7804 	rw_exit(&vhcache->vhcache_lock);
7805 }
7806 
7807 static void
7808 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7809     mdi_vhcache_lookup_token_t *src)
7810 {
7811 	if (src == NULL) {
7812 		dst->lt_cct = NULL;
7813 		dst->lt_cct_lookup_time = 0;
7814 	} else {
7815 		dst->lt_cct = src->lt_cct;
7816 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7817 	}
7818 }
7819 
7820 /*
7821  * Look up vhcache client for the specified client.
7822  */
7823 static mdi_vhcache_client_t *
7824 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7825     mdi_vhcache_lookup_token_t *token)
7826 {
7827 	mod_hash_val_t hv;
7828 	char *name_addr;
7829 	int len;
7830 
7831 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7832 
7833 	/*
7834 	 * If no vhcache clean occurred since the last lookup, we can
7835 	 * simply return the cct from the last lookup operation.
7836 	 * It works because ccts are never freed except during the vhcache
7837 	 * cleanup operation.
7838 	 */
7839 	if (token != NULL &&
7840 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7841 		return (token->lt_cct);
7842 
7843 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7844 	if (mod_hash_find(vhcache->vhcache_client_hash,
7845 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7846 		if (token) {
7847 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7848 			token->lt_cct_lookup_time = lbolt64;
7849 		}
7850 	} else {
7851 		if (token) {
7852 			token->lt_cct = NULL;
7853 			token->lt_cct_lookup_time = 0;
7854 		}
7855 		hv = NULL;
7856 	}
7857 	kmem_free(name_addr, len);
7858 	return ((mdi_vhcache_client_t *)hv);
7859 }
7860 
7861 /*
7862  * Add the specified path to the vhci cache if not already present.
7863  * Also add the vhcache client for the client corresponding to this path
7864  * if it doesn't already exist.
7865  */
7866 static void
7867 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7868 {
7869 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7870 	mdi_vhcache_client_t *cct;
7871 	mdi_vhcache_pathinfo_t *cpi;
7872 	mdi_phci_t *ph = pip->pi_phci;
7873 	mdi_client_t *ct = pip->pi_client;
7874 	int cache_updated = 0;
7875 
7876 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7877 
7878 	/* if vhcache client for this pip doesn't already exist, add it */
7879 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7880 	    NULL)) == NULL) {
7881 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7882 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7883 		    ct->ct_guid, NULL);
7884 		enqueue_vhcache_client(vhcache, cct);
7885 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7886 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7887 		cache_updated = 1;
7888 	}
7889 
7890 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7891 		if (cpi->cpi_cphci->cphci_phci == ph &&
7892 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7893 			cpi->cpi_pip = pip;
7894 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7895 				cpi->cpi_flags &=
7896 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7897 				sort_vhcache_paths(cct);
7898 				cache_updated = 1;
7899 			}
7900 			break;
7901 		}
7902 	}
7903 
7904 	if (cpi == NULL) {
7905 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7906 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7907 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7908 		ASSERT(cpi->cpi_cphci != NULL);
7909 		cpi->cpi_pip = pip;
7910 		enqueue_vhcache_pathinfo(cct, cpi);
7911 		cache_updated = 1;
7912 	}
7913 
7914 	rw_exit(&vhcache->vhcache_lock);
7915 
7916 	if (cache_updated)
7917 		vhcache_dirty(vhc);
7918 }
7919 
7920 /*
7921  * Remove the reference to the specified path from the vhci cache.
7922  */
7923 static void
7924 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7925 {
7926 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7927 	mdi_client_t *ct = pip->pi_client;
7928 	mdi_vhcache_client_t *cct;
7929 	mdi_vhcache_pathinfo_t *cpi;
7930 
7931 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7932 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7933 	    NULL)) != NULL) {
7934 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7935 		    cpi = cpi->cpi_next) {
7936 			if (cpi->cpi_pip == pip) {
7937 				cpi->cpi_pip = NULL;
7938 				break;
7939 			}
7940 		}
7941 	}
7942 	rw_exit(&vhcache->vhcache_lock);
7943 }
7944 
7945 /*
7946  * Flush the vhci cache to disk.
7947  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7948  */
7949 static int
7950 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7951 {
7952 	nvlist_t *nvl;
7953 	int err;
7954 	int rv;
7955 
7956 	/*
7957 	 * It is possible that the system may shutdown before
7958 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7959 	 * flushing the cache in this case do not check for
7960 	 * i_ddi_io_initialized when force flag is set.
7961 	 */
7962 	if (force_flag == 0 && !i_ddi_io_initialized())
7963 		return (MDI_FAILURE);
7964 
7965 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7966 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7967 		nvlist_free(nvl);
7968 	} else
7969 		err = EFAULT;
7970 
7971 	rv = MDI_SUCCESS;
7972 	mutex_enter(&vhc->vhc_lock);
7973 	if (err != 0) {
7974 		if (err == EROFS) {
7975 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7976 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7977 			    MDI_VHC_VHCACHE_DIRTY);
7978 		} else {
7979 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7980 				cmn_err(CE_CONT, "%s: update failed\n",
7981 				    vhc->vhc_vhcache_filename);
7982 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7983 			}
7984 			rv = MDI_FAILURE;
7985 		}
7986 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7987 		cmn_err(CE_CONT,
7988 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7989 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7990 	}
7991 	mutex_exit(&vhc->vhc_lock);
7992 
7993 	return (rv);
7994 }
7995 
/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 *
 * arg is the mdi_vhci_config_t the thread works for. All flag tests and
 * waits are done with vhc_lock held; the lock is dropped only around the
 * call to flush_vhcache(). On exit MDI_VHC_VHCACHE_FLUSH_THREAD is cleared,
 * which is the flag vhcache_dirty() tests to decide whether it has to
 * create a flush thread.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* idle period before exiting, converted from seconds to ticks */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/*
		 * While the cache is dirty: wait until vhc_flush_at_ticks
		 * is reached, then flush.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				/*
				 * Clear the dirty flag before dropping the
				 * lock for the flush.
				 */
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				/* a failed flush is rescheduled */
				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* idle wait: ends on exit request, new dirt, or timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* leave unless the cache became dirty again */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	/* vhc_lock is still held here */
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
8053 
8054 /*
8055  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8056  */
8057 static void
8058 vhcache_dirty(mdi_vhci_config_t *vhc)
8059 {
8060 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8061 	int create_thread;
8062 
8063 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8064 	/* do not flush cache until the cache is fully built */
8065 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8066 		rw_exit(&vhcache->vhcache_lock);
8067 		return;
8068 	}
8069 	rw_exit(&vhcache->vhcache_lock);
8070 
8071 	mutex_enter(&vhc->vhc_lock);
8072 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8073 		mutex_exit(&vhc->vhc_lock);
8074 		return;
8075 	}
8076 
8077 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8078 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8079 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8080 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8081 		cv_broadcast(&vhc->vhc_cv);
8082 		create_thread = 0;
8083 	} else {
8084 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8085 		create_thread = 1;
8086 	}
8087 	mutex_exit(&vhc->vhc_lock);
8088 
8089 	if (create_thread)
8090 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8091 		    0, &p0, TS_RUN, minclsyspri);
8092 }
8093 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci. Built by bus_config_all_phcis() and
 * consumed (and freed) by bus_config_phci().
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;		/* private copy of the phci path */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;	/* next in list */
} mdi_phci_bus_config_t;

/*
 * vhci bus config structure - one for each vhci bus config operation.
 * Shared by all the phci bus config structures of that operation.
 */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;		/* protects vhbc_thr_count */
	kcondvar_t vhbc_cv;		/* signalled when count hits zero */
	int vhbc_thr_count;		/* phci operations not yet finished */
} mdi_vhci_bus_config_t;
8113 
8114 /*
8115  * bus config the specified phci
8116  */
8117 static void
8118 bus_config_phci(void *arg)
8119 {
8120 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8121 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8122 	dev_info_t *ph_dip;
8123 
8124 	/*
8125 	 * first configure all path components upto phci and then configure
8126 	 * the phci children.
8127 	 */
8128 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8129 	    != NULL) {
8130 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8131 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8132 			(void) ndi_devi_config_driver(ph_dip,
8133 			    vhbc->vhbc_op_flags,
8134 			    vhbc->vhbc_op_major);
8135 		} else
8136 			(void) ndi_devi_config(ph_dip,
8137 			    vhbc->vhbc_op_flags);
8138 
8139 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8140 		ndi_rele_devi(ph_dip);
8141 	}
8142 
8143 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8144 	kmem_free(phbc, sizeof (*phbc));
8145 
8146 	mutex_enter(&vhbc->vhbc_lock);
8147 	vhbc->vhbc_thr_count--;
8148 	if (vhbc->vhbc_thr_count == 0)
8149 		cv_broadcast(&vhbc->vhbc_cv);
8150 	mutex_exit(&vhbc->vhbc_lock);
8151 }
8152 
/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 *
 * A work item (mdi_phci_bus_config_t) is built for each cached phci while
 * vhcache_lock is held as reader. bus_config_phci() is then run on each
 * item - directly when mdi_mtc_off is set, in a new thread otherwise - and
 * this function returns only after all of them have finished.
 * Only NDI_CONFIG_REPROBE and NDI_DRV_CONF_REPROBE are passed through from
 * flags; NDI_NO_EVENT is always added.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* nothing to do when no phci is cached */
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		/* copy the path so it can be used after the lock is dropped */
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		/* one outstanding operation per work item */
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		/* bus_config_phci() frees phbc, so fetch the link first */
		phbc_next = phbc->phbc_next;
		if (mdi_mtc_off)
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}
8215 
8216 /*
8217  * Single threaded version of bus_config_all_phcis()
8218  */
8219 static void
8220 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8221     ddi_bus_config_op_t op, major_t maj)
8222 {
8223 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8224 
8225 	single_threaded_vhconfig_enter(vhc);
8226 	bus_config_all_phcis(vhcache, flags, op, maj);
8227 	single_threaded_vhconfig_exit(vhc);
8228 }
8229 
8230 /*
8231  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8232  * The path includes the child component in addition to the phci path.
8233  */
8234 static int
8235 bus_config_one_phci_child(char *path)
8236 {
8237 	dev_info_t *ph_dip, *child;
8238 	char *devnm;
8239 	int rv = MDI_FAILURE;
8240 
8241 	/* extract the child component of the phci */
8242 	devnm = strrchr(path, '/');
8243 	*devnm++ = '\0';
8244 
8245 	/*
8246 	 * first configure all path components upto phci and then
8247 	 * configure the phci child.
8248 	 */
8249 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8250 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8251 		    NDI_SUCCESS) {
8252 			/*
8253 			 * release the hold that ndi_devi_config_one() placed
8254 			 */
8255 			ndi_rele_devi(child);
8256 			rv = MDI_SUCCESS;
8257 		}
8258 
8259 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8260 		ndi_rele_devi(ph_dip);
8261 	}
8262 
8263 	devnm--;
8264 	*devnm = '/';
8265 	return (rv);
8266 }
8267 
8268 /*
8269  * Build a list of phci client paths for the specified vhci client.
8270  * The list includes only those phci client paths which aren't configured yet.
8271  */
8272 static mdi_phys_path_t *
8273 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8274 {
8275 	mdi_vhcache_pathinfo_t *cpi;
8276 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8277 	int config_path, len;
8278 
8279 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8280 		/*
8281 		 * include only those paths that aren't configured.
8282 		 */
8283 		config_path = 0;
8284 		if (cpi->cpi_pip == NULL)
8285 			config_path = 1;
8286 		else {
8287 			MDI_PI_LOCK(cpi->cpi_pip);
8288 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
8289 				config_path = 1;
8290 			MDI_PI_UNLOCK(cpi->cpi_pip);
8291 		}
8292 
8293 		if (config_path) {
8294 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8295 			len = strlen(cpi->cpi_cphci->cphci_path) +
8296 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8297 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
8298 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
8299 			    cpi->cpi_cphci->cphci_path, ct_name,
8300 			    cpi->cpi_addr);
8301 			pp->phys_path_next = NULL;
8302 
8303 			if (pp_head == NULL)
8304 				pp_head = pp;
8305 			else
8306 				pp_tail->phys_path_next = pp;
8307 			pp_tail = pp;
8308 		}
8309 	}
8310 
8311 	return (pp_head);
8312 }
8313 
8314 /*
8315  * Free the memory allocated for phci client path list.
8316  */
8317 static void
8318 free_phclient_path_list(mdi_phys_path_t *pp_head)
8319 {
8320 	mdi_phys_path_t *pp, *pp_next;
8321 
8322 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8323 		pp_next = pp->phys_path_next;
8324 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8325 		kmem_free(pp, sizeof (*pp));
8326 	}
8327 }
8328 
8329 /*
8330  * Allocated async client structure and initialize with the specified values.
8331  */
8332 static mdi_async_client_config_t *
8333 alloc_async_client_config(char *ct_name, char *ct_addr,
8334     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8335 {
8336 	mdi_async_client_config_t *acc;
8337 
8338 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8339 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8340 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8341 	acc->acc_phclient_path_list_head = pp_head;
8342 	init_vhcache_lookup_token(&acc->acc_token, tok);
8343 	acc->acc_next = NULL;
8344 	return (acc);
8345 }
8346 
8347 /*
8348  * Free the memory allocated for the async client structure and their members.
8349  */
8350 static void
8351 free_async_client_config(mdi_async_client_config_t *acc)
8352 {
8353 	if (acc->acc_phclient_path_list_head)
8354 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8355 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8356 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8357 	kmem_free(acc, sizeof (*acc));
8358 }
8359 
8360 /*
8361  * Sort vhcache pathinfos (cpis) of the specified client.
8362  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8363  * flag set come at the beginning of the list. All cpis which have this
8364  * flag set come at the end of the list.
8365  */
8366 static void
8367 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8368 {
8369 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8370 
8371 	cpi_head = cct->cct_cpi_head;
8372 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8373 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8374 		cpi_next = cpi->cpi_next;
8375 		enqueue_vhcache_pathinfo(cct, cpi);
8376 	}
8377 }
8378 
/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 *
 * The flag is considered correct when it is set exactly on those cpis that
 * have no pathinfo (cpi_pip == NULL). The check is made with vhcache_lock
 * held as reader; the lock is only upgraded to writer when at least one cpi
 * needs fixing.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/* stop at the first cpi whose flag disagrees with cpi_pip */
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * If the upgrade fails the lock has to be dropped and re-entered as
	 * writer; the client is looked up again since the cache may have
	 * changed while the lock was not held.
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	/* rewrite the flag on every cpi, then re-sort the list */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	/* the flag is persisted on disk, so schedule a flush */
	vhcache_dirty(vhc);
}
8441 
8442 /*
8443  * Configure all specified paths of the client.
8444  */
8445 static void
8446 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8447     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8448 {
8449 	mdi_phys_path_t *pp;
8450 
8451 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8452 		(void) bus_config_one_phci_child(pp->phys_path);
8453 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8454 }
8455 
/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 *
 * arg is the mdi_vhci_config_t the thread works for. The thread exits when
 * MDI_VHC_EXIT is set, or when the list is still empty after waiting for
 * mdi_async_config_idle_time seconds. The list and its counters are only
 * touched with vhc_lock held; the lock is not held while a dequeued element
 * is being processed.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	/* idle period before exiting, converted from seconds to ticks */
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		/* the idle deadline is restarted on every iteration */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work, an exit request, or the idle timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* leave with vhc_lock still held */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* take the first element off the list */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		/* this also frees the element's path list */
		free_async_client_config(acc);
	}

out:
	/* vhc_lock is held here, so the count is updated under the lock */
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
8508 
8509 /*
8510  * Arrange for all the phci client paths (pp_head) for the specified client
8511  * to be bus configured asynchronously by a thread.
8512  */
8513 static void
8514 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8515     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8516 {
8517 	mdi_async_client_config_t *acc, *newacc;
8518 	int create_thread;
8519 
8520 	if (pp_head == NULL)
8521 		return;
8522 
8523 	if (mdi_mtc_off) {
8524 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8525 		free_phclient_path_list(pp_head);
8526 		return;
8527 	}
8528 
8529 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8530 	ASSERT(newacc);
8531 
8532 	mutex_enter(&vhc->vhc_lock);
8533 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8534 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8535 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8536 			free_async_client_config(newacc);
8537 			mutex_exit(&vhc->vhc_lock);
8538 			return;
8539 		}
8540 	}
8541 
8542 	if (vhc->vhc_acc_list_head == NULL)
8543 		vhc->vhc_acc_list_head = newacc;
8544 	else
8545 		vhc->vhc_acc_list_tail->acc_next = newacc;
8546 	vhc->vhc_acc_list_tail = newacc;
8547 	vhc->vhc_acc_count++;
8548 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8549 		cv_broadcast(&vhc->vhc_cv);
8550 		create_thread = 0;
8551 	} else {
8552 		vhc->vhc_acc_thrcount++;
8553 		create_thread = 1;
8554 	}
8555 	mutex_exit(&vhc->vhc_lock);
8556 
8557 	if (create_thread)
8558 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8559 		    0, &p0, TS_RUN, minclsyspri);
8560 }
8561 
8562 /*
8563  * Return number of online paths for the specified client.
8564  */
8565 static int
8566 nonline_paths(mdi_vhcache_client_t *cct)
8567 {
8568 	mdi_vhcache_pathinfo_t *cpi;
8569 	int online_count = 0;
8570 
8571 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8572 		if (cpi->cpi_pip != NULL) {
8573 			MDI_PI_LOCK(cpi->cpi_pip);
8574 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8575 				online_count++;
8576 			MDI_PI_UNLOCK(cpi->cpi_pip);
8577 		}
8578 	}
8579 
8580 	return (online_count);
8581 }
8582 
/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Locking: the caller enters with vhcache_lock held. The lock is released
 * on every path through this function, so it is NOT held on return.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	/* nothing to do: bad arguments, unknown client, or all configured */
	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		/* the whole list is handed over; it must not be freed here */
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/* no path online yet: configure one at a time without the lock */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* the lock was dropped, so look the client up again */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				/*
				 * Hand the rest of the list over and cut it
				 * off from the part that is freed below.
				 */
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}
8646 
8647 static void
8648 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8649 {
8650 	mutex_enter(&vhc->vhc_lock);
8651 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8652 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8653 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8654 	mutex_exit(&vhc->vhc_lock);
8655 }
8656 
8657 static void
8658 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8659 {
8660 	mutex_enter(&vhc->vhc_lock);
8661 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8662 	cv_broadcast(&vhc->vhc_cv);
8663 	mutex_exit(&vhc->vhc_lock);
8664 }
8665 
/* Description of one phci driver in the built-in tables below. */
typedef struct mdi_phci_driver_info {
	/* name of the phci driver */
	char	*phdriver_name;

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp",		1 },
	{ "iscsi",	0 },
	{ "ibsrp",	1 }
};

static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor",	1 }
};
8688 
8689 static void *
8690 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
8691 {
8692 	void *new_ptr;
8693 
8694 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
8695 	if (old_ptr) {
8696 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
8697 		kmem_free(old_ptr, old_size);
8698 	}
8699 	return (new_ptr);
8700 }
8701 
8702 static void
8703 add_to_phci_list(char ***driver_list, int **root_support_list,
8704     int *cur_elements, int *max_elements, char *driver_name, int root_support)
8705 {
8706 	ASSERT(*cur_elements <= *max_elements);
8707 	if (*cur_elements == *max_elements) {
8708 		*max_elements += 10;
8709 		*driver_list = mdi_realloc(*driver_list,
8710 		    sizeof (char *) * (*cur_elements),
8711 		    sizeof (char *) * (*max_elements));
8712 		*root_support_list = mdi_realloc(*root_support_list,
8713 		    sizeof (int) * (*cur_elements),
8714 		    sizeof (int) * (*max_elements));
8715 	}
8716 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
8717 	(*root_support_list)[*cur_elements] = root_support;
8718 	(*cur_elements)++;
8719 }
8720 
8721 static void
8722 get_phci_driver_list(char *vhci_class, char ***driver_list,
8723     int **root_support_list, int *cur_elements, int *max_elements)
8724 {
8725 	mdi_phci_driver_info_t	*st_driver_list, *p;
8726 	int		st_ndrivers, root_support, i, j, driver_conf_count;
8727 	major_t		m;
8728 	struct devnames	*dnp;
8729 	ddi_prop_t	*propp;
8730 
8731 	*driver_list = NULL;
8732 	*root_support_list = NULL;
8733 	*cur_elements = 0;
8734 	*max_elements = 0;
8735 
8736 	/* add the phci drivers derived from the phci driver.conf files */
8737 	for (m = 0; m < devcnt; m++) {
8738 		dnp = &devnamesp[m];
8739 
8740 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
8741 			LOCK_DEV_OPS(&dnp->dn_lock);
8742 			if (dnp->dn_global_prop_ptr != NULL &&
8743 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
8744 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
8745 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
8746 			    strcmp(propp->prop_val, vhci_class) == 0) {
8747 
8748 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
8749 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
8750 				    &dnp->dn_global_prop_ptr->prop_list)
8751 				    == NULL) ? 1 : 0;
8752 
8753 				add_to_phci_list(driver_list, root_support_list,
8754 				    cur_elements, max_elements, dnp->dn_name,
8755 				    root_support);
8756 
8757 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8758 			} else
8759 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8760 		}
8761 	}
8762 
8763 	driver_conf_count = *cur_elements;
8764 
8765 	/* add the phci drivers specified in the built-in tables */
8766 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
8767 		st_driver_list = scsi_phci_driver_list;
8768 		st_ndrivers = sizeof (scsi_phci_driver_list) /
8769 		    sizeof (mdi_phci_driver_info_t);
8770 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
8771 		st_driver_list = ib_phci_driver_list;
8772 		st_ndrivers = sizeof (ib_phci_driver_list) /
8773 		    sizeof (mdi_phci_driver_info_t);
8774 	} else {
8775 		st_driver_list = NULL;
8776 		st_ndrivers = 0;
8777 	}
8778 
8779 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
8780 		/* add this phci driver if not already added before */
8781 		for (j = 0; j < driver_conf_count; j++) {
8782 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
8783 				break;
8784 		}
8785 		if (j == driver_conf_count) {
8786 			add_to_phci_list(driver_list, root_support_list,
8787 			    cur_elements, max_elements, p->phdriver_name,
8788 			    p->phdriver_root_support);
8789 		}
8790 	}
8791 }
8792 
8793 /*
8794  * Attach the phci driver instances associated with the specified vhci class.
8795  * If root is mounted attach all phci driver instances.
8796  * If root is not mounted, attach the instances of only those phci
8797  * drivers that have the root support.
8798  */
8799 static void
8800 attach_phci_drivers(char *vhci_class)
8801 {
8802 	char	**driver_list, **p;
8803 	int	*root_support_list;
8804 	int	cur_elements, max_elements, i;
8805 	major_t	m;
8806 
8807 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
8808 	    &cur_elements, &max_elements);
8809 
8810 	for (i = 0; i < cur_elements; i++) {
8811 		if (modrootloaded || root_support_list[i]) {
8812 			m = ddi_name_to_major(driver_list[i]);
8813 			if (m != DDI_MAJOR_T_NONE &&
8814 			    ddi_hold_installed_driver(m))
8815 				ddi_rele_driver(m);
8816 		}
8817 	}
8818 
8819 	if (driver_list) {
8820 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
8821 			kmem_free(*p, strlen(*p) + 1);
8822 		kmem_free(driver_list, sizeof (char *) * max_elements);
8823 		kmem_free(root_support_list, sizeof (int) * max_elements);
8824 	}
8825 }
8826 
8827 /*
8828  * Build vhci cache:
8829  *
8830  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8831  * the phci driver instances. During this process the cache gets built.
8832  *
8833  * Cache is built fully if the root is mounted.
8834  * If the root is not mounted, phci drivers that do not have root support
8835  * are not attached. As a result the cache is built partially. The entries
8836  * in the cache reflect only those phci drivers that have root support.
8837  */
8838 static int
8839 build_vhci_cache(mdi_vhci_t *vh)
8840 {
8841 	mdi_vhci_config_t *vhc = vh->vh_config;
8842 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8843 
8844 	single_threaded_vhconfig_enter(vhc);
8845 
8846 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8847 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8848 		rw_exit(&vhcache->vhcache_lock);
8849 		single_threaded_vhconfig_exit(vhc);
8850 		return (0);
8851 	}
8852 	rw_exit(&vhcache->vhcache_lock);
8853 
8854 	attach_phci_drivers(vh->vh_class);
8855 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8856 	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
8857 
8858 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8859 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8860 	rw_exit(&vhcache->vhcache_lock);
8861 
8862 	single_threaded_vhconfig_exit(vhc);
8863 	vhcache_dirty(vhc);
8864 	return (1);
8865 }
8866 
8867 /*
8868  * Determine if discovery of paths is needed.
8869  */
8870 static int
8871 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8872 {
8873 	int rv = 1;
8874 
8875 	mutex_enter(&vhc->vhc_lock);
8876 	if (i_ddi_io_initialized() == 0) {
8877 		if (vhc->vhc_path_discovery_boot > 0) {
8878 			vhc->vhc_path_discovery_boot--;
8879 			goto out;
8880 		}
8881 	} else {
8882 		if (vhc->vhc_path_discovery_postboot > 0) {
8883 			vhc->vhc_path_discovery_postboot--;
8884 			goto out;
8885 		}
8886 	}
8887 
8888 	/*
8889 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8890 	 * This is to avoid a series of full path discoveries when opening
8891 	 * stale /dev/[r]dsk links.
8892 	 */
8893 	if (mdi_path_discovery_interval != -1 &&
8894 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8895 		goto out;
8896 
8897 	rv = 0;
8898 out:
8899 	mutex_exit(&vhc->vhc_lock);
8900 	return (rv);
8901 }
8902 
8903 /*
8904  * Discover all paths:
8905  *
8906  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8907  * driver instances. During this process all paths will be discovered.
8908  */
8909 static int
8910 vhcache_discover_paths(mdi_vhci_t *vh)
8911 {
8912 	mdi_vhci_config_t *vhc = vh->vh_config;
8913 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8914 	int rv = 0;
8915 
8916 	single_threaded_vhconfig_enter(vhc);
8917 
8918 	if (vhcache_do_discovery(vhc)) {
8919 		attach_phci_drivers(vh->vh_class);
8920 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8921 		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
8922 
8923 		mutex_enter(&vhc->vhc_lock);
8924 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8925 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8926 		mutex_exit(&vhc->vhc_lock);
8927 		rv = 1;
8928 	}
8929 
8930 	single_threaded_vhconfig_exit(vhc);
8931 	return (rv);
8932 }
8933 
8934 /*
8935  * Generic vhci bus config implementation:
8936  *
8937  * Parameters
8938  *	vdip	vhci dip
8939  *	flags	bus config flags
8940  *	op	bus config operation
8941  *	The remaining parameters are bus config operation specific
8942  *
8943  * for BUS_CONFIG_ONE
8944  *	arg	pointer to name@addr
8945  *	child	upon successful return from this function, *child will be
8946  *		set to the configured and held devinfo child node of vdip.
8947  *	ct_addr	pointer to client address (i.e. GUID)
8948  *
8949  * for BUS_CONFIG_DRIVER
8950  *	arg	major number of the driver
8951  *	child and ct_addr parameters are ignored
8952  *
8953  * for BUS_CONFIG_ALL
8954  *	arg, child, and ct_addr parameters are ignored
8955  *
8956  * Note that for the rest of the bus config operations, this function simply
8957  * calls the framework provided default bus config routine.
8958  */
8959 int
8960 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8961     void *arg, dev_info_t **child, char *ct_addr)
8962 {
8963 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8964 	mdi_vhci_config_t *vhc = vh->vh_config;
8965 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8966 	int rv = 0;
8967 	int params_valid = 0;
8968 	char *cp;
8969 
8970 	/*
8971 	 * To bus config vhcis we relay operation, possibly using another
8972 	 * thread, to phcis. The phci driver then interacts with MDI to cause
8973 	 * vhci child nodes to be enumerated under the vhci node.  Adding a
8974 	 * vhci child requires an ndi_devi_enter of the vhci. Since another
8975 	 * thread may be adding the child, to avoid deadlock we can't wait
8976 	 * for the relayed operations to complete if we have already entered
8977 	 * the vhci node.
8978 	 */
8979 	if (DEVI_BUSY_OWNED(vdip)) {
8980 		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: "
8981 		    "vhci dip is busy owned %p\n", (void *)vdip));
8982 		goto default_bus_config;
8983 	}
8984 
8985 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8986 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8987 		rw_exit(&vhcache->vhcache_lock);
8988 		rv = build_vhci_cache(vh);
8989 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8990 	}
8991 
8992 	switch (op) {
8993 	case BUS_CONFIG_ONE:
8994 		if (arg != NULL && ct_addr != NULL) {
8995 			/* extract node name */
8996 			cp = (char *)arg;
8997 			while (*cp != '\0' && *cp != '@')
8998 				cp++;
8999 			if (*cp == '@') {
9000 				params_valid = 1;
9001 				*cp = '\0';
9002 				config_client_paths(vhc, (char *)arg, ct_addr);
9003 				/* config_client_paths() releases cache_lock */
9004 				*cp = '@';
9005 				break;
9006 			}
9007 		}
9008 
9009 		rw_exit(&vhcache->vhcache_lock);
9010 		break;
9011 
9012 	case BUS_CONFIG_DRIVER:
9013 		rw_exit(&vhcache->vhcache_lock);
9014 		if (rv == 0)
9015 			st_bus_config_all_phcis(vhc, flags, op,
9016 			    (major_t)(uintptr_t)arg);
9017 		break;
9018 
9019 	case BUS_CONFIG_ALL:
9020 		rw_exit(&vhcache->vhcache_lock);
9021 		if (rv == 0)
9022 			st_bus_config_all_phcis(vhc, flags, op, -1);
9023 		break;
9024 
9025 	default:
9026 		rw_exit(&vhcache->vhcache_lock);
9027 		break;
9028 	}
9029 
9030 
9031 default_bus_config:
9032 	/*
9033 	 * All requested child nodes are enumerated under the vhci.
9034 	 * Now configure them.
9035 	 */
9036 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9037 	    NDI_SUCCESS) {
9038 		return (MDI_SUCCESS);
9039 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9040 		/* discover all paths and try configuring again */
9041 		if (vhcache_discover_paths(vh) &&
9042 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9043 		    NDI_SUCCESS)
9044 			return (MDI_SUCCESS);
9045 	}
9046 
9047 	return (MDI_FAILURE);
9048 }
9049 
9050 /*
9051  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9052  */
9053 static nvlist_t *
9054 read_on_disk_vhci_cache(char *vhci_class)
9055 {
9056 	nvlist_t *nvl;
9057 	int err;
9058 	char *filename;
9059 
9060 	filename = vhclass2vhcache_filename(vhci_class);
9061 
9062 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
9063 		kmem_free(filename, strlen(filename) + 1);
9064 		return (nvl);
9065 	} else if (err == EIO)
9066 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
9067 	else if (err == EINVAL)
9068 		cmn_err(CE_WARN,
9069 		    "%s: data file corrupted, will recreate\n", filename);
9070 
9071 	kmem_free(filename, strlen(filename) + 1);
9072 	return (NULL);
9073 }
9074 
9075 /*
9076  * Read on-disk vhci cache into nvlists for all vhci classes.
9077  * Called during booting by i_ddi_read_devices_files().
9078  */
9079 void
9080 mdi_read_devices_files(void)
9081 {
9082 	int i;
9083 
9084 	for (i = 0; i < N_VHCI_CLASSES; i++)
9085 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9086 }
9087 
9088 /*
9089  * Remove all stale entries from vhci cache.
9090  */
9091 static void
9092 clean_vhcache(mdi_vhci_config_t *vhc)
9093 {
9094 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9095 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
9096 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
9097 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
9098 
9099 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9100 
9101 	cct_head = vhcache->vhcache_client_head;
9102 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9103 	for (cct = cct_head; cct != NULL; cct = cct_next) {
9104 		cct_next = cct->cct_next;
9105 
9106 		cpi_head = cct->cct_cpi_head;
9107 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
9108 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
9109 			cpi_next = cpi->cpi_next;
9110 			if (cpi->cpi_pip != NULL) {
9111 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
9112 				enqueue_tail_vhcache_pathinfo(cct, cpi);
9113 			} else
9114 				free_vhcache_pathinfo(cpi);
9115 		}
9116 
9117 		if (cct->cct_cpi_head != NULL)
9118 			enqueue_vhcache_client(vhcache, cct);
9119 		else {
9120 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
9121 			    (mod_hash_key_t)cct->cct_name_addr);
9122 			free_vhcache_client(cct);
9123 		}
9124 	}
9125 
9126 	cphci_head = vhcache->vhcache_phci_head;
9127 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9128 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
9129 		cphci_next = cphci->cphci_next;
9130 		if (cphci->cphci_phci != NULL)
9131 			enqueue_vhcache_phci(vhcache, cphci);
9132 		else
9133 			free_vhcache_phci(cphci);
9134 	}
9135 
9136 	vhcache->vhcache_clean_time = lbolt64;
9137 	rw_exit(&vhcache->vhcache_lock);
9138 	vhcache_dirty(vhc);
9139 }
9140 
9141 /*
9142  * Remove all stale entries from vhci cache.
9143  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9144  */
9145 void
9146 mdi_clean_vhcache(void)
9147 {
9148 	mdi_vhci_t *vh;
9149 
9150 	mutex_enter(&mdi_mutex);
9151 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9152 		vh->vh_refcnt++;
9153 		mutex_exit(&mdi_mutex);
9154 		clean_vhcache(vh->vh_config);
9155 		mutex_enter(&mdi_mutex);
9156 		vh->vh_refcnt--;
9157 	}
9158 	mutex_exit(&mdi_mutex);
9159 }
9160 
9161 /*
9162  * mdi_vhci_walk_clients():
9163  *		Walker routine to traverse client dev_info nodes
9164  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9165  * below the client, including nexus devices, which we dont want.
9166  * So we just traverse the immediate siblings, starting from 1st client.
9167  */
9168 void
9169 mdi_vhci_walk_clients(dev_info_t *vdip,
9170     int (*f)(dev_info_t *, void *), void *arg)
9171 {
9172 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9173 	dev_info_t	*cdip;
9174 	mdi_client_t	*ct;
9175 
9176 	MDI_VHCI_CLIENT_LOCK(vh);
9177 	cdip = ddi_get_child(vdip);
9178 	while (cdip) {
9179 		ct = i_devi_get_client(cdip);
9180 		MDI_CLIENT_LOCK(ct);
9181 
9182 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9183 			cdip = ddi_get_next_sibling(cdip);
9184 		else
9185 			cdip = NULL;
9186 
9187 		MDI_CLIENT_UNLOCK(ct);
9188 	}
9189 	MDI_VHCI_CLIENT_UNLOCK(vh);
9190 }
9191 
9192 /*
9193  * mdi_vhci_walk_phcis():
9194  *		Walker routine to traverse phci dev_info nodes
9195  */
9196 void
9197 mdi_vhci_walk_phcis(dev_info_t *vdip,
9198     int (*f)(dev_info_t *, void *), void *arg)
9199 {
9200 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9201 	mdi_phci_t	*ph, *next;
9202 
9203 	MDI_VHCI_PHCI_LOCK(vh);
9204 	ph = vh->vh_phci_head;
9205 	while (ph) {
9206 		MDI_PHCI_LOCK(ph);
9207 
9208 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9209 			next = ph->ph_next;
9210 		else
9211 			next = NULL;
9212 
9213 		MDI_PHCI_UNLOCK(ph);
9214 		ph = next;
9215 	}
9216 	MDI_VHCI_PHCI_UNLOCK(vh);
9217 }
9218 
9219 
9220 /*
9221  * mdi_walk_vhcis():
9222  *		Walker routine to traverse vhci dev_info nodes
9223  */
9224 void
9225 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9226 {
9227 	mdi_vhci_t	*vh = NULL;
9228 
9229 	mutex_enter(&mdi_mutex);
9230 	/*
9231 	 * Scan for already registered vhci
9232 	 */
9233 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9234 		vh->vh_refcnt++;
9235 		mutex_exit(&mdi_mutex);
9236 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9237 			mutex_enter(&mdi_mutex);
9238 			vh->vh_refcnt--;
9239 			break;
9240 		} else {
9241 			mutex_enter(&mdi_mutex);
9242 			vh->vh_refcnt--;
9243 		}
9244 	}
9245 
9246 	mutex_exit(&mdi_mutex);
9247 }
9248 
9249 /*
9250  * i_mdi_log_sysevent():
9251  *		Logs events for pickup by syseventd
9252  */
9253 static void
9254 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9255 {
9256 	char		*path_name;
9257 	nvlist_t	*attr_list;
9258 
9259 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9260 	    KM_SLEEP) != DDI_SUCCESS) {
9261 		goto alloc_failed;
9262 	}
9263 
9264 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9265 	(void) ddi_pathname(dip, path_name);
9266 
9267 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9268 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
9269 		goto error;
9270 	}
9271 
9272 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9273 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9274 		goto error;
9275 	}
9276 
9277 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9278 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9279 		goto error;
9280 	}
9281 
9282 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
9283 	    path_name) != DDI_SUCCESS) {
9284 		goto error;
9285 	}
9286 
9287 	if (nvlist_add_string(attr_list, DDI_CLASS,
9288 	    ph_vh_class) != DDI_SUCCESS) {
9289 		goto error;
9290 	}
9291 
9292 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9293 	    attr_list, NULL, DDI_SLEEP);
9294 
9295 error:
9296 	kmem_free(path_name, MAXPATHLEN);
9297 	nvlist_free(attr_list);
9298 	return;
9299 
9300 alloc_failed:
9301 	MDI_DEBUG(1, (CE_WARN, dip,
9302 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
9303 }
9304 
9305 char **
9306 mdi_get_phci_driver_list(char *vhci_class, int	*ndrivers)
9307 {
9308 	char	**driver_list, **ret_driver_list = NULL;
9309 	int	*root_support_list;
9310 	int	cur_elements, max_elements;
9311 
9312 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9313 	    &cur_elements, &max_elements);
9314 
9315 
9316 	if (driver_list) {
9317 		kmem_free(root_support_list, sizeof (int) * max_elements);
9318 		ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9319 		    * max_elements, sizeof (char *) * cur_elements);
9320 	}
9321 	*ndrivers = cur_elements;
9322 
9323 	return (ret_driver_list);
9324 
9325 }
9326 
9327 void
9328 mdi_free_phci_driver_list(char **driver_list, int ndrivers)
9329 {
9330 	char	**p;
9331 	int	i;
9332 
9333 	if (driver_list) {
9334 		for (i = 0, p = driver_list; i < ndrivers; i++, p++)
9335 			kmem_free(*p, strlen(*p) + 1);
9336 		kmem_free(driver_list, sizeof (char *) * ndrivers);
9337 	}
9338 }
9339 
9340 /*
9341  * mdi_is_dev_supported():
9342  *		function called by pHCI bus config operation to determine if a
9343  *		device should be represented as a child of the vHCI or the
9344  *		pHCI.  This decision is made by the vHCI, using cinfo idenity
9345  *		information passed by the pHCI - specifics of the cinfo
9346  *		representation are by agreement between the pHCI and vHCI.
9347  * Return Values:
9348  *		MDI_SUCCESS
9349  *		MDI_FAILURE
9350  */
9351 int
9352 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9353 {
9354 	mdi_vhci_t	*vh;
9355 
9356 	ASSERT(class && pdip);
9357 
9358 	/*
9359 	 * For dev_supported, mdi_phci_register() must have established pdip as
9360 	 * a pHCI.
9361 	 *
9362 	 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9363 	 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9364 	 */
9365 	if (!MDI_PHCI(pdip))
9366 		return (MDI_FAILURE);
9367 
9368 	/* Return MDI_FAILURE if vHCI does not support asking the question. */
9369 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9370 	if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9371 		return (MDI_FAILURE);
9372 	}
9373 
9374 	/* Return vHCI answer */
9375 	return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9376 }
9377 
9378 int
9379 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9380 {
9381 	uint_t devstate = 0;
9382 	dev_info_t *cdip;
9383 
9384 	if ((pip == NULL) || (dcp == NULL))
9385 		return (MDI_FAILURE);
9386 
9387 	cdip = mdi_pi_get_client(pip);
9388 
9389 	switch (mdi_pi_get_state(pip)) {
9390 	case MDI_PATHINFO_STATE_INIT:
9391 		devstate = DEVICE_DOWN;
9392 		break;
9393 	case MDI_PATHINFO_STATE_ONLINE:
9394 		devstate = DEVICE_ONLINE;
9395 		if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9396 			devstate |= DEVICE_BUSY;
9397 		break;
9398 	case MDI_PATHINFO_STATE_STANDBY:
9399 		devstate = DEVICE_ONLINE;
9400 		break;
9401 	case MDI_PATHINFO_STATE_FAULT:
9402 		devstate = DEVICE_DOWN;
9403 		break;
9404 	case MDI_PATHINFO_STATE_OFFLINE:
9405 		devstate = DEVICE_OFFLINE;
9406 		break;
9407 	default:
9408 		ASSERT(MDI_PI(pip)->pi_state);
9409 	}
9410 
9411 	if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9412 		return (MDI_FAILURE);
9413 
9414 	return (MDI_SUCCESS);
9415 }
9416