xref: /titanic_51/usr/src/uts/common/os/sunmdi.c (revision 49f0e51890161901ae4f49c7a47602d97b52b934)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 /*
28  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
29  * detailed discussion of the overall mpxio architecture.
30  *
31  * Default locking order:
32  *
33  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
35  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
37  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
38  */
39 
40 #include <sys/note.h>
41 #include <sys/types.h>
42 #include <sys/varargs.h>
43 #include <sys/param.h>
44 #include <sys/errno.h>
45 #include <sys/uio.h>
46 #include <sys/buf.h>
47 #include <sys/modctl.h>
48 #include <sys/open.h>
49 #include <sys/kmem.h>
50 #include <sys/poll.h>
51 #include <sys/conf.h>
52 #include <sys/bootconf.h>
53 #include <sys/cmn_err.h>
54 #include <sys/stat.h>
55 #include <sys/ddi.h>
56 #include <sys/sunddi.h>
57 #include <sys/ddipropdefs.h>
58 #include <sys/sunndi.h>
59 #include <sys/ndi_impldefs.h>
60 #include <sys/promif.h>
61 #include <sys/sunmdi.h>
62 #include <sys/mdi_impldefs.h>
63 #include <sys/taskq.h>
64 #include <sys/epm.h>
65 #include <sys/sunpm.h>
66 #include <sys/modhash.h>
67 #include <sys/disp.h>
68 #include <sys/autoconf.h>
69 
#ifdef	DEBUG
#include <sys/debug.h>
/*
 * mdi_debug is the verbosity threshold used by MDI_DEBUG(): a message is
 * handed to i_mdi_log() only when mdi_debug >= the level given at the
 * call site.
 */
int	mdi_debug = 1;
/*
 * MDI_DEBUG(level, stmnt):
 *	'stmnt' is the complete, parenthesized i_mdi_log() argument list, e.g.
 *		MDI_DEBUG(1, (CE_WARN, dip, "!some message"));
 *	The expansion is wrapped in do { } while (0) so that it is always a
 *	single statement.  A bare 'if' here would capture the 'else' of an
 *	unbraced if/else surrounding the call site.
 */
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
	_NOTE(CONSTCOND) } while (0)
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
/* Without DEBUG, MDI_DEBUG() expands to nothing. */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
79 
80 extern pri_t	minclsyspri;
81 extern int	modrootloaded;
82 
83 /*
84  * Global mutex:
85  * Protects vHCI list and structure members, pHCI and Client lists.
86  */
87 kmutex_t	mdi_mutex;
88 
89 /*
90  * Registered vHCI class driver lists
91  */
92 int		mdi_vhci_count;
93 mdi_vhci_t	*mdi_vhci_head;
94 mdi_vhci_t	*mdi_vhci_tail;
95 
96 /*
97  * Client Hash Table size
98  */
99 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
100 
101 /*
102  * taskq interface definitions
103  */
104 #define	MDI_TASKQ_N_THREADS	8
105 #define	MDI_TASKQ_PRI		minclsyspri
106 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
107 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
108 
109 taskq_t				*mdi_taskq;
110 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
111 
112 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
113 
/*
 * Number of seconds the vhci cached data must stay "quiet" before it is
 * flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* idle time, in seconds, after which the vhcache flush daemon exits */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * When a bus_config_one fails, MDI falls back to discovery of all paths.
 * That fallback is tuned with the following parameters.
 *
 * mdi_path_discovery_boot
 *	How many times path discovery is attempted during early boot.
 *	There is probably never a reason to set this above one.
 *
 * mdi_path_discovery_postboot
 *	How many times path discovery is attempted after early boot.
 *	Keep it at two or more so that iscsi paths, which may show up very
 *	late during booting, can still be discovered.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI waits between successive discoveries
 *	of all paths.  A value of -1 disables discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * idle time, in seconds, after which the asynchronous configuration
 * thread exits.
 */
static int mdi_async_config_idle_time = 600;

static int mdi_bus_config_cache_hash_size = 256;

/* non-zero turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;
154 
/*
 * Name of the MDI component property and the values it may take
 */
const char 		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * Name of the property carrying an MDI client's global unique identifier
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * Name of the MDI client load balancing property and the values it may take
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";
175 
176 /*
177  * Obsolete vHCI class definition; to be removed after Leadville update
178  */
179 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
180 
181 static char vhci_greeting[] =
182 	"\tThere already exists one vHCI driver for class %s\n"
183 	"\tOnly one vHCI driver for each class is allowed\n";
184 
185 /*
186  * Static function prototypes
187  */
188 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
189 static int		i_mdi_client_offline(dev_info_t *, uint_t);
190 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
191 static void		i_mdi_phci_post_detach(dev_info_t *,
192 			    ddi_detach_cmd_t, int);
193 static int		i_mdi_client_pre_detach(dev_info_t *,
194 			    ddi_detach_cmd_t);
195 static void		i_mdi_client_post_detach(dev_info_t *,
196 			    ddi_detach_cmd_t, int);
197 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
198 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
199 static int 		i_mdi_lba_lb(mdi_client_t *ct,
200 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
201 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
202 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
203 static void		i_mdi_pm_reset_client(mdi_client_t *);
204 static void		i_mdi_pm_hold_all_phci(mdi_client_t *);
205 static int		i_mdi_power_all_phci(mdi_client_t *);
206 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
207 
208 
209 /*
210  * Internal mdi_pathinfo node functions
211  */
212 static int		i_mdi_pi_kstat_create(mdi_pathinfo_t *);
213 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
214 
215 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
216 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
217 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
218 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
219 static void		i_mdi_phci_get_client_lock(mdi_phci_t *,
220 			    mdi_client_t *);
221 static void		i_mdi_phci_unlock(mdi_phci_t *);
222 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
223 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
224 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
225 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
226 			    mdi_client_t *);
227 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
228 static void		i_mdi_client_remove_path(mdi_client_t *,
229 			    mdi_pathinfo_t *);
230 
231 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
232 			    mdi_pathinfo_state_t, int);
233 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
234 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
235 			    char **, int);
236 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
237 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
238 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
239 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
240 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
241 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
242 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
243 static void		i_mdi_client_update_state(mdi_client_t *);
244 static int		i_mdi_client_compute_state(mdi_client_t *,
245 			    mdi_phci_t *);
246 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
247 static void		i_mdi_client_unlock(mdi_client_t *);
248 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
249 static mdi_client_t	*i_devi_get_client(dev_info_t *);
250 /*
251  * NOTE: this will be removed once the NWS files are changed to use the new
252  * mdi_{enable,disable}_path interfaces
253  */
254 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
255 				int, int);
256 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
257 				mdi_vhci_t *vh, int flags, int op);
258 /*
259  * Failover related function prototypes
260  */
261 static int		i_mdi_failover(void *);
262 
263 /*
264  * misc internal functions
265  */
266 static int		i_mdi_get_hash_key(char *);
267 static int		i_map_nvlist_error_to_mdi(int);
268 static void		i_mdi_report_path_state(mdi_client_t *,
269 			    mdi_pathinfo_t *);
270 
271 static void		setup_vhci_cache(mdi_vhci_t *);
272 static int		destroy_vhci_cache(mdi_vhci_t *);
273 static void		setup_phci_driver_list(mdi_vhci_t *);
274 static void		free_phci_driver_list(mdi_vhci_config_t *);
275 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
276 static boolean_t	stop_vhcache_flush_thread(void *, int);
277 static void		free_string_array(char **, int);
278 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
279 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
280 static void		free_vhcache_client(mdi_vhcache_client_t *);
281 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
282 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
283 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
284 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
285 static void		vhcache_pi_add(mdi_vhci_config_t *,
286 			    struct mdi_pathinfo *);
287 static void		vhcache_pi_remove(mdi_vhci_config_t *,
288 			    struct mdi_pathinfo *);
289 static void		free_phclient_path_list(mdi_phys_path_t *);
290 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
291 static int		flush_vhcache(mdi_vhci_config_t *, int);
292 static void		vhcache_dirty(mdi_vhci_config_t *);
293 static void		free_async_client_config(mdi_async_client_config_t *);
294 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
295 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
296 static nvlist_t		*read_on_disk_vhci_cache(char *);
297 extern int		fread_nvlist(char *, nvlist_t **);
298 extern int		fwrite_nvlist(char *, nvlist_t *);
299 
300 /* called once when first vhci registers with mdi */
301 static void
302 i_mdi_init()
303 {
304 	static int initialized = 0;
305 
306 	if (initialized)
307 		return;
308 	initialized = 1;
309 
310 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
311 	/*
312 	 * Create our taskq resources
313 	 */
314 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
315 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
316 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
317 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
318 }
319 
320 /*
321  * mdi_get_component_type():
322  *		Return mpxio component type
323  * Return Values:
324  *		MDI_COMPONENT_NONE
325  *		MDI_COMPONENT_VHCI
326  *		MDI_COMPONENT_PHCI
327  *		MDI_COMPONENT_CLIENT
328  * XXX This doesn't work under multi-level MPxIO and should be
329  *	removed when clients migrate mdi_is_*() interfaces.
330  */
331 int
332 mdi_get_component_type(dev_info_t *dip)
333 {
334 	return (DEVI(dip)->devi_mdi_component);
335 }
336 
337 /*
338  * mdi_vhci_register():
339  *		Register a vHCI module with the mpxio framework
340  *		mdi_vhci_register() is called by vHCI drivers to register the
341  *		'class_driver' vHCI driver and its MDI entrypoints with the
342  *		mpxio framework.  The vHCI driver must call this interface as
343  *		part of its attach(9e) handler.
344  *		Competing threads may try to attach mdi_vhci_register() as
345  *		the vHCI drivers are loaded and attached as a result of pHCI
346  *		driver instance registration (mdi_phci_register()) with the
347  *		framework.
348  * Return Values:
349  *		MDI_SUCCESS
350  *		MDI_FAILURE
351  */
352 
353 /*ARGSUSED*/
354 int
355 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
356     int flags)
357 {
358 	mdi_vhci_t		*vh = NULL;
359 
360 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
361 
362 	i_mdi_init();
363 
364 	mutex_enter(&mdi_mutex);
365 	/*
366 	 * Scan for already registered vhci
367 	 */
368 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
369 		if (strcmp(vh->vh_class, class) == 0) {
370 			/*
371 			 * vHCI has already been created.  Check for valid
372 			 * vHCI ops registration.  We only support one vHCI
373 			 * module per class
374 			 */
375 			if (vh->vh_ops != NULL) {
376 				mutex_exit(&mdi_mutex);
377 				cmn_err(CE_NOTE, vhci_greeting, class);
378 				return (MDI_FAILURE);
379 			}
380 			break;
381 		}
382 	}
383 
384 	/*
385 	 * if not yet created, create the vHCI component
386 	 */
387 	if (vh == NULL) {
388 		struct client_hash	*hash = NULL;
389 		char			*load_balance;
390 
391 		/*
392 		 * Allocate and initialize the mdi extensions
393 		 */
394 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
395 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
396 		    KM_SLEEP);
397 		vh->vh_client_table = hash;
398 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
399 		(void) strcpy(vh->vh_class, class);
400 		vh->vh_lb = LOAD_BALANCE_RR;
401 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
402 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
403 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
404 				vh->vh_lb = LOAD_BALANCE_NONE;
405 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
406 				    == 0) {
407 				vh->vh_lb = LOAD_BALANCE_LBA;
408 			}
409 			ddi_prop_free(load_balance);
410 		}
411 
412 		/*
413 		 * Store the vHCI ops vectors
414 		 */
415 		vh->vh_dip = vdip;
416 		vh->vh_ops = vops;
417 
418 		setup_vhci_cache(vh);
419 
420 		if (mdi_vhci_head == NULL) {
421 			mdi_vhci_head = vh;
422 		}
423 		if (mdi_vhci_tail) {
424 			mdi_vhci_tail->vh_next = vh;
425 		}
426 		mdi_vhci_tail = vh;
427 		mdi_vhci_count++;
428 	}
429 
430 	/*
431 	 * Claim the devfs node as a vhci component
432 	 */
433 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
434 
435 	/*
436 	 * Initialize our back reference from dev_info node
437 	 */
438 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
439 	mutex_exit(&mdi_mutex);
440 	return (MDI_SUCCESS);
441 }
442 
443 /*
444  * mdi_vhci_unregister():
445  *		Unregister a vHCI module from mpxio framework
446  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
447  * 		of a vhci to unregister it from the framework.
448  * Return Values:
449  *		MDI_SUCCESS
450  *		MDI_FAILURE
451  */
452 
453 /*ARGSUSED*/
454 int
455 mdi_vhci_unregister(dev_info_t *vdip, int flags)
456 {
457 	mdi_vhci_t	*found, *vh, *prev = NULL;
458 
459 	/*
460 	 * Check for invalid VHCI
461 	 */
462 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
463 		return (MDI_FAILURE);
464 
465 	mutex_enter(&mdi_mutex);
466 
467 	/*
468 	 * Scan the list of registered vHCIs for a match
469 	 */
470 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
471 		if (found == vh)
472 			break;
473 		prev = found;
474 	}
475 
476 	if (found == NULL) {
477 		mutex_exit(&mdi_mutex);
478 		return (MDI_FAILURE);
479 	}
480 
481 	/*
482 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
483 	 * should have been unregistered, before a vHCI can be
484 	 * unregistered.
485 	 */
486 	if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) {
487 		mutex_exit(&mdi_mutex);
488 		return (MDI_FAILURE);
489 	}
490 
491 	/*
492 	 * Remove the vHCI from the global list
493 	 */
494 	if (vh == mdi_vhci_head) {
495 		mdi_vhci_head = vh->vh_next;
496 	} else {
497 		prev->vh_next = vh->vh_next;
498 	}
499 	if (vh == mdi_vhci_tail) {
500 		mdi_vhci_tail = prev;
501 	}
502 
503 	mdi_vhci_count--;
504 	mutex_exit(&mdi_mutex);
505 
506 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
507 		/* add vhci to the global list */
508 		mutex_enter(&mdi_mutex);
509 		if (mdi_vhci_head == NULL)
510 			mdi_vhci_head = vh;
511 		else
512 			mdi_vhci_tail->vh_next = vh;
513 		mdi_vhci_tail = vh;
514 		mdi_vhci_count++;
515 		mutex_exit(&mdi_mutex);
516 		return (MDI_FAILURE);
517 	}
518 
519 	vh->vh_ops = NULL;
520 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
521 	DEVI(vdip)->devi_mdi_xhci = NULL;
522 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
523 	kmem_free(vh->vh_client_table,
524 	    mdi_client_table_size * sizeof (struct client_hash));
525 
526 	kmem_free(vh, sizeof (mdi_vhci_t));
527 	return (MDI_SUCCESS);
528 }
529 
530 /*
531  * i_mdi_vhci_class2vhci():
532  *		Look for a matching vHCI module given a vHCI class name
533  * Return Values:
534  *		Handle to a vHCI component
535  *		NULL
536  */
537 static mdi_vhci_t *
538 i_mdi_vhci_class2vhci(char *class)
539 {
540 	mdi_vhci_t	*vh = NULL;
541 
542 	ASSERT(!MUTEX_HELD(&mdi_mutex));
543 
544 	mutex_enter(&mdi_mutex);
545 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
546 		if (strcmp(vh->vh_class, class) == 0) {
547 			break;
548 		}
549 	}
550 	mutex_exit(&mdi_mutex);
551 	return (vh);
552 }
553 
554 /*
555  * i_devi_get_vhci():
556  *		Utility function to get the handle to a vHCI component
557  * Return Values:
558  *		Handle to a vHCI component
559  *		NULL
560  */
561 mdi_vhci_t *
562 i_devi_get_vhci(dev_info_t *vdip)
563 {
564 	mdi_vhci_t	*vh = NULL;
565 	if (MDI_VHCI(vdip)) {
566 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
567 	}
568 	return (vh);
569 }
570 
571 /*
572  * mdi_phci_register():
573  *		Register a pHCI module with mpxio framework
574  *		mdi_phci_register() is called by pHCI drivers to register with
575  *		the mpxio framework and a specific 'class_driver' vHCI.  The
576  *		pHCI driver must call this interface as part of its attach(9e)
577  *		handler.
578  * Return Values:
579  *		MDI_SUCCESS
580  *		MDI_FAILURE
581  */
582 
583 /*ARGSUSED*/
584 int
585 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
586 {
587 	mdi_phci_t		*ph;
588 	mdi_vhci_t		*vh;
589 	char			*data;
590 	char			*pathname;
591 
592 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
593 	(void) ddi_pathname(pdip, pathname);
594 
595 	/*
596 	 * Check for mpxio-disable property. Enable mpxio if the property is
597 	 * missing or not set to "yes".
598 	 * If the property is set to "yes" then emit a brief message.
599 	 */
600 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
601 	    &data) == DDI_SUCCESS)) {
602 		if (strcmp(data, "yes") == 0) {
603 			MDI_DEBUG(1, (CE_CONT, pdip,
604 			    "?%s (%s%d) multipath capabilities "
605 			    "disabled via %s.conf.\n", pathname,
606 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
607 			    ddi_driver_name(pdip)));
608 			ddi_prop_free(data);
609 			kmem_free(pathname, MAXPATHLEN);
610 			return (MDI_FAILURE);
611 		}
612 		ddi_prop_free(data);
613 	}
614 
615 	kmem_free(pathname, MAXPATHLEN);
616 
617 	/*
618 	 * Search for a matching vHCI
619 	 */
620 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
621 	if (vh == NULL) {
622 		return (MDI_FAILURE);
623 	}
624 
625 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
626 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
627 	ph->ph_dip = pdip;
628 	ph->ph_vhci = vh;
629 	ph->ph_next = NULL;
630 	ph->ph_unstable = 0;
631 	ph->ph_vprivate = 0;
632 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
633 	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);
634 
635 	MDI_PHCI_SET_POWER_UP(ph);
636 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
637 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
638 
639 	vhcache_phci_add(vh->vh_config, ph);
640 
641 	mutex_enter(&mdi_mutex);
642 	if (vh->vh_phci_head == NULL) {
643 		vh->vh_phci_head = ph;
644 	}
645 	if (vh->vh_phci_tail) {
646 		vh->vh_phci_tail->ph_next = ph;
647 	}
648 	vh->vh_phci_tail = ph;
649 	vh->vh_phci_count++;
650 	mutex_exit(&mdi_mutex);
651 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
652 	return (MDI_SUCCESS);
653 }
654 
655 /*
656  * mdi_phci_unregister():
657  *		Unregister a pHCI module from mpxio framework
658  *		mdi_phci_unregister() is called by the pHCI drivers from their
659  *		detach(9E) handler to unregister their instances from the
660  *		framework.
661  * Return Values:
662  *		MDI_SUCCESS
663  *		MDI_FAILURE
664  */
665 
666 /*ARGSUSED*/
667 int
668 mdi_phci_unregister(dev_info_t *pdip, int flags)
669 {
670 	mdi_vhci_t		*vh;
671 	mdi_phci_t		*ph;
672 	mdi_phci_t		*tmp;
673 	mdi_phci_t		*prev = NULL;
674 
675 	ph = i_devi_get_phci(pdip);
676 	if (ph == NULL) {
677 		MDI_DEBUG(1, (CE_WARN, pdip,
678 		    "!pHCI unregister: Not a valid pHCI"));
679 		return (MDI_FAILURE);
680 	}
681 
682 	vh = ph->ph_vhci;
683 	ASSERT(vh != NULL);
684 	if (vh == NULL) {
685 		MDI_DEBUG(1, (CE_WARN, pdip,
686 		    "!pHCI unregister: Not a valid vHCI"));
687 		return (MDI_FAILURE);
688 	}
689 
690 	mutex_enter(&mdi_mutex);
691 	tmp = vh->vh_phci_head;
692 	while (tmp) {
693 		if (tmp == ph) {
694 			break;
695 		}
696 		prev = tmp;
697 		tmp = tmp->ph_next;
698 	}
699 
700 	if (ph == vh->vh_phci_head) {
701 		vh->vh_phci_head = ph->ph_next;
702 	} else {
703 		prev->ph_next = ph->ph_next;
704 	}
705 
706 	if (ph == vh->vh_phci_tail) {
707 		vh->vh_phci_tail = prev;
708 	}
709 
710 	vh->vh_phci_count--;
711 
712 	mutex_exit(&mdi_mutex);
713 
714 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
715 	    ESC_DDI_INITIATOR_UNREGISTER);
716 	vhcache_phci_remove(vh->vh_config, ph);
717 	cv_destroy(&ph->ph_unstable_cv);
718 	cv_destroy(&ph->ph_powerchange_cv);
719 	mutex_destroy(&ph->ph_mutex);
720 	kmem_free(ph, sizeof (mdi_phci_t));
721 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
722 	DEVI(pdip)->devi_mdi_xhci = NULL;
723 	return (MDI_SUCCESS);
724 }
725 
726 /*
727  * i_devi_get_phci():
728  * 		Utility function to return the phci extensions.
729  */
730 static mdi_phci_t *
731 i_devi_get_phci(dev_info_t *pdip)
732 {
733 	mdi_phci_t	*ph = NULL;
734 	if (MDI_PHCI(pdip)) {
735 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
736 	}
737 	return (ph);
738 }
739 
740 /*
741  * mdi_phci_path2devinfo():
742  * 		Utility function to search for a valid phci device given
743  *		the devfs pathname.
744  */
745 
746 dev_info_t *
747 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
748 {
749 	char		*temp_pathname;
750 	mdi_vhci_t	*vh;
751 	mdi_phci_t	*ph;
752 	dev_info_t 	*pdip = NULL;
753 
754 	vh = i_devi_get_vhci(vdip);
755 	ASSERT(vh != NULL);
756 
757 	if (vh == NULL) {
758 		/*
759 		 * Invalid vHCI component, return failure
760 		 */
761 		return (NULL);
762 	}
763 
764 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
765 	mutex_enter(&mdi_mutex);
766 	ph = vh->vh_phci_head;
767 	while (ph != NULL) {
768 		pdip = ph->ph_dip;
769 		ASSERT(pdip != NULL);
770 		*temp_pathname = '\0';
771 		(void) ddi_pathname(pdip, temp_pathname);
772 		if (strcmp(temp_pathname, pathname) == 0) {
773 			break;
774 		}
775 		ph = ph->ph_next;
776 	}
777 	if (ph == NULL) {
778 		pdip = NULL;
779 	}
780 	mutex_exit(&mdi_mutex);
781 	kmem_free(temp_pathname, MAXPATHLEN);
782 	return (pdip);
783 }
784 
785 /*
786  * mdi_phci_get_path_count():
787  * 		get number of path information nodes associated with a given
788  *		pHCI device.
789  */
790 int
791 mdi_phci_get_path_count(dev_info_t *pdip)
792 {
793 	mdi_phci_t	*ph;
794 	int		count = 0;
795 
796 	ph = i_devi_get_phci(pdip);
797 	if (ph != NULL) {
798 		count = ph->ph_path_count;
799 	}
800 	return (count);
801 }
802 
803 /*
804  * i_mdi_phci_lock():
805  *		Lock a pHCI device
806  * Return Values:
807  *		None
808  * Note:
809  *		The default locking order is:
810  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
811  *		But there are number of situations where locks need to be
812  *		grabbed in reverse order.  This routine implements try and lock
813  *		mechanism depending on the requested parameter option.
814  */
815 static void
816 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
817 {
818 	if (pip) {
819 		/* Reverse locking is requested. */
820 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
821 			/*
822 			 * tryenter failed. Try to grab again
823 			 * after a small delay
824 			 */
825 			MDI_PI_HOLD(pip);
826 			MDI_PI_UNLOCK(pip);
827 			delay(1);
828 			MDI_PI_LOCK(pip);
829 			MDI_PI_RELE(pip);
830 		}
831 	} else {
832 		MDI_PHCI_LOCK(ph);
833 	}
834 }
835 
836 /*
837  * i_mdi_phci_get_client_lock():
838  *		Lock a pHCI device
839  * Return Values:
840  *		None
841  * Note:
842  *		The default locking order is:
843  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
844  *		But there are number of situations where locks need to be
845  *		grabbed in reverse order.  This routine implements try and lock
846  *		mechanism depending on the requested parameter option.
847  */
848 static void
849 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
850 {
851 	if (ct) {
852 		/* Reverse locking is requested. */
853 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
854 			/*
855 			 * tryenter failed. Try to grab again
856 			 * after a small delay
857 			 */
858 			MDI_CLIENT_UNLOCK(ct);
859 			delay(1);
860 			MDI_CLIENT_LOCK(ct);
861 		}
862 	} else {
863 		MDI_PHCI_LOCK(ph);
864 	}
865 }
866 
867 /*
868  * i_mdi_phci_unlock():
869  *		Unlock the pHCI component
870  */
871 static void
872 i_mdi_phci_unlock(mdi_phci_t *ph)
873 {
874 	MDI_PHCI_UNLOCK(ph);
875 }
876 
877 /*
878  * i_mdi_devinfo_create():
879  *		create client device's devinfo node
880  * Return Values:
881  *		dev_info
882  *		NULL
883  * Notes:
884  */
885 static dev_info_t *
886 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
887 	char **compatible, int ncompatible)
888 {
889 	dev_info_t *cdip = NULL;
890 
891 	ASSERT(MUTEX_HELD(&mdi_mutex));
892 
893 	/* Verify for duplicate entry */
894 	cdip = i_mdi_devinfo_find(vh, name, guid);
895 	ASSERT(cdip == NULL);
896 	if (cdip) {
897 		cmn_err(CE_WARN,
898 		    "i_mdi_devinfo_create: client dip %p already exists",
899 			(void *)cdip);
900 	}
901 
902 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
903 	if (cdip == NULL)
904 		goto fail;
905 
906 	/*
907 	 * Create component type and Global unique identifier
908 	 * properties
909 	 */
910 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
911 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
912 		goto fail;
913 	}
914 
915 	/* Decorate the node with compatible property */
916 	if (compatible &&
917 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
918 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
919 		goto fail;
920 	}
921 
922 	return (cdip);
923 
924 fail:
925 	if (cdip) {
926 		(void) ndi_prop_remove_all(cdip);
927 		(void) ndi_devi_free(cdip);
928 	}
929 	return (NULL);
930 }
931 
932 /*
933  * i_mdi_devinfo_find():
934  *		Find a matching devinfo node for given client node name
935  *		and its guid.
936  * Return Values:
937  *		Handle to a dev_info node or NULL
938  */
939 
940 static dev_info_t *
941 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
942 {
943 	char			*data;
944 	dev_info_t 		*cdip = NULL;
945 	dev_info_t 		*ndip = NULL;
946 	int			circular;
947 
948 	ndi_devi_enter(vh->vh_dip, &circular);
949 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
950 	while ((cdip = ndip) != NULL) {
951 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
952 
953 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
954 			continue;
955 		}
956 
957 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
958 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
959 		    &data) != DDI_PROP_SUCCESS) {
960 			continue;
961 		}
962 
963 		if (strcmp(data, guid) != 0) {
964 			ddi_prop_free(data);
965 			continue;
966 		}
967 		ddi_prop_free(data);
968 		break;
969 	}
970 	ndi_devi_exit(vh->vh_dip, circular);
971 	return (cdip);
972 }
973 
974 /*
975  * i_mdi_devinfo_remove():
976  *		Remove a client device node
977  */
978 static int
979 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
980 {
981 	int	rv = MDI_SUCCESS;
982 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
983 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
984 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
985 		if (rv != NDI_SUCCESS) {
986 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
987 			    " failed. cdip = %p\n", cdip));
988 		}
989 		/*
990 		 * Convert to MDI error code
991 		 */
992 		switch (rv) {
993 		case NDI_SUCCESS:
994 			rv = MDI_SUCCESS;
995 			break;
996 		case NDI_BUSY:
997 			rv = MDI_BUSY;
998 			break;
999 		default:
1000 			rv = MDI_FAILURE;
1001 			break;
1002 		}
1003 	}
1004 	return (rv);
1005 }
1006 
1007 /*
1008  * i_devi_get_client()
1009  *		Utility function to get mpxio component extensions
1010  */
1011 static mdi_client_t *
1012 i_devi_get_client(dev_info_t *cdip)
1013 {
1014 	mdi_client_t	*ct = NULL;
1015 	if (MDI_CLIENT(cdip)) {
1016 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1017 	}
1018 	return (ct);
1019 }
1020 
1021 /*
1022  * i_mdi_is_child_present():
1023  *		Search for the presence of client device dev_info node
1024  */
1025 
1026 static int
1027 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1028 {
1029 	int		rv = MDI_FAILURE;
1030 	struct dev_info	*dip;
1031 	int		circular;
1032 
1033 	ndi_devi_enter(vdip, &circular);
1034 	dip = DEVI(vdip)->devi_child;
1035 	while (dip) {
1036 		if (dip == DEVI(cdip)) {
1037 			rv = MDI_SUCCESS;
1038 			break;
1039 		}
1040 		dip = dip->devi_sibling;
1041 	}
1042 	ndi_devi_exit(vdip, circular);
1043 	return (rv);
1044 }
1045 
1046 
1047 /*
1048  * i_mdi_client_lock():
1049  *		Grab client component lock
1050  * Return Values:
1051  *		None
1052  * Note:
1053  *		The default locking order is:
1054  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1055  *		But there are number of situations where locks need to be
1056  *		grabbed in reverse order.  This routine implements try and lock
1057  *		mechanism depending on the requested parameter option.
1058  */
1059 
1060 static void
1061 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1062 {
1063 	if (pip) {
1064 		/*
1065 		 * Reverse locking is requested.
1066 		 */
1067 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1068 			/*
1069 			 * tryenter failed. Try to grab again
1070 			 * after a small delay
1071 			 */
1072 			MDI_PI_HOLD(pip);
1073 			MDI_PI_UNLOCK(pip);
1074 			delay(1);
1075 			MDI_PI_LOCK(pip);
1076 			MDI_PI_RELE(pip);
1077 		}
1078 	} else {
1079 		MDI_CLIENT_LOCK(ct);
1080 	}
1081 }
1082 
1083 /*
1084  * i_mdi_client_unlock():
1085  *		Unlock a client component
1086  */
1087 
1088 static void
1089 i_mdi_client_unlock(mdi_client_t *ct)
1090 {
1091 	MDI_CLIENT_UNLOCK(ct);
1092 }
1093 
1094 /*
1095  * i_mdi_client_alloc():
1096  * 		Allocate and initialize a client structure.  Caller should
1097  *		hold the global mdi_mutex.
1098  * Return Values:
1099  *		Handle to a client component
1100  */
1101 /*ARGSUSED*/
1102 static mdi_client_t *
1103 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1104 {
1105 	mdi_client_t	*ct;
1106 
1107 	ASSERT(MUTEX_HELD(&mdi_mutex));
1108 
1109 	/*
1110 	 * Allocate and initialize a component structure.
1111 	 */
1112 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1113 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1114 	ct->ct_hnext = NULL;
1115 	ct->ct_hprev = NULL;
1116 	ct->ct_dip = NULL;
1117 	ct->ct_vhci = vh;
1118 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1119 	(void) strcpy(ct->ct_drvname, name);
1120 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1121 	(void) strcpy(ct->ct_guid, lguid);
1122 	ct->ct_cprivate = NULL;
1123 	ct->ct_vprivate = NULL;
1124 	ct->ct_flags = 0;
1125 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1126 	MDI_CLIENT_SET_OFFLINE(ct);
1127 	MDI_CLIENT_SET_DETACH(ct);
1128 	MDI_CLIENT_SET_POWER_UP(ct);
1129 	ct->ct_failover_flags = 0;
1130 	ct->ct_failover_status = 0;
1131 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1132 	ct->ct_unstable = 0;
1133 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1134 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1135 	ct->ct_lb = vh->vh_lb;
1136 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1137 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1138 	ct->ct_path_count = 0;
1139 	ct->ct_path_head = NULL;
1140 	ct->ct_path_tail = NULL;
1141 	ct->ct_path_last = NULL;
1142 
1143 	/*
1144 	 * Add this client component to our client hash queue
1145 	 */
1146 	i_mdi_client_enlist_table(vh, ct);
1147 	return (ct);
1148 }
1149 
1150 /*
1151  * i_mdi_client_enlist_table():
1152  *		Attach the client device to the client hash table. Caller
1153  *		should hold the mdi_mutex
1154  */
1155 
1156 static void
1157 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1158 {
1159 	int 			index;
1160 	struct client_hash	*head;
1161 
1162 	ASSERT(MUTEX_HELD(&mdi_mutex));
1163 	index = i_mdi_get_hash_key(ct->ct_guid);
1164 	head = &vh->vh_client_table[index];
1165 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1166 	head->ct_hash_head = ct;
1167 	head->ct_hash_count++;
1168 	vh->vh_client_count++;
1169 }
1170 
1171 /*
1172  * i_mdi_client_delist_table():
1173  *		Attach the client device to the client hash table.
1174  *		Caller should hold the mdi_mutex
1175  */
1176 
1177 static void
1178 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1179 {
1180 	int			index;
1181 	char			*guid;
1182 	struct client_hash 	*head;
1183 	mdi_client_t		*next;
1184 	mdi_client_t		*last;
1185 
1186 	ASSERT(MUTEX_HELD(&mdi_mutex));
1187 	guid = ct->ct_guid;
1188 	index = i_mdi_get_hash_key(guid);
1189 	head = &vh->vh_client_table[index];
1190 
1191 	last = NULL;
1192 	next = (mdi_client_t *)head->ct_hash_head;
1193 	while (next != NULL) {
1194 		if (next == ct) {
1195 			break;
1196 		}
1197 		last = next;
1198 		next = next->ct_hnext;
1199 	}
1200 
1201 	if (next) {
1202 		head->ct_hash_count--;
1203 		if (last == NULL) {
1204 			head->ct_hash_head = ct->ct_hnext;
1205 		} else {
1206 			last->ct_hnext = ct->ct_hnext;
1207 		}
1208 		ct->ct_hnext = NULL;
1209 		vh->vh_client_count--;
1210 	}
1211 }
1212 
1213 
1214 /*
1215  * i_mdi_client_free():
1216  *		Free a client component
1217  */
1218 static int
1219 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1220 {
1221 	int		rv = MDI_SUCCESS;
1222 	int		flags = ct->ct_flags;
1223 	dev_info_t	*cdip;
1224 	dev_info_t	*vdip;
1225 
1226 	ASSERT(MUTEX_HELD(&mdi_mutex));
1227 	vdip = vh->vh_dip;
1228 	cdip = ct->ct_dip;
1229 
1230 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1231 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1232 	DEVI(cdip)->devi_mdi_client = NULL;
1233 
1234 	/*
1235 	 * Clear out back ref. to dev_info_t node
1236 	 */
1237 	ct->ct_dip = NULL;
1238 
1239 	/*
1240 	 * Remove this client from our hash queue
1241 	 */
1242 	i_mdi_client_delist_table(vh, ct);
1243 
1244 	/*
1245 	 * Uninitialize and free the component
1246 	 */
1247 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1248 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1249 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1250 	cv_destroy(&ct->ct_failover_cv);
1251 	cv_destroy(&ct->ct_unstable_cv);
1252 	cv_destroy(&ct->ct_powerchange_cv);
1253 	mutex_destroy(&ct->ct_mutex);
1254 	kmem_free(ct, sizeof (*ct));
1255 
1256 	if (cdip != NULL) {
1257 		mutex_exit(&mdi_mutex);
1258 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1259 		mutex_enter(&mdi_mutex);
1260 	}
1261 	return (rv);
1262 }
1263 
1264 /*
1265  * i_mdi_client_find():
1266  * 		Find the client structure corresponding to a given guid
1267  *		Caller should hold the mdi_mutex
1268  */
1269 static mdi_client_t *
1270 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1271 {
1272 	int			index;
1273 	struct client_hash	*head;
1274 	mdi_client_t		*ct;
1275 
1276 	ASSERT(MUTEX_HELD(&mdi_mutex));
1277 	index = i_mdi_get_hash_key(guid);
1278 	head = &vh->vh_client_table[index];
1279 
1280 	ct = head->ct_hash_head;
1281 	while (ct != NULL) {
1282 		if (strcmp(ct->ct_guid, guid) == 0 &&
1283 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1284 			break;
1285 		}
1286 		ct = ct->ct_hnext;
1287 	}
1288 	return (ct);
1289 }
1290 
1291 
1292 
1293 /*
1294  * i_mdi_client_update_state():
1295  *		Compute and update client device state
1296  * Notes:
1297  *		A client device can be in any of three possible states:
1298  *
1299  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1300  *		one online/standby paths. Can tolerate failures.
1301  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1302  *		no alternate paths available as standby. A failure on the online
1303  *		would result in loss of access to device data.
1304  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1305  *		no paths available to access the device.
1306  */
1307 static void
1308 i_mdi_client_update_state(mdi_client_t *ct)
1309 {
1310 	int state;
1311 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1312 	state = i_mdi_client_compute_state(ct, NULL);
1313 	MDI_CLIENT_SET_STATE(ct, state);
1314 }
1315 
1316 /*
1317  * i_mdi_client_compute_state():
1318  *		Compute client device state
1319  *
1320  *		mdi_phci_t *	Pointer to pHCI structure which should
1321  *				while computing the new value.  Used by
1322  *				i_mdi_phci_offline() to find the new
1323  *				client state after DR of a pHCI.
1324  */
1325 static int
1326 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1327 {
1328 	int		state;
1329 	int		online_count = 0;
1330 	int		standby_count = 0;
1331 	mdi_pathinfo_t	*pip, *next;
1332 
1333 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1334 	pip = ct->ct_path_head;
1335 	while (pip != NULL) {
1336 		MDI_PI_LOCK(pip);
1337 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1338 		if (MDI_PI(pip)->pi_phci == ph) {
1339 			MDI_PI_UNLOCK(pip);
1340 			pip = next;
1341 			continue;
1342 		}
1343 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1344 				== MDI_PATHINFO_STATE_ONLINE)
1345 			online_count++;
1346 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1347 				== MDI_PATHINFO_STATE_STANDBY)
1348 			standby_count++;
1349 		MDI_PI_UNLOCK(pip);
1350 		pip = next;
1351 	}
1352 
1353 	if (online_count == 0) {
1354 		if (standby_count == 0) {
1355 			state = MDI_CLIENT_STATE_FAILED;
1356 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1357 			    " ct = %p\n", ct));
1358 		} else if (standby_count == 1) {
1359 			state = MDI_CLIENT_STATE_DEGRADED;
1360 		} else {
1361 			state = MDI_CLIENT_STATE_OPTIMAL;
1362 		}
1363 	} else if (online_count == 1) {
1364 		if (standby_count == 0) {
1365 			state = MDI_CLIENT_STATE_DEGRADED;
1366 		} else {
1367 			state = MDI_CLIENT_STATE_OPTIMAL;
1368 		}
1369 	} else {
1370 		state = MDI_CLIENT_STATE_OPTIMAL;
1371 	}
1372 	return (state);
1373 }
1374 
1375 /*
1376  * i_mdi_client2devinfo():
1377  *		Utility function
1378  */
1379 dev_info_t *
1380 i_mdi_client2devinfo(mdi_client_t *ct)
1381 {
1382 	return (ct->ct_dip);
1383 }
1384 
1385 /*
1386  * mdi_client_path2_devinfo():
1387  * 		Given the parent devinfo and child devfs pathname, search for
1388  *		a valid devfs node handle.
1389  */
1390 dev_info_t *
1391 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1392 {
1393 	dev_info_t 	*cdip = NULL;
1394 	dev_info_t 	*ndip = NULL;
1395 	char		*temp_pathname;
1396 	int		circular;
1397 
1398 	/*
1399 	 * Allocate temp buffer
1400 	 */
1401 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1402 
1403 	/*
1404 	 * Lock parent against changes
1405 	 */
1406 	ndi_devi_enter(vdip, &circular);
1407 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1408 	while ((cdip = ndip) != NULL) {
1409 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1410 
1411 		*temp_pathname = '\0';
1412 		(void) ddi_pathname(cdip, temp_pathname);
1413 		if (strcmp(temp_pathname, pathname) == 0) {
1414 			break;
1415 		}
1416 	}
1417 	/*
1418 	 * Release devinfo lock
1419 	 */
1420 	ndi_devi_exit(vdip, circular);
1421 
1422 	/*
1423 	 * Free the temp buffer
1424 	 */
1425 	kmem_free(temp_pathname, MAXPATHLEN);
1426 	return (cdip);
1427 }
1428 
1429 
1430 /*
1431  * mdi_client_get_path_count():
1432  * 		Utility function to get number of path information nodes
1433  *		associated with a given client device.
1434  */
1435 int
1436 mdi_client_get_path_count(dev_info_t *cdip)
1437 {
1438 	mdi_client_t	*ct;
1439 	int		count = 0;
1440 
1441 	ct = i_devi_get_client(cdip);
1442 	if (ct != NULL) {
1443 		count = ct->ct_path_count;
1444 	}
1445 	return (count);
1446 }
1447 
1448 
1449 /*
1450  * i_mdi_get_hash_key():
1451  * 		Create a hash using strings as keys
1452  *
1453  */
1454 static int
1455 i_mdi_get_hash_key(char *str)
1456 {
1457 	uint32_t	g, hash = 0;
1458 	char		*p;
1459 
1460 	for (p = str; *p != '\0'; p++) {
1461 		g = *p;
1462 		hash += g;
1463 	}
1464 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1465 }
1466 
1467 /*
1468  * mdi_get_lb_policy():
1469  * 		Get current load balancing policy for a given client device
1470  */
1471 client_lb_t
1472 mdi_get_lb_policy(dev_info_t *cdip)
1473 {
1474 	client_lb_t	lb = LOAD_BALANCE_NONE;
1475 	mdi_client_t	*ct;
1476 
1477 	ct = i_devi_get_client(cdip);
1478 	if (ct != NULL) {
1479 		lb = ct->ct_lb;
1480 	}
1481 	return (lb);
1482 }
1483 
1484 /*
1485  * mdi_set_lb_region_size():
1486  * 		Set current region size for the load-balance
1487  */
1488 int
1489 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1490 {
1491 	mdi_client_t	*ct;
1492 	int		rv = MDI_FAILURE;
1493 
1494 	ct = i_devi_get_client(cdip);
1495 	if (ct != NULL && ct->ct_lb_args != NULL) {
1496 		ct->ct_lb_args->region_size = region_size;
1497 		rv = MDI_SUCCESS;
1498 	}
1499 	return (rv);
1500 }
1501 
1502 /*
1503  * mdi_Set_lb_policy():
1504  * 		Set current load balancing policy for a given client device
1505  */
1506 int
1507 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1508 {
1509 	mdi_client_t	*ct;
1510 	int		rv = MDI_FAILURE;
1511 
1512 	ct = i_devi_get_client(cdip);
1513 	if (ct != NULL) {
1514 		ct->ct_lb = lb;
1515 		rv = MDI_SUCCESS;
1516 	}
1517 	return (rv);
1518 }
1519 
1520 /*
1521  * mdi_failover():
1522  *		failover function called by the vHCI drivers to initiate
1523  *		a failover operation.  This is typically due to non-availability
1524  *		of online paths to route I/O requests.  Failover can be
1525  *		triggered through user application also.
1526  *
1527  *		The vHCI driver calls mdi_failover() to initiate a failover
1528  *		operation. mdi_failover() calls back into the vHCI driver's
1529  *		vo_failover() entry point to perform the actual failover
1530  *		operation.  The reason for requiring the vHCI driver to
1531  *		initiate failover by calling mdi_failover(), instead of directly
1532  *		executing vo_failover() itself, is to ensure that the mdi
1533  *		framework can keep track of the client state properly.
1534  *		Additionally, mdi_failover() provides as a convenience the
1535  *		option of performing the failover operation synchronously or
1536  *		asynchronously
1537  *
1538  *		Upon successful completion of the failover operation, the
1539  *		paths that were previously ONLINE will be in the STANDBY state,
1540  *		and the newly activated paths will be in the ONLINE state.
1541  *
1542  *		The flags modifier determines whether the activation is done
1543  *		synchronously: MDI_FAILOVER_SYNC
1544  * Return Values:
1545  *		MDI_SUCCESS
1546  *		MDI_FAILURE
1547  *		MDI_BUSY
1548  */
1549 /*ARGSUSED*/
1550 int
1551 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1552 {
1553 	int			rv;
1554 	mdi_client_t		*ct;
1555 
1556 	ct = i_devi_get_client(cdip);
1557 	ASSERT(ct != NULL);
1558 	if (ct == NULL) {
1559 		/* cdip is not a valid client device. Nothing more to do. */
1560 		return (MDI_FAILURE);
1561 	}
1562 
1563 	MDI_CLIENT_LOCK(ct);
1564 
1565 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1566 		/* A path to the client is being freed */
1567 		MDI_CLIENT_UNLOCK(ct);
1568 		return (MDI_BUSY);
1569 	}
1570 
1571 
1572 	if (MDI_CLIENT_IS_FAILED(ct)) {
1573 		/*
1574 		 * Client is in failed state. Nothing more to do.
1575 		 */
1576 		MDI_CLIENT_UNLOCK(ct);
1577 		return (MDI_FAILURE);
1578 	}
1579 
1580 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1581 		/*
1582 		 * Failover is already in progress; return BUSY
1583 		 */
1584 		MDI_CLIENT_UNLOCK(ct);
1585 		return (MDI_BUSY);
1586 	}
1587 	/*
1588 	 * Make sure that mdi_pathinfo node state changes are processed.
1589 	 * We do not allow failovers to progress while client path state
1590 	 * changes are in progress
1591 	 */
1592 	if (ct->ct_unstable) {
1593 		if (flags == MDI_FAILOVER_ASYNC) {
1594 			MDI_CLIENT_UNLOCK(ct);
1595 			return (MDI_BUSY);
1596 		} else {
1597 			while (ct->ct_unstable)
1598 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1599 		}
1600 	}
1601 
1602 	/*
1603 	 * Client device is in stable state. Before proceeding, perform sanity
1604 	 * checks again.
1605 	 */
1606 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1607 	    (!i_ddi_devi_attached(ct->ct_dip))) {
1608 		/*
1609 		 * Client is in failed state. Nothing more to do.
1610 		 */
1611 		MDI_CLIENT_UNLOCK(ct);
1612 		return (MDI_FAILURE);
1613 	}
1614 
1615 	/*
1616 	 * Set the client state as failover in progress.
1617 	 */
1618 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1619 	ct->ct_failover_flags = flags;
1620 	MDI_CLIENT_UNLOCK(ct);
1621 
1622 	if (flags == MDI_FAILOVER_ASYNC) {
1623 		/*
1624 		 * Submit the initiate failover request via CPR safe
1625 		 * taskq threads.
1626 		 */
1627 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1628 		    ct, KM_SLEEP);
1629 		return (MDI_ACCEPT);
1630 	} else {
1631 		/*
1632 		 * Synchronous failover mode.  Typically invoked from the user
1633 		 * land.
1634 		 */
1635 		rv = i_mdi_failover(ct);
1636 	}
1637 	return (rv);
1638 }
1639 
1640 /*
1641  * i_mdi_failover():
1642  *		internal failover function. Invokes vHCI drivers failover
1643  *		callback function and process the failover status
1644  * Return Values:
1645  *		None
1646  *
1647  * Note: A client device in failover state can not be detached or freed.
1648  */
1649 static int
1650 i_mdi_failover(void *arg)
1651 {
1652 	int		rv = MDI_SUCCESS;
1653 	mdi_client_t	*ct = (mdi_client_t *)arg;
1654 	mdi_vhci_t	*vh = ct->ct_vhci;
1655 
1656 	ASSERT(!MUTEX_HELD(&ct->ct_mutex));
1657 
1658 	if (vh->vh_ops->vo_failover != NULL) {
1659 		/*
1660 		 * Call vHCI drivers callback routine
1661 		 */
1662 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1663 		    ct->ct_failover_flags);
1664 	}
1665 
1666 	MDI_CLIENT_LOCK(ct);
1667 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1668 
1669 	/*
1670 	 * Save the failover return status
1671 	 */
1672 	ct->ct_failover_status = rv;
1673 
1674 	/*
1675 	 * As a result of failover, client status would have been changed.
1676 	 * Update the client state and wake up anyone waiting on this client
1677 	 * device.
1678 	 */
1679 	i_mdi_client_update_state(ct);
1680 
1681 	cv_broadcast(&ct->ct_failover_cv);
1682 	MDI_CLIENT_UNLOCK(ct);
1683 	return (rv);
1684 }
1685 
1686 /*
1687  * Load balancing is logical block.
1688  * IOs within the range described by region_size
1689  * would go on the same path. This would improve the
1690  * performance by cache-hit on some of the RAID devices.
1691  * Search only for online paths(At some point we
1692  * may want to balance across target ports).
1693  * If no paths are found then default to round-robin.
1694  */
1695 static int
1696 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1697 {
1698 	int		path_index = -1;
1699 	int		online_path_count = 0;
1700 	int		online_nonpref_path_count = 0;
1701 	int 		region_size = ct->ct_lb_args->region_size;
1702 	mdi_pathinfo_t	*pip;
1703 	mdi_pathinfo_t	*next;
1704 	int		preferred, path_cnt;
1705 
1706 	pip = ct->ct_path_head;
1707 	while (pip) {
1708 		MDI_PI_LOCK(pip);
1709 		if (MDI_PI(pip)->pi_state ==
1710 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1711 			online_path_count++;
1712 		} else if (MDI_PI(pip)->pi_state ==
1713 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1714 			online_nonpref_path_count++;
1715 		}
1716 		next = (mdi_pathinfo_t *)
1717 		    MDI_PI(pip)->pi_client_link;
1718 		MDI_PI_UNLOCK(pip);
1719 		pip = next;
1720 	}
1721 	/* if found any online/preferred then use this type */
1722 	if (online_path_count > 0) {
1723 		path_cnt = online_path_count;
1724 		preferred = 1;
1725 	} else if (online_nonpref_path_count > 0) {
1726 		path_cnt = online_nonpref_path_count;
1727 		preferred = 0;
1728 	} else {
1729 		path_cnt = 0;
1730 	}
1731 	if (path_cnt) {
1732 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1733 		pip = ct->ct_path_head;
1734 		while (pip && path_index != -1) {
1735 			MDI_PI_LOCK(pip);
1736 			if (path_index == 0 &&
1737 			    (MDI_PI(pip)->pi_state ==
1738 			    MDI_PATHINFO_STATE_ONLINE) &&
1739 				MDI_PI(pip)->pi_preferred == preferred) {
1740 				MDI_PI_HOLD(pip);
1741 				MDI_PI_UNLOCK(pip);
1742 				*ret_pip = pip;
1743 				return (MDI_SUCCESS);
1744 			}
1745 			path_index --;
1746 			next = (mdi_pathinfo_t *)
1747 			    MDI_PI(pip)->pi_client_link;
1748 			MDI_PI_UNLOCK(pip);
1749 			pip = next;
1750 		}
1751 		if (pip == NULL) {
1752 			MDI_DEBUG(4, (CE_NOTE, NULL,
1753 			    "!lba %p, no pip !!\n",
1754 				bp->b_blkno));
1755 		} else {
1756 			MDI_DEBUG(4, (CE_NOTE, NULL,
1757 			    "!lba %p, no pip for path_index, "
1758 			    "pip %p\n", pip));
1759 		}
1760 	}
1761 	return (MDI_FAILURE);
1762 }
1763 
1764 /*
1765  * mdi_select_path():
1766  *		select a path to access a client device.
1767  *
1768  *		mdi_select_path() function is called by the vHCI drivers to
1769  *		select a path to route the I/O request to.  The caller passes
1770  *		the block I/O data transfer structure ("buf") as one of the
1771  *		parameters.  The mpxio framework uses the buf structure
1772  *		contents to maintain per path statistics (total I/O size /
1773  *		count pending).  If more than one online path is available to
1774  *		select, the framework automatically selects a suitable path
1775  *		for routing I/O request. If a failover operation is active for
1776  *		this client device the call shall be failed with MDI_BUSY error
1777  *		code.
1778  *
1779  *		By default this function returns a suitable path in online
1780  *		state based on the current load balancing policy.  Currently
1781  *		we support LOAD_BALANCE_NONE (the previously selected online
1782  *		path continues to be used for as long as it is usable),
1783  *		LOAD_BALANCE_RR (online paths are selected in a round robin
1784  *		fashion) and LOAD_BALANCE_LBA (online paths are selected
1785  *		based on the logical block address).  The load balancing
1786  *		policy is set through the vHCI driver's driver.conf file.
1787  *
1788  *		vHCI drivers may override this default behavior by specifying
1789  *		appropriate flags.  If start_pip is specified (non NULL) it is
1790  *		used as start point to walk and find the next appropriate path.
1791  *		The following values are currently defined:
1792  *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
1793  *		MDI_SELECT_STANDBY_PATH (to select a STANDBY path).
1794  *
1795  *		The non-standard behavior is used by the scsi_vhci driver,
1796  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1797  *		attach of client devices (to avoid an unnecessary failover
1798  *		when the STANDBY path comes up first), during failover
1799  *		(to activate a STANDBY path as ONLINE).
1800  *
1801  *		The selected path is returned in a held state (ref_cnt).
1802  *		Caller should release the hold by calling mdi_rele_path().
1803  *
1804  * Return Values:
1805  *		MDI_SUCCESS	- Completed successfully
1806  *		MDI_BUSY 	- Failover in progress, or a path is transient
1807  *		MDI_NOPATH	- Client device is online, but no valid paths are
1808  *				  available to access this client device
1809  *		MDI_FAILURE	- Invalid client device or state
1810  *		MDI_DEVI_ONLINING
1811  *				- Client device (struct dev_info state) is in
1812  *				  onlining state.
1813  */
1814 
1815 /*ARGSUSED*/
1816 int
1817 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1818     mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
1819 {
1820 	mdi_client_t	*ct;
1821 	mdi_pathinfo_t	*pip;
1822 	mdi_pathinfo_t	*next;
1823 	mdi_pathinfo_t	*head;
1824 	mdi_pathinfo_t	*start;
1825 	client_lb_t	lbp;	/* load balancing policy */
1826 	int		sb = 1;	/* standard behavior */
1827 	int		preferred = 1;	/* preferred path */
1828 	int		cond, cont = 1;	/* cond: path matches; cont: keep walking */
1829 	int		retry = 0;	/* set when a transient path was seen */
1830 
1831 	if (flags != 0) {
1832 		/*
1833 		 * disable default behavior
1834 		 */
1835 		sb = 0;
1836 	}
1837 
1838 	*ret_pip = NULL;
1839 	ct = i_devi_get_client(cdip);
1840 	if (ct == NULL) {
1841 		/* mdi extensions are NULL, Nothing more to do */
1842 		return (MDI_FAILURE);
1843 	}
1844 
1845 	MDI_CLIENT_LOCK(ct);
1846 
1847 	if (sb) {
1848 		if (MDI_CLIENT_IS_FAILED(ct)) {
1849 			/*
1850 			 * Client is not ready to accept any I/O requests.
1851 			 * Fail this request.
1852 			 */
1853 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1854 			    "client state offline ct = %p\n", ct));
1855 			MDI_CLIENT_UNLOCK(ct);
1856 			return (MDI_FAILURE);
1857 		}
1858 
1859 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1860 			/*
1861 			 * Check for Failover is in progress. If so tell the
1862 			 * caller that this device is busy.
1863 			 */
1864 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1865 			    "client failover in progress ct = %p\n", ct));
1866 			MDI_CLIENT_UNLOCK(ct);
1867 			return (MDI_BUSY);
1868 		}
1869 
1870 		/*
1871 		 * Check to see whether the client device is attached.
1872 		 * If not so, let the vHCI driver manually select a path
1873 		 * (standby) and let the probe/attach process to continue.
1874 		 */
1875 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
1876 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
1877 			MDI_CLIENT_UNLOCK(ct);
1878 			return (MDI_DEVI_ONLINING);
1879 		}
1880 	}
1881 
1882 	/*
1883 	 * Cache in the client list head.  If head of the list is NULL
1884 	 * return MDI_NOPATH
1885 	 */
1886 	head = ct->ct_path_head;
1887 	if (head == NULL) {
1888 		MDI_CLIENT_UNLOCK(ct);
1889 		return (MDI_NOPATH);
1890 	}
1891 
1892 	/*
1893 	 * for non default behavior, bypass current
1894 	 * load balancing policy and always use LOAD_BALANCE_RR
1895 	 * except that the start point will be adjusted based
1896 	 * on the provided start_pip
1897 	 */
1898 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
1899 
1900 	switch (lbp) {
1901 	case LOAD_BALANCE_NONE:
1902 		/*
1903 		 * Load balancing is None  or Alternate path mode
1904 		 * Start looking for a online mdi_pathinfo node starting from
1905 		 * last known selected path
1906 		 */
1907 		preferred = 1;
1908 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
1909 		if (pip == NULL) {
1910 			pip = head;
1911 		}
1912 		start = pip;
1913 		do {
1914 			MDI_PI_LOCK(pip);
1915 			/*
1916 			 * No need to explicitly check if the path is disabled.
1917 			 * Since we are checking for state == ONLINE and the
1918 			 * same variable is used for DISABLE/ENABLE information.
1919 			 */
1920 			if ((MDI_PI(pip)->pi_state  ==
1921 				MDI_PATHINFO_STATE_ONLINE) &&
1922 				preferred == MDI_PI(pip)->pi_preferred) {
1923 				/*
1924 				 * Return the path in hold state. Caller should
1925 				 * release the hold by calling mdi_rele_path()
1926 				 */
1927 				MDI_PI_HOLD(pip);
1928 				MDI_PI_UNLOCK(pip);
1929 				ct->ct_path_last = pip;
1930 				*ret_pip = pip;
1931 				MDI_CLIENT_UNLOCK(ct);
1932 				return (MDI_SUCCESS);
1933 			}
1934 
1935 			/*
1936 			 * Path is busy (transient); report MDI_BUSY if none found.
1937 			 */
1938 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
1939 			    MDI_PI_IS_TRANSIENT(pip))
1940 				retry = 1;
1941 			/*
1942 			 * Keep looking for a next available online path
1943 			 */
1944 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1945 			if (next == NULL) {
1946 				next = head;
1947 			}
1948 			MDI_PI_UNLOCK(pip);
1949 			pip = next;
1950 			if (start == pip && preferred) {
1951 				preferred = 0;
1952 			} else if (start == pip && !preferred) {
1953 				cont = 0;
1954 			}
1955 		} while (cont);
1956 		break;
1957 
1958 	case LOAD_BALANCE_LBA:
1959 		/*
1960 		 * Make sure we are looking
1961 		 * for an online path. Otherwise, if it is for a STANDBY
1962 		 * path request, it will go through and fetch an ONLINE
1963 		 * path which is not desirable.
1964 		 */
1965 		if ((ct->ct_lb_args != NULL) &&
1966 			    (ct->ct_lb_args->region_size) && bp &&
1967 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
1968 			if (i_mdi_lba_lb(ct, ret_pip, bp)
1969 				    == MDI_SUCCESS) {
1970 				MDI_CLIENT_UNLOCK(ct);
1971 				return (MDI_SUCCESS);
1972 			}
1973 		}
1974 		/*  FALLTHROUGH */
1975 	case LOAD_BALANCE_RR:
1976 		/*
1977 		 * Load balancing is Round Robin. Start looking for a online
1978 		 * mdi_pathinfo node starting from last known selected path
1979 		 * as the start point.  If override flags are specified,
1980 		 * process accordingly.
1981 		 * If the search is already in effect(start_pip not null),
1982 		 * then let's just use the same path preference to continue the
1983 		 * traversal.
1984 		 */
1985 
1986 		if (start_pip != NULL) {
1987 			preferred = MDI_PI(start_pip)->pi_preferred;
1988 		} else {
1989 			preferred = 1;
1990 		}
1991 
1992 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
1993 		if (start == NULL) {
1994 			pip = head;
1995 		} else {
1996 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
1997 			if (pip == NULL) {
1998 				if (!sb) {
1999 					if (preferred == 0) {
2000 						/*
2001 						 * Looks like we have completed
2002 						 * the traversal as preferred
2003 						 * value is 0. Time to bail out.
2004 						 */
2005 						*ret_pip = NULL;
2006 						MDI_CLIENT_UNLOCK(ct);
2007 						return (MDI_NOPATH);
2008 					} else {
2009 						/*
2010 						 * Looks like we reached the
2011 						 * end of the list. Lets enable
2012 						 * traversal of non preferred
2013 						 * paths.
2014 						 */
2015 						preferred = 0;
2016 					}
2017 				}
2018 				pip = head;
2019 			}
2020 		}
2021 		start = pip;
2022 		do {
2023 			MDI_PI_LOCK(pip);
2024 			if (sb) {
2025 				cond = ((MDI_PI(pip)->pi_state ==
2026 				    MDI_PATHINFO_STATE_ONLINE &&
2027 					MDI_PI(pip)->pi_preferred ==
2028 						preferred) ? 1 : 0);
2029 			} else {
2030 				if (flags == MDI_SELECT_ONLINE_PATH) {
2031 					cond = ((MDI_PI(pip)->pi_state ==
2032 					    MDI_PATHINFO_STATE_ONLINE &&
2033 						MDI_PI(pip)->pi_preferred ==
2034 						preferred) ? 1 : 0);
2035 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2036 					cond = ((MDI_PI(pip)->pi_state ==
2037 					    MDI_PATHINFO_STATE_STANDBY &&
2038 						MDI_PI(pip)->pi_preferred ==
2039 						preferred) ? 1 : 0);
2040 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2041 				    MDI_SELECT_STANDBY_PATH)) {
2042 					cond = (((MDI_PI(pip)->pi_state ==
2043 					    MDI_PATHINFO_STATE_ONLINE ||
2044 					    (MDI_PI(pip)->pi_state ==
2045 					    MDI_PATHINFO_STATE_STANDBY)) &&
2046 						MDI_PI(pip)->pi_preferred ==
2047 						preferred) ? 1 : 0);
2048 				} else if (flags ==
2049 					(MDI_SELECT_STANDBY_PATH |
2050 					MDI_SELECT_ONLINE_PATH |
2051 					MDI_SELECT_USER_DISABLE_PATH)) {
2052 					cond = (((MDI_PI(pip)->pi_state ==
2053 					    MDI_PATHINFO_STATE_ONLINE ||
2054 					    (MDI_PI(pip)->pi_state ==
2055 					    MDI_PATHINFO_STATE_STANDBY) ||
2056 						(MDI_PI(pip)->pi_state ==
2057 					    (MDI_PATHINFO_STATE_ONLINE|
2058 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2059 						(MDI_PI(pip)->pi_state ==
2060 					    (MDI_PATHINFO_STATE_STANDBY |
2061 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2062 						MDI_PI(pip)->pi_preferred ==
2063 						preferred) ? 1 : 0);
2064 				} else {
2065 					cond = 0;
2066 				}
2067 			}
2068 			/*
2069 			 * No need to explicitly check if the path is disabled.
2070 			 * Since we are checking for state == ONLINE and the
2071 			 * same variable is used for DISABLE/ENABLE information.
2072 			 */
2073 			if (cond) {
2074 				/*
2075 				 * Return the path in hold state. Caller should
2076 				 * release the hold by calling mdi_rele_path()
2077 				 */
2078 				MDI_PI_HOLD(pip);
2079 				MDI_PI_UNLOCK(pip);
2080 				if (sb)
2081 					ct->ct_path_last = pip;
2082 				*ret_pip = pip;
2083 				MDI_CLIENT_UNLOCK(ct);
2084 				return (MDI_SUCCESS);
2085 			}
2086 			/*
2087 			 * Path is busy (transient); report MDI_BUSY if none found.
2088 			 */
2089 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2090 			    MDI_PI_IS_TRANSIENT(pip))
2091 				retry = 1;
2092 
2093 			/*
2094 			 * Keep looking for a next available online path
2095 			 */
2096 do_again:
2097 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2098 			if (next == NULL) {
2099 				if (!sb) {
2100 					if (preferred == 1) {
2101 						/*
2102 						 * Looks like we reached the
2103 						 * end of the list. Lets enable
2104 						 * traversal of non preferred
2105 						 * paths.
2106 						 */
2107 						preferred = 0;
2108 						next = head;
2109 					} else {
2110 						/*
2111 						 * We have done both the passes
2112 						 * Preferred as well as for
2113 						 * Non-preferred. Bail out now.
2114 						 */
2115 						cont = 0;
2116 					}
2117 				} else {
2118 					/*
2119 					 * Standard behavior case.
2120 					 */
2121 					next = head;
2122 				}
2123 			}
2124 			MDI_PI_UNLOCK(pip);
2125 			if (cont == 0) {
2126 				break;
2127 			}
2128 			pip = next;
2129 
2130 			if (!sb) {
2131 				/*
2132 				 * We need to handle the selection of
2133 				 * non-preferred path in the following
2134 				 * case:
2135 				 *
2136 				 * +------+   +------+   +------+   +-----+
2137 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2138 				 * +------+   +------+   +------+   +-----+
2139 				 *
2140 				 * If we start the search with B, we need to
2141 				 * skip beyond B to pick C which is non -
2142 				 * preferred in the second pass. The following
2143 				 * test, if true, will allow us to skip over
2144 				 * the 'start'(B in the example) to select
2145 				 * other non preferred elements.
2146 				 */
2147 				if ((start_pip != NULL) && (start_pip == pip) &&
2148 				    (MDI_PI(start_pip)->pi_preferred
2149 				    != preferred)) {
2150 					/*
2151 					 * try again after going past the start
2152 					 * pip (do_again expects pip locked)
2153 					 */
2154 					MDI_PI_LOCK(pip);
2155 					goto do_again;
2156 				}
2157 			} else {
2158 				/*
2159 				 * Standard behavior case
2160 				 */
2161 				if (start == pip && preferred) {
2162 					/* look for nonpreferred paths */
2163 					preferred = 0;
2164 				} else if (start == pip && !preferred) {
2165 					/*
2166 					 * Exit condition
2167 					 */
2168 					cont = 0;
2169 				}
2170 			}
2171 		} while (cont);
2172 		break;
2173 	}
2174 
2175 	MDI_CLIENT_UNLOCK(ct);
2176 	if (retry == 1) {
2177 		return (MDI_BUSY);
2178 	} else {
2179 		return (MDI_NOPATH);
2180 	}
2181 }
2182 
2183 /*
2184  * For a client, return the next available path to any phci
2185  *
2186  * Note:
2187  *		Caller should hold the branch's devinfo node to get a consistent
2188  *		snap shot of the mdi_pathinfo nodes.
2189  *
2190  *		Please note that even the list is stable the mdi_pathinfo
2191  *		node state and properties are volatile.  The caller should lock
2192  *		and unlock the nodes by calling mdi_pi_lock() and
2193  *		mdi_pi_unlock() functions to get a stable properties.
2194  *
2195  *		If there is a need to use the nodes beyond the hold of the
2196  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2197  *		need to be held against unexpected removal by calling
2198  *		mdi_hold_path() and should be released by calling
2199  *		mdi_rele_path() on completion.
2200  */
2201 mdi_pathinfo_t *
2202 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2203 {
2204 	mdi_client_t *ct;
2205 
2206 	if (!MDI_CLIENT(ct_dip))
2207 		return (NULL);
2208 
2209 	/*
2210 	 * Walk through client link
2211 	 */
2212 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2213 	ASSERT(ct != NULL);
2214 
2215 	if (pip == NULL)
2216 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2217 
2218 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2219 }
2220 
2221 /*
2222  * For a phci, return the next available path to any client
2223  * Note: ditto mdi_get_next_phci_path()
2224  */
2225 mdi_pathinfo_t *
2226 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2227 {
2228 	mdi_phci_t *ph;
2229 
2230 	if (!MDI_PHCI(ph_dip))
2231 		return (NULL);
2232 
2233 	/*
2234 	 * Walk through pHCI link
2235 	 */
2236 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2237 	ASSERT(ph != NULL);
2238 
2239 	if (pip == NULL)
2240 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2241 
2242 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2243 }
2244 
2245 /*
2246  * mdi_get_nextpath():
2247  *		mdi_pathinfo node walker function.  Get the next node from the
2248  *		client or pHCI device list.
2249  *
2250  * XXX This is wrapper function for compatibility purposes only.
2251  *
2252  *	It doesn't work under Multi-level MPxIO, where a dip
2253  *	is both client and phci (which link should next_path follow?).
2254  *	Once Leadville is modified to call mdi_get_next_phci/client_path,
2255  *	this interface should be removed.
2256  */
2257 void
2258 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip,
2259     mdi_pathinfo_t **ret_pip)
2260 {
2261 	if (MDI_CLIENT(dip)) {
2262 		*ret_pip = mdi_get_next_phci_path(dip, pip);
2263 	} else if (MDI_PHCI(dip)) {
2264 		*ret_pip = mdi_get_next_client_path(dip, pip);
2265 	} else {
2266 		*ret_pip = NULL;
2267 	}
2268 }
2269 
2270 /*
2271  * mdi_hold_path():
2272  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2273  * Return Values:
2274  *		None
2275  */
2276 void
2277 mdi_hold_path(mdi_pathinfo_t *pip)
2278 {
2279 	if (pip) {
2280 		MDI_PI_LOCK(pip);
2281 		MDI_PI_HOLD(pip);
2282 		MDI_PI_UNLOCK(pip);
2283 	}
2284 }
2285 
2286 
2287 /*
2288  * mdi_rele_path():
2289  *		Release the mdi_pathinfo node which was selected
2290  *		through mdi_select_path() mechanism or manually held by
2291  *		calling mdi_hold_path().
2292  * Return Values:
2293  *		None
2294  */
2295 void
2296 mdi_rele_path(mdi_pathinfo_t *pip)
2297 {
2298 	if (pip) {
2299 		MDI_PI_LOCK(pip);
2300 		MDI_PI_RELE(pip);
2301 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2302 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2303 		}
2304 		MDI_PI_UNLOCK(pip);
2305 	}
2306 }
2307 
2308 
2309 /*
2310  * mdi_pi_lock():
2311  * 		Lock the mdi_pathinfo node.
2312  * Note:
2313  *		The caller should release the lock by calling mdi_pi_unlock()
2314  */
2315 void
2316 mdi_pi_lock(mdi_pathinfo_t *pip)
2317 {
2318 	ASSERT(pip != NULL);
2319 	if (pip) {
2320 		MDI_PI_LOCK(pip);
2321 	}
2322 }
2323 
2324 
2325 /*
2326  * mdi_pi_unlock():
2327  * 		Unlock the mdi_pathinfo node.
2328  * Note:
2329  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2330  */
2331 void
2332 mdi_pi_unlock(mdi_pathinfo_t *pip)
2333 {
2334 	ASSERT(pip != NULL);
2335 	if (pip) {
2336 		MDI_PI_UNLOCK(pip);
2337 	}
2338 }
2339 
2340 /*
2341  * mdi_pi_find():
2342  *		Search the list of mdi_pathinfo nodes attached to the
2343  *		pHCI/Client device node whose path address matches "paddr".
2344  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2345  *		found.
2346  * Return Values:
2347  *		mdi_pathinfo node handle
2348  *		NULL
2349  * Notes:
2350  *		Caller need not hold any locks to call this function.
2351  */
2352 mdi_pathinfo_t *
2353 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2354 {
2355 	mdi_phci_t		*ph;
2356 	mdi_vhci_t		*vh;
2357 	mdi_client_t		*ct;
2358 	mdi_pathinfo_t		*pip = NULL;
2359 
2360 	if ((pdip == NULL) || (paddr == NULL)) {
2361 		return (NULL);
2362 	}
2363 	ph = i_devi_get_phci(pdip);
2364 	if (ph == NULL) {
2365 		/*
2366 		 * Invalid pHCI device, Nothing more to do.
2367 		 */
2368 		MDI_DEBUG(2, (CE_WARN, NULL,
2369 		    "!mdi_pi_find: invalid phci"));
2370 		return (NULL);
2371 	}
2372 
2373 	vh = ph->ph_vhci;
2374 	if (vh == NULL) {
2375 		/*
2376 		 * Invalid vHCI device, Nothing more to do.
2377 		 */
2378 		MDI_DEBUG(2, (CE_WARN, NULL,
2379 		    "!mdi_pi_find: invalid phci"));
2380 		return (NULL);
2381 	}
2382 
2383 	/*
2384 	 * Look for client device identified by caddr (guid)
2385 	 */
2386 	if (caddr == NULL) {
2387 		/*
2388 		 * Find a mdi_pathinfo node under pHCI list for a matching
2389 		 * unit address.
2390 		 */
2391 		mutex_enter(&ph->ph_mutex);
2392 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2393 
2394 		while (pip != NULL) {
2395 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2396 				break;
2397 			}
2398 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2399 		}
2400 		mutex_exit(&ph->ph_mutex);
2401 		return (pip);
2402 	}
2403 
2404 	/*
2405 	 * XXX - Is the rest of the code in this function really necessary?
2406 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2407 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2408 	 * whether the search is based on the pathinfo nodes attached to
2409 	 * the pHCI or the client node, the result will be the same.
2410 	 */
2411 
2412 	/*
2413 	 * Find the client device corresponding to 'caddr'
2414 	 */
2415 	mutex_enter(&mdi_mutex);
2416 
2417 	/*
2418 	 * XXX - Passing NULL to the following function works as long as the
2419 	 * the client addresses (caddr) are unique per vhci basis.
2420 	 */
2421 	ct = i_mdi_client_find(vh, NULL, caddr);
2422 	if (ct == NULL) {
2423 		/*
2424 		 * Client not found, Obviously mdi_pathinfo node has not been
2425 		 * created yet.
2426 		 */
2427 		mutex_exit(&mdi_mutex);
2428 		return (pip);
2429 	}
2430 
2431 	/*
2432 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2433 	 * pHCI and paddr
2434 	 */
2435 	MDI_CLIENT_LOCK(ct);
2436 
2437 	/*
2438 	 * Release the global mutex as it is no more needed. Note: We always
2439 	 * respect the locking order while acquiring.
2440 	 */
2441 	mutex_exit(&mdi_mutex);
2442 
2443 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2444 	while (pip != NULL) {
2445 		/*
2446 		 * Compare the unit address
2447 		 */
2448 		if ((MDI_PI(pip)->pi_phci == ph) &&
2449 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2450 			break;
2451 		}
2452 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2453 	}
2454 	MDI_CLIENT_UNLOCK(ct);
2455 	return (pip);
2456 }
2457 
2458 /*
2459  * mdi_pi_alloc():
2460  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2461  *		The mdi_pathinfo node returned by this function identifies a
2462  *		unique device path is capable of having properties attached
2463  *		and passed to mdi_pi_online() to fully attach and online the
2464  *		path and client device node.
2465  *		The mdi_pathinfo node returned by this function must be
2466  *		destroyed using mdi_pi_free() if the path is no longer
2467  *		operational or if the caller fails to attach a client device
2468  *		node when calling mdi_pi_online(). The framework will not free
2469  *		the resources allocated.
2470  *		This function can be called from both interrupt and kernel
2471  *		contexts.  DDI_NOSLEEP flag should be used while calling
2472  *		from interrupt contexts.
2473  * Return Values:
2474  *		MDI_SUCCESS
2475  *		MDI_FAILURE
2476  *		MDI_NOMEM
2477  */
2478 /*ARGSUSED*/
2479 int
2480 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2481     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2482 {
2483 	mdi_vhci_t	*vh;
2484 	mdi_phci_t	*ph;
2485 	mdi_client_t	*ct;
2486 	mdi_pathinfo_t	*pip = NULL;
2487 	dev_info_t	*cdip;
2488 	int		rv = MDI_NOMEM;
2489 	int		path_allocated = 0;
2490 
2491 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2492 	    ret_pip == NULL) {
2493 		/* Nothing more to do */
2494 		return (MDI_FAILURE);
2495 	}
2496 
2497 	*ret_pip = NULL;
2498 	ph = i_devi_get_phci(pdip);
2499 	ASSERT(ph != NULL);
2500 	if (ph == NULL) {
2501 		/* Invalid pHCI device, return failure */
2502 		MDI_DEBUG(1, (CE_WARN, NULL,
2503 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2504 		return (MDI_FAILURE);
2505 	}
2506 
2507 	MDI_PHCI_LOCK(ph);
2508 	vh = ph->ph_vhci;
2509 	if (vh == NULL) {
2510 		/* Invalid vHCI device, return failure */
2511 		MDI_DEBUG(1, (CE_WARN, NULL,
2512 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2513 		MDI_PHCI_UNLOCK(ph);
2514 		return (MDI_FAILURE);
2515 	}
2516 
2517 	if (MDI_PHCI_IS_READY(ph) == 0) {
2518 		/*
2519 		 * Do not allow new node creation when pHCI is in
2520 		 * offline/suspended states
2521 		 */
2522 		MDI_DEBUG(1, (CE_WARN, NULL,
2523 		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
2524 		MDI_PHCI_UNLOCK(ph);
2525 		return (MDI_BUSY);
2526 	}
2527 	MDI_PHCI_UNSTABLE(ph);
2528 	MDI_PHCI_UNLOCK(ph);
2529 
2530 	/* look for a matching client, create one if not found */
2531 	mutex_enter(&mdi_mutex);
2532 	ct = i_mdi_client_find(vh, cname, caddr);
2533 	if (ct == NULL) {
2534 		ct = i_mdi_client_alloc(vh, cname, caddr);
2535 		ASSERT(ct != NULL);
2536 	}
2537 
2538 	if (ct->ct_dip == NULL) {
2539 		/*
2540 		 * Allocate a devinfo node
2541 		 */
2542 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2543 		    compatible, ncompatible);
2544 		if (ct->ct_dip == NULL) {
2545 			(void) i_mdi_client_free(vh, ct);
2546 			goto fail;
2547 		}
2548 	}
2549 	cdip = ct->ct_dip;
2550 
2551 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2552 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2553 
2554 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2555 	while (pip != NULL) {
2556 		/*
2557 		 * Compare the unit address
2558 		 */
2559 		if ((MDI_PI(pip)->pi_phci == ph) &&
2560 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2561 			break;
2562 		}
2563 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2564 	}
2565 
2566 	if (pip == NULL) {
2567 		/*
2568 		 * This is a new path for this client device.  Allocate and
2569 		 * initialize a new pathinfo node
2570 		 */
2571 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2572 		ASSERT(pip != NULL);
2573 		path_allocated = 1;
2574 	}
2575 	rv = MDI_SUCCESS;
2576 
2577 fail:
2578 	/*
2579 	 * Release the global mutex.
2580 	 */
2581 	mutex_exit(&mdi_mutex);
2582 
2583 	/*
2584 	 * Mark the pHCI as stable
2585 	 */
2586 	MDI_PHCI_LOCK(ph);
2587 	MDI_PHCI_STABLE(ph);
2588 	MDI_PHCI_UNLOCK(ph);
2589 	*ret_pip = pip;
2590 
2591 	if (path_allocated)
2592 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2593 
2594 	return (rv);
2595 }
2596 
2597 /*ARGSUSED*/
2598 int
2599 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2600     int flags, mdi_pathinfo_t **ret_pip)
2601 {
2602 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2603 	    flags, ret_pip));
2604 }
2605 
2606 /*
2607  * i_mdi_pi_alloc():
2608  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2609  * Return Values:
2610  *		mdi_pathinfo
2611  */
2612 
2613 /*ARGSUSED*/
2614 static mdi_pathinfo_t *
2615 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2616 {
2617 	mdi_pathinfo_t	*pip;
2618 	int		ct_circular;
2619 	int		ph_circular;
2620 	int		se_flag;
2621 	int		kmem_flag;
2622 
2623 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2624 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2625 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2626 	    MDI_PATHINFO_STATE_TRANSIENT;
2627 
2628 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2629 		MDI_PI_SET_USER_DISABLE(pip);
2630 
2631 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2632 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2633 
2634 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2635 		MDI_PI_SET_DRV_DISABLE(pip);
2636 
2637 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2638 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2639 	MDI_PI(pip)->pi_client = ct;
2640 	MDI_PI(pip)->pi_phci = ph;
2641 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2642 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2643 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2644 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2645 	MDI_PI(pip)->pi_pprivate = NULL;
2646 	MDI_PI(pip)->pi_cprivate = NULL;
2647 	MDI_PI(pip)->pi_vprivate = NULL;
2648 	MDI_PI(pip)->pi_client_link = NULL;
2649 	MDI_PI(pip)->pi_phci_link = NULL;
2650 	MDI_PI(pip)->pi_ref_cnt = 0;
2651 	MDI_PI(pip)->pi_kstats = NULL;
2652 	MDI_PI(pip)->pi_preferred = 1;
2653 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2654 
2655 	/*
2656 	 * Lock both dev_info nodes against changes in parallel.
2657 	 */
2658 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2659 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2660 
2661 	i_mdi_phci_add_path(ph, pip);
2662 	i_mdi_client_add_path(ct, pip);
2663 
2664 	ndi_devi_exit(ph->ph_dip, ph_circular);
2665 	ndi_devi_exit(ct->ct_dip, ct_circular);
2666 
2667 	/* determine interrupt context */
2668 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2669 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2670 
2671 	i_ddi_di_cache_invalidate(kmem_flag);
2672 
2673 	return (pip);
2674 }
2675 
2676 /*
2677  * i_mdi_phci_add_path():
2678  * 		Add a mdi_pathinfo node to pHCI list.
2679  * Notes:
2680  *		Caller should per-pHCI mutex
2681  */
2682 
2683 static void
2684 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2685 {
2686 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2687 
2688 	if (ph->ph_path_head == NULL) {
2689 		ph->ph_path_head = pip;
2690 	} else {
2691 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2692 	}
2693 	ph->ph_path_tail = pip;
2694 	ph->ph_path_count++;
2695 }
2696 
2697 /*
2698  * i_mdi_client_add_path():
2699  *		Add mdi_pathinfo node to client list
2700  */
2701 
2702 static void
2703 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2704 {
2705 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2706 
2707 	if (ct->ct_path_head == NULL) {
2708 		ct->ct_path_head = pip;
2709 	} else {
2710 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2711 	}
2712 	ct->ct_path_tail = pip;
2713 	ct->ct_path_count++;
2714 }
2715 
2716 /*
2717  * mdi_pi_free():
2718  *		Free the mdi_pathinfo node and also client device node if this
2719  *		is the last path to the device
2720  * Return Values:
2721  *		MDI_SUCCESS
2722  *		MDI_FAILURE
2723  *		MDI_BUSY
2724  */
2725 
2726 /*ARGSUSED*/
2727 int
2728 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
2729 {
2730 	int		rv = MDI_SUCCESS;
2731 	mdi_vhci_t	*vh;
2732 	mdi_phci_t	*ph;
2733 	mdi_client_t	*ct;
2734 	int		(*f)();
2735 	int		client_held = 0;
2736 
2737 	MDI_PI_LOCK(pip);
2738 	ph = MDI_PI(pip)->pi_phci;
2739 	ASSERT(ph != NULL);
2740 	if (ph == NULL) {
2741 		/*
2742 		 * Invalid pHCI device, return failure
2743 		 */
2744 		MDI_DEBUG(1, (CE_WARN, NULL,
2745 		    "!mdi_pi_free: invalid pHCI"));
2746 		MDI_PI_UNLOCK(pip);
2747 		return (MDI_FAILURE);
2748 	}
2749 
2750 	vh = ph->ph_vhci;
2751 	ASSERT(vh != NULL);
2752 	if (vh == NULL) {
2753 		/* Invalid pHCI device, return failure */
2754 		MDI_DEBUG(1, (CE_WARN, NULL,
2755 		    "!mdi_pi_free: invalid vHCI"));
2756 		MDI_PI_UNLOCK(pip);
2757 		return (MDI_FAILURE);
2758 	}
2759 
2760 	ct = MDI_PI(pip)->pi_client;
2761 	ASSERT(ct != NULL);
2762 	if (ct == NULL) {
2763 		/*
2764 		 * Invalid Client device, return failure
2765 		 */
2766 		MDI_DEBUG(1, (CE_WARN, NULL,
2767 		    "!mdi_pi_free: invalid client"));
2768 		MDI_PI_UNLOCK(pip);
2769 		return (MDI_FAILURE);
2770 	}
2771 
2772 	/*
2773 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
2774 	 * if the node state is either offline or init and the reference count
2775 	 * is zero.
2776 	 */
2777 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
2778 	    MDI_PI_IS_INITING(pip))) {
2779 		/*
2780 		 * Node is busy
2781 		 */
2782 		MDI_DEBUG(1, (CE_WARN, NULL,
2783 		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
2784 		MDI_PI_UNLOCK(pip);
2785 		return (MDI_BUSY);
2786 	}
2787 
2788 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
2789 		/*
2790 		 * Give a chance for pending I/Os to complete.
2791 		 */
2792 		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
2793 		    "%d cmds still pending on path: %p\n",
2794 		    MDI_PI(pip)->pi_ref_cnt, pip));
2795 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
2796 		    &MDI_PI(pip)->pi_mutex,
2797 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
2798 			/*
2799 			 * The timeout time reached without ref_cnt being zero
2800 			 * being signaled.
2801 			 */
2802 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2803 			    "!mdi_pi_free: "
2804 			    "Timeout reached on path %p without the cond\n",
2805 			    pip));
2806 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2807 			    "!mdi_pi_free: "
2808 			    "%d cmds still pending on path: %p\n",
2809 			    MDI_PI(pip)->pi_ref_cnt, pip));
2810 			MDI_PI_UNLOCK(pip);
2811 			return (MDI_BUSY);
2812 		}
2813 	}
2814 	if (MDI_PI(pip)->pi_pm_held) {
2815 		client_held = 1;
2816 	}
2817 	MDI_PI_UNLOCK(pip);
2818 
2819 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
2820 
2821 	MDI_CLIENT_LOCK(ct);
2822 
2823 	/* Prevent further failovers till mdi_mutex is held */
2824 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
2825 
2826 	/*
2827 	 * Wait till failover is complete before removing this node.
2828 	 */
2829 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
2830 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
2831 
2832 	MDI_CLIENT_UNLOCK(ct);
2833 	mutex_enter(&mdi_mutex);
2834 	MDI_CLIENT_LOCK(ct);
2835 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
2836 
2837 	if (!MDI_PI_IS_INITING(pip)) {
2838 		f = vh->vh_ops->vo_pi_uninit;
2839 		if (f != NULL) {
2840 			rv = (*f)(vh->vh_dip, pip, 0);
2841 		}
2842 	}
2843 	/*
2844 	 * If vo_pi_uninit() completed successfully.
2845 	 */
2846 	if (rv == MDI_SUCCESS) {
2847 		if (client_held) {
2848 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
2849 			    "i_mdi_pm_rele_client\n"));
2850 			i_mdi_pm_rele_client(ct, 1);
2851 		}
2852 		i_mdi_pi_free(ph, pip, ct);
2853 		if (ct->ct_path_count == 0) {
2854 			/*
2855 			 * Client lost its last path.
2856 			 * Clean up the client device
2857 			 */
2858 			MDI_CLIENT_UNLOCK(ct);
2859 			(void) i_mdi_client_free(ct->ct_vhci, ct);
2860 			mutex_exit(&mdi_mutex);
2861 			return (rv);
2862 		}
2863 	}
2864 	MDI_CLIENT_UNLOCK(ct);
2865 	mutex_exit(&mdi_mutex);
2866 
2867 	if (rv == MDI_FAILURE)
2868 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2869 
2870 	return (rv);
2871 }
2872 
2873 /*
2874  * i_mdi_pi_free():
2875  *		Free the mdi_pathinfo node
2876  */
2877 static void
2878 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
2879 {
2880 	int	ct_circular;
2881 	int	ph_circular;
2882 	int	se_flag;
2883 	int	kmem_flag;
2884 
2885 	/*
2886 	 * remove any per-path kstats
2887 	 */
2888 	i_mdi_pi_kstat_destroy(pip);
2889 
2890 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2891 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2892 
2893 	i_mdi_client_remove_path(ct, pip);
2894 	i_mdi_phci_remove_path(ph, pip);
2895 
2896 	ndi_devi_exit(ph->ph_dip, ph_circular);
2897 	ndi_devi_exit(ct->ct_dip, ct_circular);
2898 
2899 	/* determine interrupt context */
2900 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2901 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2902 
2903 	i_ddi_di_cache_invalidate(kmem_flag);
2904 
2905 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
2906 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
2907 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
2908 	if (MDI_PI(pip)->pi_addr) {
2909 		kmem_free(MDI_PI(pip)->pi_addr,
2910 		    strlen(MDI_PI(pip)->pi_addr) + 1);
2911 		MDI_PI(pip)->pi_addr = NULL;
2912 	}
2913 
2914 	if (MDI_PI(pip)->pi_prop) {
2915 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
2916 		MDI_PI(pip)->pi_prop = NULL;
2917 	}
2918 	kmem_free(pip, sizeof (struct mdi_pathinfo));
2919 }
2920 
2921 
2922 /*
2923  * i_mdi_phci_remove_path():
2924  * 		Remove a mdi_pathinfo node from pHCI list.
2925  * Notes:
2926  *		Caller should hold per-pHCI mutex
2927  */
2928 
2929 static void
2930 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2931 {
2932 	mdi_pathinfo_t	*prev = NULL;
2933 	mdi_pathinfo_t	*path = NULL;
2934 
2935 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2936 
2937 	path = ph->ph_path_head;
2938 	while (path != NULL) {
2939 		if (path == pip) {
2940 			break;
2941 		}
2942 		prev = path;
2943 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2944 	}
2945 
2946 	if (path) {
2947 		ph->ph_path_count--;
2948 		if (prev) {
2949 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
2950 		} else {
2951 			ph->ph_path_head =
2952 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2953 		}
2954 		if (ph->ph_path_tail == path) {
2955 			ph->ph_path_tail = prev;
2956 		}
2957 	}
2958 
2959 	/*
2960 	 * Clear the pHCI link
2961 	 */
2962 	MDI_PI(pip)->pi_phci_link = NULL;
2963 	MDI_PI(pip)->pi_phci = NULL;
2964 }
2965 
2966 /*
2967  * i_mdi_client_remove_path():
2968  * 		Remove a mdi_pathinfo node from client path list.
2969  */
2970 
2971 static void
2972 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2973 {
2974 	mdi_pathinfo_t	*prev = NULL;
2975 	mdi_pathinfo_t	*path;
2976 
2977 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2978 
2979 	path = ct->ct_path_head;
2980 	while (path != NULL) {
2981 		if (path == pip) {
2982 			break;
2983 		}
2984 		prev = path;
2985 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2986 	}
2987 
2988 	if (path) {
2989 		ct->ct_path_count--;
2990 		if (prev) {
2991 			MDI_PI(prev)->pi_client_link =
2992 			    MDI_PI(path)->pi_client_link;
2993 		} else {
2994 			ct->ct_path_head =
2995 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2996 		}
2997 		if (ct->ct_path_tail == path) {
2998 			ct->ct_path_tail = prev;
2999 		}
3000 		if (ct->ct_path_last == path) {
3001 			ct->ct_path_last = ct->ct_path_head;
3002 		}
3003 	}
3004 	MDI_PI(pip)->pi_client_link = NULL;
3005 	MDI_PI(pip)->pi_client = NULL;
3006 }
3007 
3008 /*
3009  * i_mdi_pi_state_change():
3010  *		online a mdi_pathinfo node
3011  *
3012  * Return Values:
3013  *		MDI_SUCCESS
3014  *		MDI_FAILURE
3015  */
3016 /*ARGSUSED*/
3017 static int
3018 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3019 {
3020 	int		rv = MDI_SUCCESS;
3021 	mdi_vhci_t	*vh;
3022 	mdi_phci_t	*ph;
3023 	mdi_client_t	*ct;
3024 	int		(*f)();
3025 	dev_info_t	*cdip;
3026 
3027 	MDI_PI_LOCK(pip);
3028 
3029 	ph = MDI_PI(pip)->pi_phci;
3030 	ASSERT(ph);
3031 	if (ph == NULL) {
3032 		/*
3033 		 * Invalid pHCI device, fail the request
3034 		 */
3035 		MDI_PI_UNLOCK(pip);
3036 		MDI_DEBUG(1, (CE_WARN, NULL,
3037 		    "!mdi_pi_state_change: invalid phci"));
3038 		return (MDI_FAILURE);
3039 	}
3040 
3041 	vh = ph->ph_vhci;
3042 	ASSERT(vh);
3043 	if (vh == NULL) {
3044 		/*
3045 		 * Invalid vHCI device, fail the request
3046 		 */
3047 		MDI_PI_UNLOCK(pip);
3048 		MDI_DEBUG(1, (CE_WARN, NULL,
3049 		    "!mdi_pi_state_change: invalid vhci"));
3050 		return (MDI_FAILURE);
3051 	}
3052 
3053 	ct = MDI_PI(pip)->pi_client;
3054 	ASSERT(ct != NULL);
3055 	if (ct == NULL) {
3056 		/*
3057 		 * Invalid client device, fail the request
3058 		 */
3059 		MDI_PI_UNLOCK(pip);
3060 		MDI_DEBUG(1, (CE_WARN, NULL,
3061 		    "!mdi_pi_state_change: invalid client"));
3062 		return (MDI_FAILURE);
3063 	}
3064 
3065 	/*
3066 	 * If this path has not been initialized yet, Callback vHCI driver's
3067 	 * pathinfo node initialize entry point
3068 	 */
3069 
3070 	if (MDI_PI_IS_INITING(pip)) {
3071 		MDI_PI_UNLOCK(pip);
3072 		f = vh->vh_ops->vo_pi_init;
3073 		if (f != NULL) {
3074 			rv = (*f)(vh->vh_dip, pip, 0);
3075 			if (rv != MDI_SUCCESS) {
3076 				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
3077 				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
3078 				    vh, pip));
3079 				return (MDI_FAILURE);
3080 			}
3081 		}
3082 		MDI_PI_LOCK(pip);
3083 		MDI_PI_CLEAR_TRANSIENT(pip);
3084 	}
3085 
3086 	/*
3087 	 * Do not allow state transition when pHCI is in offline/suspended
3088 	 * states
3089 	 */
3090 	i_mdi_phci_lock(ph, pip);
3091 	if (MDI_PHCI_IS_READY(ph) == 0) {
3092 		MDI_DEBUG(1, (CE_WARN, NULL,
3093 		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
3094 		MDI_PI_UNLOCK(pip);
3095 		i_mdi_phci_unlock(ph);
3096 		return (MDI_BUSY);
3097 	}
3098 	MDI_PHCI_UNSTABLE(ph);
3099 	i_mdi_phci_unlock(ph);
3100 
3101 	/*
3102 	 * Check if mdi_pathinfo state is in transient state.
3103 	 * If yes, offlining is in progress and wait till transient state is
3104 	 * cleared.
3105 	 */
3106 	if (MDI_PI_IS_TRANSIENT(pip)) {
3107 		while (MDI_PI_IS_TRANSIENT(pip)) {
3108 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3109 			    &MDI_PI(pip)->pi_mutex);
3110 		}
3111 	}
3112 
3113 	/*
3114 	 * Grab the client lock in reverse order sequence and release the
3115 	 * mdi_pathinfo mutex.
3116 	 */
3117 	i_mdi_client_lock(ct, pip);
3118 	MDI_PI_UNLOCK(pip);
3119 
3120 	/*
3121 	 * Wait till failover state is cleared
3122 	 */
3123 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3124 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3125 
3126 	/*
3127 	 * Mark the mdi_pathinfo node state as transient
3128 	 */
3129 	MDI_PI_LOCK(pip);
3130 	switch (state) {
3131 	case MDI_PATHINFO_STATE_ONLINE:
3132 		MDI_PI_SET_ONLINING(pip);
3133 		break;
3134 
3135 	case MDI_PATHINFO_STATE_STANDBY:
3136 		MDI_PI_SET_STANDBYING(pip);
3137 		break;
3138 
3139 	case MDI_PATHINFO_STATE_FAULT:
3140 		/*
3141 		 * Mark the pathinfo state as FAULTED
3142 		 */
3143 		MDI_PI_SET_FAULTING(pip);
3144 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3145 		break;
3146 
3147 	case MDI_PATHINFO_STATE_OFFLINE:
3148 		/*
3149 		 * ndi_devi_offline() cannot hold pip or ct locks.
3150 		 */
3151 		MDI_PI_UNLOCK(pip);
3152 		/*
3153 		 * Do not offline if path will become last path and path
3154 		 * is busy for user initiated events.
3155 		 */
3156 		cdip = ct->ct_dip;
3157 		if ((flag & NDI_DEVI_REMOVE) &&
3158 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3159 			i_mdi_client_unlock(ct);
3160 			rv = ndi_devi_offline(cdip, 0);
3161 			if (rv != NDI_SUCCESS) {
3162 				/*
3163 				 * Convert to MDI error code
3164 				 */
3165 				switch (rv) {
3166 				case NDI_BUSY:
3167 					rv = MDI_BUSY;
3168 					break;
3169 				default:
3170 					rv = MDI_FAILURE;
3171 					break;
3172 				}
3173 				goto state_change_exit;
3174 			} else {
3175 				i_mdi_client_lock(ct, NULL);
3176 			}
3177 		}
3178 		/*
3179 		 * Mark the mdi_pathinfo node state as transient
3180 		 */
3181 		MDI_PI_LOCK(pip);
3182 		MDI_PI_SET_OFFLINING(pip);
3183 		break;
3184 	}
3185 	MDI_PI_UNLOCK(pip);
3186 	MDI_CLIENT_UNSTABLE(ct);
3187 	i_mdi_client_unlock(ct);
3188 
3189 	f = vh->vh_ops->vo_pi_state_change;
3190 	if (f != NULL) {
3191 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3192 		if (rv == MDI_NOT_SUPPORTED) {
3193 			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3194 		}
3195 		if (rv != MDI_SUCCESS) {
3196 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
3197 			    "!vo_pi_state_change: failed rv = %x", rv));
3198 		}
3199 	}
3200 	MDI_CLIENT_LOCK(ct);
3201 	MDI_PI_LOCK(pip);
3202 	if (MDI_PI_IS_TRANSIENT(pip)) {
3203 		if (rv == MDI_SUCCESS) {
3204 			MDI_PI_CLEAR_TRANSIENT(pip);
3205 		} else {
3206 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3207 		}
3208 	}
3209 
3210 	/*
3211 	 * Wake anyone waiting for this mdi_pathinfo node
3212 	 */
3213 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3214 	MDI_PI_UNLOCK(pip);
3215 
3216 	/*
3217 	 * Mark the client device as stable
3218 	 */
3219 	MDI_CLIENT_STABLE(ct);
3220 	if (rv == MDI_SUCCESS) {
3221 		if (ct->ct_unstable == 0) {
3222 			cdip = ct->ct_dip;
3223 
3224 			/*
3225 			 * Onlining the mdi_pathinfo node will impact the
3226 			 * client state Update the client and dev_info node
3227 			 * state accordingly
3228 			 */
3229 			rv = NDI_SUCCESS;
3230 			i_mdi_client_update_state(ct);
3231 			switch (MDI_CLIENT_STATE(ct)) {
3232 			case MDI_CLIENT_STATE_OPTIMAL:
3233 			case MDI_CLIENT_STATE_DEGRADED:
3234 				if (cdip && !i_ddi_devi_attached(cdip) &&
3235 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3236 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3237 
3238 					i_mdi_client_unlock(ct);
3239 					/*
3240 					 * Must do ndi_devi_online() through
3241 					 * hotplug thread for deferred
3242 					 * attach mechanism to work
3243 					 */
3244 					rv = ndi_devi_online(cdip, 0);
3245 					i_mdi_client_lock(ct, NULL);
3246 					if ((rv != NDI_SUCCESS) &&
3247 					    (MDI_CLIENT_STATE(ct) ==
3248 					    MDI_CLIENT_STATE_DEGRADED)) {
3249 						/*
3250 						 * ndi_devi_online failed.
3251 						 * Reset client flags to
3252 						 * offline.
3253 						 */
3254 						MDI_DEBUG(1, (CE_WARN, cdip,
3255 						    "!ndi_devi_online: failed "
3256 						    " Error: %x", rv));
3257 						MDI_CLIENT_SET_OFFLINE(ct);
3258 					}
3259 					if (rv != NDI_SUCCESS) {
3260 						/* Reset the path state */
3261 						MDI_PI_LOCK(pip);
3262 						MDI_PI(pip)->pi_state =
3263 						    MDI_PI_OLD_STATE(pip);
3264 						MDI_PI_UNLOCK(pip);
3265 					}
3266 				}
3267 				break;
3268 
3269 			case MDI_CLIENT_STATE_FAILED:
3270 				/*
3271 				 * This is the last path case for
3272 				 * non-user initiated events.
3273 				 */
3274 				if (((flag & NDI_DEVI_REMOVE) == 0) &&
3275 				    cdip && (i_ddi_node_state(cdip) >=
3276 				    DS_INITIALIZED)) {
3277 					i_mdi_client_unlock(ct);
3278 					rv = ndi_devi_offline(cdip, 0);
3279 					i_mdi_client_lock(ct, NULL);
3280 
3281 					if (rv != NDI_SUCCESS) {
3282 						/*
3283 						 * ndi_devi_offline failed.
3284 						 * Reset client flags to
3285 						 * online as the path could not
3286 						 * be offlined.
3287 						 */
3288 						MDI_DEBUG(1, (CE_WARN, cdip,
3289 						    "!ndi_devi_offline: failed "
3290 						    " Error: %x", rv));
3291 						MDI_CLIENT_SET_ONLINE(ct);
3292 					}
3293 				}
3294 				break;
3295 			}
3296 			/*
3297 			 * Convert to MDI error code
3298 			 */
3299 			switch (rv) {
3300 			case NDI_SUCCESS:
3301 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3302 				i_mdi_report_path_state(ct, pip);
3303 				rv = MDI_SUCCESS;
3304 				break;
3305 			case NDI_BUSY:
3306 				rv = MDI_BUSY;
3307 				break;
3308 			default:
3309 				rv = MDI_FAILURE;
3310 				break;
3311 			}
3312 		}
3313 	}
3314 	MDI_CLIENT_UNLOCK(ct);
3315 
3316 state_change_exit:
3317 	/*
3318 	 * Mark the pHCI as stable again.
3319 	 */
3320 	MDI_PHCI_LOCK(ph);
3321 	MDI_PHCI_STABLE(ph);
3322 	MDI_PHCI_UNLOCK(ph);
3323 	return (rv);
3324 }
3325 
3326 /*
3327  * mdi_pi_online():
3328  *		Place the path_info node in the online state.  The path is
3329  *		now available to be selected by mdi_select_path() for
3330  *		transporting I/O requests to client devices.
3331  * Return Values:
3332  *		MDI_SUCCESS
3333  *		MDI_FAILURE
3334  */
3335 int
3336 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3337 {
3338 	mdi_client_t *ct = MDI_PI(pip)->pi_client;
3339 	dev_info_t *cdip;
3340 	int		client_held = 0;
3341 	int rv;
3342 
3343 	ASSERT(ct != NULL);
3344 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3345 	if (rv != MDI_SUCCESS)
3346 		return (rv);
3347 
3348 	MDI_PI_LOCK(pip);
3349 	if (MDI_PI(pip)->pi_pm_held == 0) {
3350 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3351 		    "i_mdi_pm_hold_pip\n"));
3352 		i_mdi_pm_hold_pip(pip);
3353 		client_held = 1;
3354 	}
3355 	MDI_PI_UNLOCK(pip);
3356 
3357 	if (client_held) {
3358 		MDI_CLIENT_LOCK(ct);
3359 		if (ct->ct_power_cnt == 0) {
3360 			rv = i_mdi_power_all_phci(ct);
3361 		}
3362 
3363 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3364 		    "i_mdi_pm_hold_client\n"));
3365 		i_mdi_pm_hold_client(ct, 1);
3366 		MDI_CLIENT_UNLOCK(ct);
3367 	}
3368 
3369 	/*
3370 	 * Create the per-path (pathinfo) IO and error kstats which
3371 	 * are reported via iostat(1m).
3372 	 *
3373 	 * Defer creating the per-path kstats if device is not yet
3374 	 * attached;  the names of the kstats are constructed in part
3375 	 * using the devices instance number which is assigned during
3376 	 * process of attaching the client device.
3377 	 *
3378 	 * The framework post_attach handler, mdi_post_attach(), is
3379 	 * is responsible for initializing the client's pathinfo list
3380 	 * once successfully attached.
3381 	 */
3382 	cdip = ct->ct_dip;
3383 	ASSERT(cdip);
3384 	if (cdip == NULL || !i_ddi_devi_attached(cdip))
3385 		return (rv);
3386 
3387 	MDI_CLIENT_LOCK(ct);
3388 	rv = i_mdi_pi_kstat_create(pip);
3389 	MDI_CLIENT_UNLOCK(ct);
3390 	return (rv);
3391 }
3392 
3393 /*
3394  * mdi_pi_standby():
3395  *		Place the mdi_pathinfo node in standby state
3396  *
3397  * Return Values:
3398  *		MDI_SUCCESS
3399  *		MDI_FAILURE
3400  */
3401 int
3402 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3403 {
3404 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3405 }
3406 
3407 /*
3408  * mdi_pi_fault():
3409  *		Place the mdi_pathinfo node in fault'ed state
3410  * Return Values:
3411  *		MDI_SUCCESS
3412  *		MDI_FAILURE
3413  */
3414 int
3415 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3416 {
3417 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3418 }
3419 
3420 /*
3421  * mdi_pi_offline():
3422  *		Offline a mdi_pathinfo node.
3423  * Return Values:
3424  *		MDI_SUCCESS
3425  *		MDI_FAILURE
3426  */
3427 int
3428 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3429 {
3430 	int	ret, client_held = 0;
3431 	mdi_client_t	*ct;
3432 
3433 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3434 
3435 	if (ret == MDI_SUCCESS) {
3436 		MDI_PI_LOCK(pip);
3437 		if (MDI_PI(pip)->pi_pm_held) {
3438 			client_held = 1;
3439 		}
3440 		MDI_PI_UNLOCK(pip);
3441 
3442 		if (client_held) {
3443 			ct = MDI_PI(pip)->pi_client;
3444 			MDI_CLIENT_LOCK(ct);
3445 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3446 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3447 			i_mdi_pm_rele_client(ct, 1);
3448 			MDI_CLIENT_UNLOCK(ct);
3449 		}
3450 	}
3451 
3452 	return (ret);
3453 }
3454 
3455 /*
3456  * i_mdi_pi_offline():
3457  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3458  */
3459 static int
3460 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3461 {
3462 	dev_info_t	*vdip = NULL;
3463 	mdi_vhci_t	*vh = NULL;
3464 	mdi_client_t	*ct = NULL;
3465 	int		(*f)();
3466 	int		rv;
3467 
3468 	MDI_PI_LOCK(pip);
3469 	ct = MDI_PI(pip)->pi_client;
3470 	ASSERT(ct != NULL);
3471 
3472 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3473 		/*
3474 		 * Give a chance for pending I/Os to complete.
3475 		 */
3476 		MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3477 		    "%d cmds still pending on path: %p\n",
3478 		    MDI_PI(pip)->pi_ref_cnt, pip));
3479 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3480 		    &MDI_PI(pip)->pi_mutex,
3481 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3482 			/*
3483 			 * The timeout time reached without ref_cnt being zero
3484 			 * being signaled.
3485 			 */
3486 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3487 			    "Timeout reached on path %p without the cond\n",
3488 			    pip));
3489 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3490 			    "%d cmds still pending on path: %p\n",
3491 			    MDI_PI(pip)->pi_ref_cnt, pip));
3492 		}
3493 	}
3494 	vh = ct->ct_vhci;
3495 	vdip = vh->vh_dip;
3496 
3497 	/*
3498 	 * Notify vHCI that has registered this event
3499 	 */
3500 	ASSERT(vh->vh_ops);
3501 	f = vh->vh_ops->vo_pi_state_change;
3502 
3503 	if (f != NULL) {
3504 		MDI_PI_UNLOCK(pip);
3505 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3506 		    flags)) != MDI_SUCCESS) {
3507 			MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed "
3508 			    "vdip 0x%x, pip 0x%x", vdip, pip));
3509 		}
3510 		MDI_PI_LOCK(pip);
3511 	}
3512 
3513 	/*
3514 	 * Set the mdi_pathinfo node state and clear the transient condition
3515 	 */
3516 	MDI_PI_SET_OFFLINE(pip);
3517 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3518 	MDI_PI_UNLOCK(pip);
3519 
3520 	MDI_CLIENT_LOCK(ct);
3521 	if (rv == MDI_SUCCESS) {
3522 		if (ct->ct_unstable == 0) {
3523 			dev_info_t	*cdip = ct->ct_dip;
3524 
3525 			/*
3526 			 * Onlining the mdi_pathinfo node will impact the
3527 			 * client state Update the client and dev_info node
3528 			 * state accordingly
3529 			 */
3530 			i_mdi_client_update_state(ct);
3531 			rv = NDI_SUCCESS;
3532 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3533 				if (cdip &&
3534 				    (i_ddi_node_state(cdip) >=
3535 				    DS_INITIALIZED)) {
3536 					MDI_CLIENT_UNLOCK(ct);
3537 					rv = ndi_devi_offline(cdip, 0);
3538 					MDI_CLIENT_LOCK(ct);
3539 					if (rv != NDI_SUCCESS) {
3540 						/*
3541 						 * ndi_devi_offline failed.
3542 						 * Reset client flags to
3543 						 * online.
3544 						 */
3545 						MDI_DEBUG(4, (CE_WARN, cdip,
3546 						    "!ndi_devi_offline: failed "
3547 						    " Error: %x", rv));
3548 						MDI_CLIENT_SET_ONLINE(ct);
3549 					}
3550 				}
3551 			}
3552 			/*
3553 			 * Convert to MDI error code
3554 			 */
3555 			switch (rv) {
3556 			case NDI_SUCCESS:
3557 				rv = MDI_SUCCESS;
3558 				break;
3559 			case NDI_BUSY:
3560 				rv = MDI_BUSY;
3561 				break;
3562 			default:
3563 				rv = MDI_FAILURE;
3564 				break;
3565 			}
3566 		}
3567 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3568 		i_mdi_report_path_state(ct, pip);
3569 	}
3570 
3571 	MDI_CLIENT_UNLOCK(ct);
3572 
3573 	/*
3574 	 * Change in the mdi_pathinfo node state will impact the client state
3575 	 */
3576 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3577 	    ct, pip));
3578 	return (rv);
3579 }
3580 
3581 
3582 /*
3583  * mdi_pi_get_addr():
3584  *		Get the unit address associated with a mdi_pathinfo node
3585  *
3586  * Return Values:
3587  *		char *
3588  */
3589 char *
3590 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3591 {
3592 	if (pip == NULL)
3593 		return (NULL);
3594 
3595 	return (MDI_PI(pip)->pi_addr);
3596 }
3597 
3598 /*
3599  * mdi_pi_get_client():
3600  *		Get the client devinfo associated with a mdi_pathinfo node
3601  *
3602  * Return Values:
3603  *		Handle to client device dev_info node
3604  */
3605 dev_info_t *
3606 mdi_pi_get_client(mdi_pathinfo_t *pip)
3607 {
3608 	dev_info_t	*dip = NULL;
3609 	if (pip) {
3610 		dip = MDI_PI(pip)->pi_client->ct_dip;
3611 	}
3612 	return (dip);
3613 }
3614 
3615 /*
3616  * mdi_pi_get_phci():
3617  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3618  * Return Values:
3619  *		Handle to dev_info node
3620  */
3621 dev_info_t *
3622 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3623 {
3624 	dev_info_t	*dip = NULL;
3625 	if (pip) {
3626 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3627 	}
3628 	return (dip);
3629 }
3630 
3631 /*
3632  * mdi_pi_get_client_private():
3633  *		Get the client private information associated with the
3634  *		mdi_pathinfo node
3635  */
3636 void *
3637 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3638 {
3639 	void *cprivate = NULL;
3640 	if (pip) {
3641 		cprivate = MDI_PI(pip)->pi_cprivate;
3642 	}
3643 	return (cprivate);
3644 }
3645 
3646 /*
3647  * mdi_pi_set_client_private():
3648  *		Set the client private information in the mdi_pathinfo node
3649  */
3650 void
3651 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3652 {
3653 	if (pip) {
3654 		MDI_PI(pip)->pi_cprivate = priv;
3655 	}
3656 }
3657 
3658 /*
3659  * mdi_pi_get_phci_private():
3660  *		Get the pHCI private information associated with the
3661  *		mdi_pathinfo node
3662  */
3663 caddr_t
3664 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3665 {
3666 	caddr_t	pprivate = NULL;
3667 	if (pip) {
3668 		pprivate = MDI_PI(pip)->pi_pprivate;
3669 	}
3670 	return (pprivate);
3671 }
3672 
3673 /*
3674  * mdi_pi_set_phci_private():
3675  *		Set the pHCI private information in the mdi_pathinfo node
3676  */
3677 void
3678 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3679 {
3680 	if (pip) {
3681 		MDI_PI(pip)->pi_pprivate = priv;
3682 	}
3683 }
3684 
3685 /*
3686  * mdi_pi_get_state():
3687  *		Get the mdi_pathinfo node state. Transient states are internal
3688  *		and not provided to the users
3689  */
3690 mdi_pathinfo_state_t
3691 mdi_pi_get_state(mdi_pathinfo_t *pip)
3692 {
3693 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3694 
3695 	if (pip) {
3696 		if (MDI_PI_IS_TRANSIENT(pip)) {
3697 			/*
3698 			 * mdi_pathinfo is in state transition.  Return the
3699 			 * last good state.
3700 			 */
3701 			state = MDI_PI_OLD_STATE(pip);
3702 		} else {
3703 			state = MDI_PI_STATE(pip);
3704 		}
3705 	}
3706 	return (state);
3707 }
3708 
3709 /*
3710  * Note that the following function needs to be the new interface for
3711  * mdi_pi_get_state when mpxio gets integrated to ON.
3712  */
3713 int
3714 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
3715 		uint32_t *ext_state)
3716 {
3717 	*state = MDI_PATHINFO_STATE_INIT;
3718 
3719 	if (pip) {
3720 		if (MDI_PI_IS_TRANSIENT(pip)) {
3721 			/*
3722 			 * mdi_pathinfo is in state transition.  Return the
3723 			 * last good state.
3724 			 */
3725 			*state = MDI_PI_OLD_STATE(pip);
3726 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
3727 		} else {
3728 			*state = MDI_PI_STATE(pip);
3729 			*ext_state = MDI_PI_EXT_STATE(pip);
3730 		}
3731 	}
3732 	return (MDI_SUCCESS);
3733 }
3734 
3735 /*
3736  * mdi_pi_get_preferred:
3737  *	Get the preferred path flag
3738  */
3739 int
3740 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
3741 {
3742 	if (pip) {
3743 		return (MDI_PI(pip)->pi_preferred);
3744 	}
3745 	return (0);
3746 }
3747 
3748 /*
3749  * mdi_pi_set_preferred:
3750  *	Set the preferred path flag
3751  */
3752 void
3753 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
3754 {
3755 	if (pip) {
3756 		MDI_PI(pip)->pi_preferred = preferred;
3757 	}
3758 }
3759 
3760 
3761 /*
3762  * mdi_pi_set_state():
3763  *		Set the mdi_pathinfo node state
3764  */
3765 void
3766 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
3767 {
3768 	uint32_t	ext_state;
3769 
3770 	if (pip) {
3771 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
3772 		MDI_PI(pip)->pi_state = state;
3773 		MDI_PI(pip)->pi_state |= ext_state;
3774 	}
3775 }
3776 
3777 /*
3778  * Property functions:
3779  */
3780 
3781 int
3782 i_map_nvlist_error_to_mdi(int val)
3783 {
3784 	int rv;
3785 
3786 	switch (val) {
3787 	case 0:
3788 		rv = DDI_PROP_SUCCESS;
3789 		break;
3790 	case EINVAL:
3791 	case ENOTSUP:
3792 		rv = DDI_PROP_INVAL_ARG;
3793 		break;
3794 	case ENOMEM:
3795 		rv = DDI_PROP_NO_MEMORY;
3796 		break;
3797 	default:
3798 		rv = DDI_PROP_NOT_FOUND;
3799 		break;
3800 	}
3801 	return (rv);
3802 }
3803 
3804 /*
3805  * mdi_pi_get_next_prop():
3806  * 		Property walk function.  The caller should hold mdi_pi_lock()
3807  *		and release by calling mdi_pi_unlock() at the end of walk to
3808  *		get a consistent value.
3809  */
3810 
3811 nvpair_t *
3812 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
3813 {
3814 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3815 		return (NULL);
3816 	}
3817 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3818 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
3819 }
3820 
3821 /*
3822  * mdi_prop_remove():
3823  * 		Remove the named property from the named list.
3824  */
3825 
3826 int
3827 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
3828 {
3829 	if (pip == NULL) {
3830 		return (DDI_PROP_NOT_FOUND);
3831 	}
3832 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3833 	MDI_PI_LOCK(pip);
3834 	if (MDI_PI(pip)->pi_prop == NULL) {
3835 		MDI_PI_UNLOCK(pip);
3836 		return (DDI_PROP_NOT_FOUND);
3837 	}
3838 	if (name) {
3839 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
3840 	} else {
3841 		char		nvp_name[MAXNAMELEN];
3842 		nvpair_t	*nvp;
3843 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
3844 		while (nvp) {
3845 			nvpair_t	*next;
3846 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
3847 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
3848 			    nvpair_name(nvp));
3849 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
3850 			    nvp_name);
3851 			nvp = next;
3852 		}
3853 	}
3854 	MDI_PI_UNLOCK(pip);
3855 	return (DDI_PROP_SUCCESS);
3856 }
3857 
3858 /*
3859  * mdi_prop_size():
3860  * 		Get buffer size needed to pack the property data.
3861  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
3862  *		buffer size.
3863  */
3864 
3865 int
3866 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
3867 {
3868 	int	rv;
3869 	size_t	bufsize;
3870 
3871 	*buflenp = 0;
3872 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3873 		return (DDI_PROP_NOT_FOUND);
3874 	}
3875 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3876 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
3877 	    &bufsize, NV_ENCODE_NATIVE);
3878 	*buflenp = bufsize;
3879 	return (i_map_nvlist_error_to_mdi(rv));
3880 }
3881 
3882 /*
3883  * mdi_prop_pack():
3884  * 		pack the property list.  The caller should hold the
3885  *		mdi_pathinfo_t node to get a consistent data
3886  */
3887 
3888 int
3889 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
3890 {
3891 	int	rv;
3892 	size_t	bufsize;
3893 
3894 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
3895 		return (DDI_PROP_NOT_FOUND);
3896 	}
3897 
3898 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3899 
3900 	bufsize = buflen;
3901 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
3902 	    NV_ENCODE_NATIVE, KM_SLEEP);
3903 
3904 	return (i_map_nvlist_error_to_mdi(rv));
3905 }
3906 
3907 /*
3908  * mdi_prop_update_byte():
3909  *		Create/Update a byte property
3910  */
3911 int
3912 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
3913 {
3914 	int rv;
3915 
3916 	if (pip == NULL) {
3917 		return (DDI_PROP_INVAL_ARG);
3918 	}
3919 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3920 	MDI_PI_LOCK(pip);
3921 	if (MDI_PI(pip)->pi_prop == NULL) {
3922 		MDI_PI_UNLOCK(pip);
3923 		return (DDI_PROP_NOT_FOUND);
3924 	}
3925 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
3926 	MDI_PI_UNLOCK(pip);
3927 	return (i_map_nvlist_error_to_mdi(rv));
3928 }
3929 
3930 /*
3931  * mdi_prop_update_byte_array():
3932  *		Create/Update a byte array property
3933  */
3934 int
3935 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
3936     uint_t nelements)
3937 {
3938 	int rv;
3939 
3940 	if (pip == NULL) {
3941 		return (DDI_PROP_INVAL_ARG);
3942 	}
3943 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3944 	MDI_PI_LOCK(pip);
3945 	if (MDI_PI(pip)->pi_prop == NULL) {
3946 		MDI_PI_UNLOCK(pip);
3947 		return (DDI_PROP_NOT_FOUND);
3948 	}
3949 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
3950 	MDI_PI_UNLOCK(pip);
3951 	return (i_map_nvlist_error_to_mdi(rv));
3952 }
3953 
3954 /*
3955  * mdi_prop_update_int():
3956  *		Create/Update a 32 bit integer property
3957  */
3958 int
3959 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
3960 {
3961 	int rv;
3962 
3963 	if (pip == NULL) {
3964 		return (DDI_PROP_INVAL_ARG);
3965 	}
3966 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3967 	MDI_PI_LOCK(pip);
3968 	if (MDI_PI(pip)->pi_prop == NULL) {
3969 		MDI_PI_UNLOCK(pip);
3970 		return (DDI_PROP_NOT_FOUND);
3971 	}
3972 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
3973 	MDI_PI_UNLOCK(pip);
3974 	return (i_map_nvlist_error_to_mdi(rv));
3975 }
3976 
3977 /*
3978  * mdi_prop_update_int64():
3979  *		Create/Update a 64 bit integer property
3980  */
3981 int
3982 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
3983 {
3984 	int rv;
3985 
3986 	if (pip == NULL) {
3987 		return (DDI_PROP_INVAL_ARG);
3988 	}
3989 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3990 	MDI_PI_LOCK(pip);
3991 	if (MDI_PI(pip)->pi_prop == NULL) {
3992 		MDI_PI_UNLOCK(pip);
3993 		return (DDI_PROP_NOT_FOUND);
3994 	}
3995 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
3996 	MDI_PI_UNLOCK(pip);
3997 	return (i_map_nvlist_error_to_mdi(rv));
3998 }
3999 
4000 /*
4001  * mdi_prop_update_int_array():
4002  *		Create/Update a int array property
4003  */
4004 int
4005 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4006 	    uint_t nelements)
4007 {
4008 	int rv;
4009 
4010 	if (pip == NULL) {
4011 		return (DDI_PROP_INVAL_ARG);
4012 	}
4013 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4014 	MDI_PI_LOCK(pip);
4015 	if (MDI_PI(pip)->pi_prop == NULL) {
4016 		MDI_PI_UNLOCK(pip);
4017 		return (DDI_PROP_NOT_FOUND);
4018 	}
4019 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4020 	    nelements);
4021 	MDI_PI_UNLOCK(pip);
4022 	return (i_map_nvlist_error_to_mdi(rv));
4023 }
4024 
4025 /*
4026  * mdi_prop_update_string():
4027  *		Create/Update a string property
4028  */
4029 int
4030 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4031 {
4032 	int rv;
4033 
4034 	if (pip == NULL) {
4035 		return (DDI_PROP_INVAL_ARG);
4036 	}
4037 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4038 	MDI_PI_LOCK(pip);
4039 	if (MDI_PI(pip)->pi_prop == NULL) {
4040 		MDI_PI_UNLOCK(pip);
4041 		return (DDI_PROP_NOT_FOUND);
4042 	}
4043 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4044 	MDI_PI_UNLOCK(pip);
4045 	return (i_map_nvlist_error_to_mdi(rv));
4046 }
4047 
4048 /*
4049  * mdi_prop_update_string_array():
4050  *		Create/Update a string array property
4051  */
4052 int
4053 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4054     uint_t nelements)
4055 {
4056 	int rv;
4057 
4058 	if (pip == NULL) {
4059 		return (DDI_PROP_INVAL_ARG);
4060 	}
4061 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4062 	MDI_PI_LOCK(pip);
4063 	if (MDI_PI(pip)->pi_prop == NULL) {
4064 		MDI_PI_UNLOCK(pip);
4065 		return (DDI_PROP_NOT_FOUND);
4066 	}
4067 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4068 	    nelements);
4069 	MDI_PI_UNLOCK(pip);
4070 	return (i_map_nvlist_error_to_mdi(rv));
4071 }
4072 
4073 /*
4074  * mdi_prop_lookup_byte():
4075  * 		Look for byte property identified by name.  The data returned
4076  *		is the actual property and valid as long as mdi_pathinfo_t node
4077  *		is alive.
4078  */
4079 int
4080 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4081 {
4082 	int rv;
4083 
4084 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4085 		return (DDI_PROP_NOT_FOUND);
4086 	}
4087 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4088 	return (i_map_nvlist_error_to_mdi(rv));
4089 }
4090 
4091 
4092 /*
4093  * mdi_prop_lookup_byte_array():
4094  * 		Look for byte array property identified by name.  The data
4095  *		returned is the actual property and valid as long as
4096  *		mdi_pathinfo_t node is alive.
4097  */
4098 int
4099 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4100     uint_t *nelements)
4101 {
4102 	int rv;
4103 
4104 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4105 		return (DDI_PROP_NOT_FOUND);
4106 	}
4107 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4108 	    nelements);
4109 	return (i_map_nvlist_error_to_mdi(rv));
4110 }
4111 
4112 /*
4113  * mdi_prop_lookup_int():
4114  * 		Look for int property identified by name.  The data returned
4115  *		is the actual property and valid as long as mdi_pathinfo_t
4116  *		node is alive.
4117  */
4118 int
4119 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4120 {
4121 	int rv;
4122 
4123 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4124 		return (DDI_PROP_NOT_FOUND);
4125 	}
4126 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4127 	return (i_map_nvlist_error_to_mdi(rv));
4128 }
4129 
4130 /*
4131  * mdi_prop_lookup_int64():
4132  * 		Look for int64 property identified by name.  The data returned
4133  *		is the actual property and valid as long as mdi_pathinfo_t node
4134  *		is alive.
4135  */
4136 int
4137 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4138 {
4139 	int rv;
4140 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4141 		return (DDI_PROP_NOT_FOUND);
4142 	}
4143 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4144 	return (i_map_nvlist_error_to_mdi(rv));
4145 }
4146 
4147 /*
4148  * mdi_prop_lookup_int_array():
4149  * 		Look for int array property identified by name.  The data
4150  *		returned is the actual property and valid as long as
4151  *		mdi_pathinfo_t node is alive.
4152  */
4153 int
4154 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4155     uint_t *nelements)
4156 {
4157 	int rv;
4158 
4159 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4160 		return (DDI_PROP_NOT_FOUND);
4161 	}
4162 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4163 	    (int32_t **)data, nelements);
4164 	return (i_map_nvlist_error_to_mdi(rv));
4165 }
4166 
4167 /*
4168  * mdi_prop_lookup_string():
4169  * 		Look for string property identified by name.  The data
4170  *		returned is the actual property and valid as long as
4171  *		mdi_pathinfo_t node is alive.
4172  */
4173 int
4174 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4175 {
4176 	int rv;
4177 
4178 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4179 		return (DDI_PROP_NOT_FOUND);
4180 	}
4181 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4182 	return (i_map_nvlist_error_to_mdi(rv));
4183 }
4184 
4185 /*
4186  * mdi_prop_lookup_string_array():
4187  * 		Look for string array property identified by name.  The data
4188  *		returned is the actual property and valid as long as
4189  *		mdi_pathinfo_t node is alive.
4190  */
4191 
4192 int
4193 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4194     uint_t *nelements)
4195 {
4196 	int rv;
4197 
4198 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4199 		return (DDI_PROP_NOT_FOUND);
4200 	}
4201 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4202 	    nelements);
4203 	return (i_map_nvlist_error_to_mdi(rv));
4204 }
4205 
4206 /*
4207  * mdi_prop_free():
4208  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4209  *		functions return the pointer to actual property data and not a
4210  *		copy of it.  So the data returned is valid as long as
4211  *		mdi_pathinfo_t node is valid.
4212  */
4213 
4214 /*ARGSUSED*/
4215 int
4216 mdi_prop_free(void *data)
4217 {
4218 	return (DDI_PROP_SUCCESS);
4219 }
4220 
4221 /*ARGSUSED*/
4222 static void
4223 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4224 {
4225 	char		*phci_path, *ct_path;
4226 	char		*ct_status;
4227 	char		*status;
4228 	dev_info_t	*dip = ct->ct_dip;
4229 	char		lb_buf[64];
4230 
4231 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4232 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4233 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4234 		return;
4235 	}
4236 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4237 		ct_status = "optimal";
4238 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4239 		ct_status = "degraded";
4240 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4241 		ct_status = "failed";
4242 	} else {
4243 		ct_status = "unknown";
4244 	}
4245 
4246 	if (MDI_PI_IS_OFFLINE(pip)) {
4247 		status = "offline";
4248 	} else if (MDI_PI_IS_ONLINE(pip)) {
4249 		status = "online";
4250 	} else if (MDI_PI_IS_STANDBY(pip)) {
4251 		status = "standby";
4252 	} else if (MDI_PI_IS_FAULT(pip)) {
4253 		status = "faulted";
4254 	} else {
4255 		status = "unknown";
4256 	}
4257 
4258 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4259 		(void) snprintf(lb_buf, sizeof (lb_buf),
4260 		    "%s, region-size: %d", mdi_load_balance_lba,
4261 			ct->ct_lb_args->region_size);
4262 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4263 		(void) snprintf(lb_buf, sizeof (lb_buf),
4264 		    "%s", mdi_load_balance_none);
4265 	} else {
4266 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4267 		    mdi_load_balance_rr);
4268 	}
4269 
4270 	if (dip) {
4271 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4272 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4273 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4274 		    "path %s (%s%d) to target address: %s is %s"
4275 		    " Load balancing: %s\n",
4276 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4277 		    ddi_get_instance(dip), ct_status,
4278 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4279 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4280 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4281 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4282 		kmem_free(phci_path, MAXPATHLEN);
4283 		kmem_free(ct_path, MAXPATHLEN);
4284 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4285 	}
4286 }
4287 
4288 #ifdef	DEBUG
4289 /*
4290  * i_mdi_log():
4291  *		Utility function for error message management
4292  *
4293  */
4294 
4295 /*VARARGS3*/
4296 static void
4297 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
4298 {
4299 	char		buf[MAXNAMELEN];
4300 	char		name[MAXNAMELEN];
4301 	va_list		ap;
4302 	int		log_only = 0;
4303 	int		boot_only = 0;
4304 	int		console_only = 0;
4305 
4306 	if (dip) {
4307 		if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) {
4308 			(void) snprintf(name, MAXNAMELEN, "%s%d:\n",
4309 			    ddi_node_name(dip), ddi_get_instance(dip));
4310 		} else {
4311 			(void) snprintf(name, MAXNAMELEN, "%s%d:",
4312 			    ddi_node_name(dip), ddi_get_instance(dip));
4313 		}
4314 	} else {
4315 		name[0] = '\0';
4316 	}
4317 
4318 	va_start(ap, fmt);
4319 	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
4320 	va_end(ap);
4321 
4322 	switch (buf[0]) {
4323 	case '!':
4324 		log_only = 1;
4325 		break;
4326 	case '?':
4327 		boot_only = 1;
4328 		break;
4329 	case '^':
4330 		console_only = 1;
4331 		break;
4332 	}
4333 
4334 	switch (level) {
4335 	case CE_NOTE:
4336 		level = CE_CONT;
4337 		/* FALLTHROUGH */
4338 	case CE_CONT:
4339 	case CE_WARN:
4340 	case CE_PANIC:
4341 		if (boot_only) {
4342 			cmn_err(level, "?%s\t%s", name, &buf[1]);
4343 		} else if (console_only) {
4344 			cmn_err(level, "^%s\t%s", name, &buf[1]);
4345 		} else if (log_only) {
4346 			cmn_err(level, "!%s\t%s", name, &buf[1]);
4347 		} else {
4348 			cmn_err(level, "%s\t%s", name, buf);
4349 		}
4350 		break;
4351 	default:
4352 		cmn_err(level, "%s\t%s", name, buf);
4353 		break;
4354 	}
4355 }
4356 #endif	/* DEBUG */
4357 
4358 void
4359 i_mdi_client_online(dev_info_t *ct_dip)
4360 {
4361 	mdi_client_t	*ct;
4362 
4363 	/*
4364 	 * Client online notification. Mark client state as online
4365 	 * restore our binding with dev_info node
4366 	 */
4367 	ct = i_devi_get_client(ct_dip);
4368 	ASSERT(ct != NULL);
4369 	MDI_CLIENT_LOCK(ct);
4370 	MDI_CLIENT_SET_ONLINE(ct);
4371 	/* catch for any memory leaks */
4372 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4373 	ct->ct_dip = ct_dip;
4374 
4375 	if (ct->ct_power_cnt == 0)
4376 		(void) i_mdi_power_all_phci(ct);
4377 
4378 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4379 	    "i_mdi_pm_hold_client\n"));
4380 	i_mdi_pm_hold_client(ct, 1);
4381 
4382 	MDI_CLIENT_UNLOCK(ct);
4383 }
4384 
4385 void
4386 i_mdi_phci_online(dev_info_t *ph_dip)
4387 {
4388 	mdi_phci_t	*ph;
4389 
4390 	/* pHCI online notification. Mark state accordingly */
4391 	ph = i_devi_get_phci(ph_dip);
4392 	ASSERT(ph != NULL);
4393 	MDI_PHCI_LOCK(ph);
4394 	MDI_PHCI_SET_ONLINE(ph);
4395 	MDI_PHCI_UNLOCK(ph);
4396 }
4397 
4398 /*
4399  * mdi_devi_online():
4400  * 		Online notification from NDI framework on pHCI/client
4401  *		device online.
4402  * Return Values:
4403  *		NDI_SUCCESS
4404  *		MDI_FAILURE
4405  */
4406 
4407 /*ARGSUSED*/
4408 int
4409 mdi_devi_online(dev_info_t *dip, uint_t flags)
4410 {
4411 	if (MDI_PHCI(dip)) {
4412 		i_mdi_phci_online(dip);
4413 	}
4414 
4415 	if (MDI_CLIENT(dip)) {
4416 		i_mdi_client_online(dip);
4417 	}
4418 	return (NDI_SUCCESS);
4419 }
4420 
4421 /*
4422  * mdi_devi_offline():
4423  * 		Offline notification from NDI framework on pHCI/Client device
4424  *		offline.
4425  *
4426  * Return Values:
4427  *		NDI_SUCCESS
4428  *		NDI_FAILURE
4429  */
4430 
4431 /*ARGSUSED*/
4432 int
4433 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4434 {
4435 	int		rv = NDI_SUCCESS;
4436 
4437 	if (MDI_CLIENT(dip)) {
4438 		rv = i_mdi_client_offline(dip, flags);
4439 		if (rv != NDI_SUCCESS)
4440 			return (rv);
4441 	}
4442 
4443 	if (MDI_PHCI(dip)) {
4444 		rv = i_mdi_phci_offline(dip, flags);
4445 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4446 			/* set client back online */
4447 			i_mdi_client_online(dip);
4448 		}
4449 	}
4450 
4451 	return (rv);
4452 }
4453 
/*
 * i_mdi_phci_offline():
 *		pHCI half of mdi_devi_offline().
 *
 *		Phases, in order:
 *		  1. Bail out early: no pHCI bound to dip (NDI_SUCCESS),
 *		     pHCI already offline (NDI_SUCCESS), or ph_unstable set
 *		     (NDI_BUSY).
 *		  2. Walk ph_path_head.  A client that is failing over or
 *		     has ct_unstable set fails the request with NDI_BUSY.
 *		     A client whose computed state would be FAILED is
 *		     offlined with ndi_devi_offline(); the pHCI lock is
 *		     dropped around that call.
 *		  3. If step 2 could not offline a client, walk the paths
 *		     that precede the failing one, online/offline each
 *		     client according to its current state, and return
 *		     NDI_BUSY.
 *		  4. Otherwise mark the pHCI offline, mark every path
 *		     OFFLINING, delay(1), then call i_mdi_pi_offline() on
 *		     every path.  A path that is still not offline puts the
 *		     pHCI back online and returns NDI_BUSY.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_BUSY
 */
4454 /*ARGSUSED*/
4455 static int
4456 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4457 {
4458 	int		rv = NDI_SUCCESS;
4459 	mdi_phci_t	*ph;
4460 	mdi_client_t	*ct;
4461 	mdi_pathinfo_t	*pip;
4462 	mdi_pathinfo_t	*next;
4463 	mdi_pathinfo_t	*failed_pip = NULL;
4464 	dev_info_t	*cdip;
4465 
4466 	/*
4467 	 * pHCI component offline notification
4468 	 * Make sure that this pHCI instance is free to be offlined.
4469 	 * If it is OK to proceed, Offline and remove all the child
4470 	 * mdi_pathinfo nodes.  This process automatically offlines
4471 	 * corresponding client devices, for which this pHCI provides
4472 	 * critical services.
4473 	 */
4474 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
4475 	    dip));
4476 
4477 	ph = i_devi_get_phci(dip);
4478 	if (ph == NULL) {
		/* no pHCI bound to this node: rv is still NDI_SUCCESS */
4479 		return (rv);
4480 	}
4481 
4482 	MDI_PHCI_LOCK(ph);
4483 
4484 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4485 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
4486 		MDI_PHCI_UNLOCK(ph);
4487 		return (NDI_SUCCESS);
4488 	}
4489 
4490 	/*
4491 	 * Check to see if the pHCI can be offlined
4492 	 */
4493 	if (ph->ph_unstable) {
4494 		MDI_DEBUG(1, (CE_WARN, dip,
4495 		    "!One or more target devices are in transient "
4496 		    "state. This device can not be removed at "
4497 		    "this moment. Please try again later."));
4498 		MDI_PHCI_UNLOCK(ph);
4499 		return (NDI_BUSY);
4500 	}
4501 
	/*
	 * Phase 2: examine every path of this pHCI.  'next' is sampled
	 * under the path lock before any lock is dropped.
	 */
4502 	pip = ph->ph_path_head;
4503 	while (pip != NULL) {
4504 		MDI_PI_LOCK(pip);
4505 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4506 		/*
4507 		 * The mdi_pathinfo state is OK. Check the client state.
4508 		 * If failover in progress fail the pHCI from offlining
4509 		 */
4510 		ct = MDI_PI(pip)->pi_client;
		/*
		 * NOTE(review): i_mdi_client_lock() is defined elsewhere;
		 * presumably it takes the client lock while the path lock
		 * is held -- confirm against its definition.
		 */
4511 		i_mdi_client_lock(ct, pip);
4512 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4513 		    (ct->ct_unstable)) {
4514 			/*
4515 			 * Failover is in progress, Fail the DR
4516 			 */
4517 			MDI_DEBUG(1, (CE_WARN, dip,
4518 			    "!pHCI device (%s%d) is Busy. %s",
4519 			    ddi_driver_name(dip), ddi_get_instance(dip),
4520 			    "This device can not be removed at "
4521 			    "this moment. Please try again later."));
4522 			MDI_PI_UNLOCK(pip);
4523 			MDI_CLIENT_UNLOCK(ct);
4524 			MDI_PHCI_UNLOCK(ph);
4525 			return (NDI_BUSY);
4526 		}
4527 		MDI_PI_UNLOCK(pip);
4528 
4529 		/*
4530 		 * Check to see if we are removing the last path of this
4531 		 * client device...
4532 		 */
4533 		cdip = ct->ct_dip;
4534 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4535 		    (i_mdi_client_compute_state(ct, ph) ==
4536 		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Both the client and pHCI locks are released
			 * around ndi_devi_offline(); the pHCI lock is
			 * re-taken on either outcome.
			 */
4537 			i_mdi_client_unlock(ct);
4538 			MDI_PHCI_UNLOCK(ph);
4539 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4540 				/*
4541 				 * ndi_devi_offline() failed.
4542 				 * This pHCI provides the critical path
4543 				 * to one or more client devices.
4544 				 * Return busy.
4545 				 */
4546 				MDI_PHCI_LOCK(ph);
4547 				MDI_DEBUG(1, (CE_WARN, dip,
4548 				    "!pHCI device (%s%d) is Busy. %s",
4549 				    ddi_driver_name(dip), ddi_get_instance(dip),
4550 				    "This device can not be removed at "
4551 				    "this moment. Please try again later."));
				/* remember where to stop the rollback walk */
4552 				failed_pip = pip;
4553 				break;
4554 			} else {
4555 				MDI_PHCI_LOCK(ph);
4556 				pip = next;
4557 			}
4558 		} else {
4559 			i_mdi_client_unlock(ct);
4560 			pip = next;
4561 		}
4562 	}
4563 
	/*
	 * Phase 3: a client refused to go offline.  Revisit the paths
	 * that were processed before the failing one and bring each
	 * client's devinfo node in line with its current client state,
	 * then report busy.  The pHCI lock is held on entry to this
	 * block and dropped around each ndi_devi_online/offline call.
	 */
4564 	if (failed_pip) {
4565 		pip = ph->ph_path_head;
4566 		while (pip != failed_pip) {
4567 			MDI_PI_LOCK(pip);
4568 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4569 			ct = MDI_PI(pip)->pi_client;
4570 			i_mdi_client_lock(ct, pip);
4571 			cdip = ct->ct_dip;
4572 			switch (MDI_CLIENT_STATE(ct)) {
4573 			case MDI_CLIENT_STATE_OPTIMAL:
4574 			case MDI_CLIENT_STATE_DEGRADED:
4575 				if (cdip) {
4576 					MDI_PI_UNLOCK(pip);
4577 					i_mdi_client_unlock(ct);
4578 					MDI_PHCI_UNLOCK(ph);
4579 					(void) ndi_devi_online(cdip, 0);
4580 					MDI_PHCI_LOCK(ph);
4581 					pip = next;
4582 					continue;
4583 				}
4584 				break;
4585 
4586 			case MDI_CLIENT_STATE_FAILED:
4587 				if (cdip) {
4588 					MDI_PI_UNLOCK(pip);
4589 					i_mdi_client_unlock(ct);
4590 					MDI_PHCI_UNLOCK(ph);
4591 					(void) ndi_devi_offline(cdip, 0);
4592 					MDI_PHCI_LOCK(ph);
4593 					pip = next;
4594 					continue;
4595 				}
4596 				break;
4597 			}
			/* no devinfo node or other state: just release locks */
4598 			MDI_PI_UNLOCK(pip);
4599 			i_mdi_client_unlock(ct);
4600 			pip = next;
4601 		}
4602 		MDI_PHCI_UNLOCK(ph);
4603 		return (NDI_BUSY);
4604 	}
4605 
4606 	/*
4607 	 * Mark the pHCI as offline
4608 	 */
4609 	MDI_PHCI_SET_OFFLINE(ph);
4610 
4611 	/*
4612 	 * Mark the child mdi_pathinfo nodes as transient
4613 	 */
4614 	pip = ph->ph_path_head;
4615 	while (pip != NULL) {
4616 		MDI_PI_LOCK(pip);
4617 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4618 		MDI_PI_SET_OFFLINING(pip);
4619 		MDI_PI_UNLOCK(pip);
4620 		pip = next;
4621 	}
4622 	MDI_PHCI_UNLOCK(ph);
4623 	/*
4624 	 * Give a chance for any pending commands to execute
4625 	 */
4626 	delay(1);
4627 	MDI_PHCI_LOCK(ph);
	/*
	 * Phase 4: offline each path.  The return value of
	 * i_mdi_pi_offline() is ignored; success is judged by re-reading
	 * the path state under the path lock.
	 */
4628 	pip = ph->ph_path_head;
4629 	while (pip != NULL) {
4630 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4631 		(void) i_mdi_pi_offline(pip, flags);
4632 		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not read again below */
4633 		ct = MDI_PI(pip)->pi_client;
4634 		if (!MDI_PI_IS_OFFLINE(pip)) {
4635 			MDI_DEBUG(1, (CE_WARN, dip,
4636 			    "!pHCI device (%s%d) is Busy. %s",
4637 			    ddi_driver_name(dip), ddi_get_instance(dip),
4638 			    "This device can not be removed at "
4639 			    "this moment. Please try again later."));
4640 			MDI_PI_UNLOCK(pip);
			/* undo the offline mark set on the pHCI above */
4641 			MDI_PHCI_SET_ONLINE(ph);
4642 			MDI_PHCI_UNLOCK(ph);
4643 			return (NDI_BUSY);
4644 		}
4645 		MDI_PI_UNLOCK(pip);
4646 		pip = next;
4647 	}
4648 	MDI_PHCI_UNLOCK(ph);
4649 
4650 	return (rv);
4651 }
4652 
4653 /*ARGSUSED*/
4654 static int
4655 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
4656 {
4657 	int		rv = NDI_SUCCESS;
4658 	mdi_client_t	*ct;
4659 
4660 	/*
4661 	 * Client component to go offline.  Make sure that we are
4662 	 * not in failing over state and update client state
4663 	 * accordingly
4664 	 */
4665 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
4666 	    dip));
4667 	ct = i_devi_get_client(dip);
4668 	if (ct != NULL) {
4669 		MDI_CLIENT_LOCK(ct);
4670 		if (ct->ct_unstable) {
4671 			/*
4672 			 * One or more paths are in transient state,
4673 			 * Dont allow offline of a client device
4674 			 */
4675 			MDI_DEBUG(1, (CE_WARN, dip,
4676 			    "!One or more paths to this device is "
4677 			    "in transient state. This device can not "
4678 			    "be removed at this moment. "
4679 			    "Please try again later."));
4680 			MDI_CLIENT_UNLOCK(ct);
4681 			return (NDI_BUSY);
4682 		}
4683 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
4684 			/*
4685 			 * Failover is in progress, Dont allow DR of
4686 			 * a client device
4687 			 */
4688 			MDI_DEBUG(1, (CE_WARN, dip,
4689 			    "!Client device (%s%d) is Busy. %s",
4690 			    ddi_driver_name(dip), ddi_get_instance(dip),
4691 			    "This device can not be removed at "
4692 			    "this moment. Please try again later."));
4693 			MDI_CLIENT_UNLOCK(ct);
4694 			return (NDI_BUSY);
4695 		}
4696 		MDI_CLIENT_SET_OFFLINE(ct);
4697 
4698 		/*
4699 		 * Unbind our relationship with the dev_info node
4700 		 */
4701 		if (flags & NDI_DEVI_REMOVE) {
4702 			ct->ct_dip = NULL;
4703 		}
4704 		MDI_CLIENT_UNLOCK(ct);
4705 	}
4706 	return (rv);
4707 }
4708 
4709 /*
4710  * mdi_pre_attach():
4711  *		Pre attach() notification handler
4712  */
4713 
4714 /*ARGSUSED*/
4715 int
4716 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4717 {
4718 	/* don't support old DDI_PM_RESUME */
4719 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
4720 	    (cmd == DDI_PM_RESUME))
4721 		return (DDI_FAILURE);
4722 
4723 	return (DDI_SUCCESS);
4724 }
4725 
4726 /*
4727  * mdi_post_attach():
4728  *		Post attach() notification handler
4729  */
4730 
4731 /*ARGSUSED*/
4732 void
4733 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
4734 {
4735 	mdi_phci_t	*ph;
4736 	mdi_client_t	*ct;
4737 	mdi_pathinfo_t	*pip;
4738 
4739 	if (MDI_PHCI(dip)) {
4740 		ph = i_devi_get_phci(dip);
4741 		ASSERT(ph != NULL);
4742 
4743 		MDI_PHCI_LOCK(ph);
4744 		switch (cmd) {
4745 		case DDI_ATTACH:
4746 			MDI_DEBUG(2, (CE_NOTE, dip,
4747 			    "!pHCI post_attach: called %p\n", ph));
4748 			if (error == DDI_SUCCESS) {
4749 				MDI_PHCI_SET_ATTACH(ph);
4750 			} else {
4751 				MDI_DEBUG(1, (CE_NOTE, dip,
4752 				    "!pHCI post_attach: failed error=%d\n",
4753 				    error));
4754 				MDI_PHCI_SET_DETACH(ph);
4755 			}
4756 			break;
4757 
4758 		case DDI_RESUME:
4759 			MDI_DEBUG(2, (CE_NOTE, dip,
4760 			    "!pHCI post_resume: called %p\n", ph));
4761 			if (error == DDI_SUCCESS) {
4762 				MDI_PHCI_SET_RESUME(ph);
4763 			} else {
4764 				MDI_DEBUG(1, (CE_NOTE, dip,
4765 				    "!pHCI post_resume: failed error=%d\n",
4766 				    error));
4767 				MDI_PHCI_SET_SUSPEND(ph);
4768 			}
4769 			break;
4770 		}
4771 		MDI_PHCI_UNLOCK(ph);
4772 	}
4773 
4774 	if (MDI_CLIENT(dip)) {
4775 		ct = i_devi_get_client(dip);
4776 		ASSERT(ct != NULL);
4777 
4778 		MDI_CLIENT_LOCK(ct);
4779 		switch (cmd) {
4780 		case DDI_ATTACH:
4781 			MDI_DEBUG(2, (CE_NOTE, dip,
4782 			    "!Client post_attach: called %p\n", ct));
4783 			if (error != DDI_SUCCESS) {
4784 				MDI_DEBUG(1, (CE_NOTE, dip,
4785 				    "!Client post_attach: failed error=%d\n",
4786 				    error));
4787 				MDI_CLIENT_SET_DETACH(ct);
4788 				MDI_DEBUG(4, (CE_WARN, dip,
4789 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
4790 				i_mdi_pm_reset_client(ct);
4791 				break;
4792 			}
4793 
4794 			/*
4795 			 * Client device has successfully attached.
4796 			 * Create kstats for any pathinfo structures
4797 			 * initially associated with this client.
4798 			 */
4799 			for (pip = ct->ct_path_head; pip != NULL;
4800 			    pip = (mdi_pathinfo_t *)
4801 			    MDI_PI(pip)->pi_client_link) {
4802 				(void) i_mdi_pi_kstat_create(pip);
4803 				i_mdi_report_path_state(ct, pip);
4804 			}
4805 			MDI_CLIENT_SET_ATTACH(ct);
4806 			break;
4807 
4808 		case DDI_RESUME:
4809 			MDI_DEBUG(2, (CE_NOTE, dip,
4810 			    "!Client post_attach: called %p\n", ct));
4811 			if (error == DDI_SUCCESS) {
4812 				MDI_CLIENT_SET_RESUME(ct);
4813 			} else {
4814 				MDI_DEBUG(1, (CE_NOTE, dip,
4815 				    "!Client post_resume: failed error=%d\n",
4816 				    error));
4817 				MDI_CLIENT_SET_SUSPEND(ct);
4818 			}
4819 			break;
4820 		}
4821 		MDI_CLIENT_UNLOCK(ct);
4822 	}
4823 }
4824 
4825 /*
4826  * mdi_pre_detach():
4827  *		Pre detach notification handler
4828  */
4829 
4830 /*ARGSUSED*/
4831 int
4832 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4833 {
4834 	int rv = DDI_SUCCESS;
4835 
4836 	if (MDI_CLIENT(dip)) {
4837 		(void) i_mdi_client_pre_detach(dip, cmd);
4838 	}
4839 
4840 	if (MDI_PHCI(dip)) {
4841 		rv = i_mdi_phci_pre_detach(dip, cmd);
4842 	}
4843 
4844 	return (rv);
4845 }
4846 
4847 /*ARGSUSED*/
4848 static int
4849 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4850 {
4851 	int		rv = DDI_SUCCESS;
4852 	mdi_phci_t	*ph;
4853 	mdi_client_t	*ct;
4854 	mdi_pathinfo_t	*pip;
4855 	mdi_pathinfo_t	*failed_pip = NULL;
4856 	mdi_pathinfo_t	*next;
4857 
4858 	ph = i_devi_get_phci(dip);
4859 	if (ph == NULL) {
4860 		return (rv);
4861 	}
4862 
4863 	MDI_PHCI_LOCK(ph);
4864 	switch (cmd) {
4865 	case DDI_DETACH:
4866 		MDI_DEBUG(2, (CE_NOTE, dip,
4867 		    "!pHCI pre_detach: called %p\n", ph));
4868 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
4869 			/*
4870 			 * mdi_pathinfo nodes are still attached to
4871 			 * this pHCI. Fail the detach for this pHCI.
4872 			 */
4873 			MDI_DEBUG(2, (CE_WARN, dip,
4874 			    "!pHCI pre_detach: "
4875 			    "mdi_pathinfo nodes are still attached "
4876 			    "%p\n", ph));
4877 			rv = DDI_FAILURE;
4878 			break;
4879 		}
4880 		MDI_PHCI_SET_DETACH(ph);
4881 		break;
4882 
4883 	case DDI_SUSPEND:
4884 		/*
4885 		 * pHCI is getting suspended.  Since mpxio client
4886 		 * devices may not be suspended at this point, to avoid
4887 		 * a potential stack overflow, it is important to suspend
4888 		 * client devices before pHCI can be suspended.
4889 		 */
4890 
4891 		MDI_DEBUG(2, (CE_NOTE, dip,
4892 		    "!pHCI pre_suspend: called %p\n", ph));
4893 		/*
4894 		 * Suspend all the client devices accessible through this pHCI
4895 		 */
4896 		pip = ph->ph_path_head;
4897 		while (pip != NULL && rv == DDI_SUCCESS) {
4898 			dev_info_t *cdip;
4899 			MDI_PI_LOCK(pip);
4900 			next =
4901 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4902 			ct = MDI_PI(pip)->pi_client;
4903 			i_mdi_client_lock(ct, pip);
4904 			cdip = ct->ct_dip;
4905 			MDI_PI_UNLOCK(pip);
4906 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
4907 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
4908 				i_mdi_client_unlock(ct);
4909 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
4910 				    DDI_SUCCESS) {
4911 					/*
4912 					 * Suspend of one of the client
4913 					 * device has failed.
4914 					 */
4915 					MDI_DEBUG(1, (CE_WARN, dip,
4916 					    "!Suspend of device (%s%d) failed.",
4917 					    ddi_driver_name(cdip),
4918 					    ddi_get_instance(cdip)));
4919 					failed_pip = pip;
4920 					break;
4921 				}
4922 			} else {
4923 				i_mdi_client_unlock(ct);
4924 			}
4925 			pip = next;
4926 		}
4927 
4928 		if (rv == DDI_SUCCESS) {
4929 			/*
4930 			 * Suspend of client devices is complete. Proceed
4931 			 * with pHCI suspend.
4932 			 */
4933 			MDI_PHCI_SET_SUSPEND(ph);
4934 		} else {
4935 			/*
4936 			 * Revert back all the suspended client device states
4937 			 * to converse.
4938 			 */
4939 			pip = ph->ph_path_head;
4940 			while (pip != failed_pip) {
4941 				dev_info_t *cdip;
4942 				MDI_PI_LOCK(pip);
4943 				next =
4944 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4945 				ct = MDI_PI(pip)->pi_client;
4946 				i_mdi_client_lock(ct, pip);
4947 				cdip = ct->ct_dip;
4948 				MDI_PI_UNLOCK(pip);
4949 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
4950 					i_mdi_client_unlock(ct);
4951 					(void) devi_attach(cdip, DDI_RESUME);
4952 				} else {
4953 					i_mdi_client_unlock(ct);
4954 				}
4955 				pip = next;
4956 			}
4957 		}
4958 		break;
4959 
4960 	default:
4961 		rv = DDI_FAILURE;
4962 		break;
4963 	}
4964 	MDI_PHCI_UNLOCK(ph);
4965 	return (rv);
4966 }
4967 
4968 /*ARGSUSED*/
4969 static int
4970 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4971 {
4972 	int		rv = DDI_SUCCESS;
4973 	mdi_client_t	*ct;
4974 
4975 	ct = i_devi_get_client(dip);
4976 	if (ct == NULL) {
4977 		return (rv);
4978 	}
4979 
4980 	MDI_CLIENT_LOCK(ct);
4981 	switch (cmd) {
4982 	case DDI_DETACH:
4983 		MDI_DEBUG(2, (CE_NOTE, dip,
4984 		    "!Client pre_detach: called %p\n", ct));
4985 		MDI_CLIENT_SET_DETACH(ct);
4986 		break;
4987 
4988 	case DDI_SUSPEND:
4989 		MDI_DEBUG(2, (CE_NOTE, dip,
4990 		    "!Client pre_suspend: called %p\n", ct));
4991 		MDI_CLIENT_SET_SUSPEND(ct);
4992 		break;
4993 
4994 	default:
4995 		rv = DDI_FAILURE;
4996 		break;
4997 	}
4998 	MDI_CLIENT_UNLOCK(ct);
4999 	return (rv);
5000 }
5001 
5002 /*
5003  * mdi_post_detach():
5004  *		Post detach notification handler
5005  */
5006 
5007 /*ARGSUSED*/
5008 void
5009 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5010 {
5011 	/*
5012 	 * Detach/Suspend of mpxio component failed. Update our state
5013 	 * too
5014 	 */
5015 	if (MDI_PHCI(dip))
5016 		i_mdi_phci_post_detach(dip, cmd, error);
5017 
5018 	if (MDI_CLIENT(dip))
5019 		i_mdi_client_post_detach(dip, cmd, error);
5020 }
5021 
5022 /*ARGSUSED*/
5023 static void
5024 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5025 {
5026 	mdi_phci_t	*ph;
5027 
5028 	/*
5029 	 * Detach/Suspend of phci component failed. Update our state
5030 	 * too
5031 	 */
5032 	ph = i_devi_get_phci(dip);
5033 	if (ph == NULL) {
5034 		return;
5035 	}
5036 
5037 	MDI_PHCI_LOCK(ph);
5038 	/*
5039 	 * Detach of pHCI failed. Restore back converse
5040 	 * state
5041 	 */
5042 	switch (cmd) {
5043 	case DDI_DETACH:
5044 		MDI_DEBUG(2, (CE_NOTE, dip,
5045 		    "!pHCI post_detach: called %p\n", ph));
5046 		if (error != DDI_SUCCESS)
5047 			MDI_PHCI_SET_ATTACH(ph);
5048 		break;
5049 
5050 	case DDI_SUSPEND:
5051 		MDI_DEBUG(2, (CE_NOTE, dip,
5052 		    "!pHCI post_suspend: called %p\n", ph));
5053 		if (error != DDI_SUCCESS)
5054 			MDI_PHCI_SET_RESUME(ph);
5055 		break;
5056 	}
5057 	MDI_PHCI_UNLOCK(ph);
5058 }
5059 
5060 /*ARGSUSED*/
5061 static void
5062 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5063 {
5064 	mdi_client_t	*ct;
5065 
5066 	ct = i_devi_get_client(dip);
5067 	if (ct == NULL) {
5068 		return;
5069 	}
5070 	MDI_CLIENT_LOCK(ct);
5071 	/*
5072 	 * Detach of Client failed. Restore back converse
5073 	 * state
5074 	 */
5075 	switch (cmd) {
5076 	case DDI_DETACH:
5077 		MDI_DEBUG(2, (CE_NOTE, dip,
5078 		    "!Client post_detach: called %p\n", ct));
5079 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5080 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5081 			    "i_mdi_pm_rele_client\n"));
5082 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5083 		} else {
5084 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5085 			    "i_mdi_pm_reset_client\n"));
5086 			i_mdi_pm_reset_client(ct);
5087 		}
5088 		if (error != DDI_SUCCESS)
5089 			MDI_CLIENT_SET_ATTACH(ct);
5090 		break;
5091 
5092 	case DDI_SUSPEND:
5093 		MDI_DEBUG(2, (CE_NOTE, dip,
5094 		    "!Client post_suspend: called %p\n", ct));
5095 		if (error != DDI_SUCCESS)
5096 			MDI_CLIENT_SET_RESUME(ct);
5097 		break;
5098 	}
5099 	MDI_CLIENT_UNLOCK(ct);
5100 }
5101 
5102 /*
5103  * create and install per-path (client - pHCI) statistics
5104  * I/O stats supported: nread, nwritten, reads, and writes
5105  * Error stats - hard errors, soft errors, & transport errors
5106  */
5107 static int
5108 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
5109 {
5110 
5111 	dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip;
5112 	dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip;
5113 	char ksname[KSTAT_STRLEN];
5114 	mdi_pathinfo_t *cpip;
5115 	const char *err_postfix = ",err";
5116 	kstat_t	*kiosp, *kerrsp;
5117 	struct pi_errs	*nsp;
5118 	struct mdi_pi_kstats *mdi_statp;
5119 
5120 	ASSERT(client != NULL && ppath != NULL);
5121 
5122 	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));
5123 
5124 	if (MDI_PI(pip)->pi_kstats != NULL)
5125 		return (MDI_SUCCESS);
5126 
5127 	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
5128 	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
5129 		if (cpip == pip)
5130 			continue;
5131 		/*
5132 		 * We have found a different path with same parent
5133 		 * kstats for a given client-pHCI are common
5134 		 */
5135 		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
5136 		    (MDI_PI(cpip)->pi_kstats != NULL)) {
5137 			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
5138 			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
5139 			return (MDI_SUCCESS);
5140 		}
5141 	}
5142 
5143 	/*
5144 	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
5145 	 * clamp length of name against max length of error kstat name
5146 	 */
5147 	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
5148 	    ddi_driver_name(client), ddi_get_instance(client),
5149 	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
5150 	    (KSTAT_STRLEN - strlen(err_postfix))) {
5151 		return (MDI_FAILURE);
5152 	}
5153 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5154 	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
5155 		return (MDI_FAILURE);
5156 	}
5157 
5158 	(void) strcat(ksname, err_postfix);
5159 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5160 	    KSTAT_TYPE_NAMED,
5161 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5162 
5163 	if (kerrsp == NULL) {
5164 		kstat_delete(kiosp);
5165 		return (MDI_FAILURE);
5166 	}
5167 
5168 	nsp = (struct pi_errs *)kerrsp->ks_data;
5169 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5170 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5171 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5172 	    KSTAT_DATA_UINT32);
5173 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5174 	    KSTAT_DATA_UINT32);
5175 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5176 	    KSTAT_DATA_UINT32);
5177 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5178 	    KSTAT_DATA_UINT32);
5179 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5180 	    KSTAT_DATA_UINT32);
5181 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5182 	    KSTAT_DATA_UINT32);
5183 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5184 	    KSTAT_DATA_UINT32);
5185 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5186 
5187 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5188 	mdi_statp->pi_kstat_ref = 1;
5189 	mdi_statp->pi_kstat_iostats = kiosp;
5190 	mdi_statp->pi_kstat_errstats = kerrsp;
5191 	kstat_install(kiosp);
5192 	kstat_install(kerrsp);
5193 	MDI_PI(pip)->pi_kstats = mdi_statp;
5194 	return (MDI_SUCCESS);
5195 }
5196 
5197 /*
5198  * destroy per-path properties
5199  */
5200 static void
5201 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5202 {
5203 
5204 	struct mdi_pi_kstats *mdi_statp;
5205 
5206 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5207 		return;
5208 
5209 	MDI_PI(pip)->pi_kstats = NULL;
5210 
5211 	/*
5212 	 * the kstat may be shared between multiple pathinfo nodes
5213 	 * decrement this pathinfo's usage, removing the kstats
5214 	 * themselves when the last pathinfo reference is removed.
5215 	 */
5216 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5217 	if (--mdi_statp->pi_kstat_ref != 0)
5218 		return;
5219 
5220 	kstat_delete(mdi_statp->pi_kstat_iostats);
5221 	kstat_delete(mdi_statp->pi_kstat_errstats);
5222 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5223 }
5224 
5225 /*
5226  * update I/O paths KSTATS
5227  */
5228 void
5229 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5230 {
5231 	kstat_t *iostatp;
5232 	size_t xfer_cnt;
5233 
5234 	ASSERT(pip != NULL);
5235 
5236 	/*
5237 	 * I/O can be driven across a path prior to having path
5238 	 * statistics available, i.e. probe(9e).
5239 	 */
5240 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5241 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5242 		xfer_cnt = bp->b_bcount - bp->b_resid;
5243 		if (bp->b_flags & B_READ) {
5244 			KSTAT_IO_PTR(iostatp)->reads++;
5245 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5246 		} else {
5247 			KSTAT_IO_PTR(iostatp)->writes++;
5248 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5249 		}
5250 	}
5251 }
5252 
5253 /*
5254  * Enable the path(specific client/target/initiator)
5255  * Enabling a path means that MPxIO may select the enabled path for routing
5256  * future I/O requests, subject to other path state constraints.
5257  */
5258 int
5259 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
5260 {
5261 	mdi_phci_t	*ph;
5262 
5263 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5264 	if (ph == NULL) {
5265 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5266 			" failed. pip: %p ph = NULL\n", pip));
5267 		return (MDI_FAILURE);
5268 	}
5269 
5270 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
5271 		MDI_ENABLE_OP);
5272 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5273 		" Returning success pip = %p. ph = %p\n", pip, ph));
5274 	return (MDI_SUCCESS);
5275 
5276 }
5277 
5278 /*
5279  * Disable the path (specific client/target/initiator)
5280  * Disabling a path means that MPxIO will not select the disabled path for
5281  * routing any new I/O requests.
5282  */
5283 int
5284 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
5285 {
5286 	mdi_phci_t	*ph;
5287 
5288 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5289 	if (ph == NULL) {
5290 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5291 			" failed. pip: %p ph = NULL\n", pip));
5292 		return (MDI_FAILURE);
5293 	}
5294 
5295 	(void) i_mdi_enable_disable_path(pip,
5296 			ph->ph_vhci, flags, MDI_DISABLE_OP);
5297 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5298 		"Returning success pip = %p. ph = %p", pip, ph));
5299 	return (MDI_SUCCESS);
5300 }
5301 
5302 /*
5303  * disable the path to a particular pHCI (pHCI specified in the phci_path
5304  * argument) for a particular client (specified in the client_path argument).
5305  * Disabling a path means that MPxIO will not select the disabled path for
5306  * routing any new I/O requests.
5307  * NOTE: this will be removed once the NWS files are changed to use the new
5308  * mdi_{enable,disable}_path interfaces
5309  */
5310 int
5311 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5312 {
5313 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5314 }
5315 
5316 /*
5317  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5318  * argument) for a particular client (specified in the client_path argument).
5319  * Enabling a path means that MPxIO may select the enabled path for routing
5320  * future I/O requests, subject to other path state constraints.
5321  * NOTE: this will be removed once the NWS files are changed to use the new
5322  * mdi_{enable,disable}_path interfaces
5323  */
5324 
5325 int
5326 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5327 {
5328 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5329 }
5330 
5331 /*
5332  * Common routine for doing enable/disable.
5333  */
5334 static mdi_pathinfo_t *
5335 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
5336 		int op)
5337 {
5338 	int		sync_flag = 0;
5339 	int		rv;
5340 	mdi_pathinfo_t 	*next;
5341 	int		(*f)() = NULL;
5342 
5343 	f = vh->vh_ops->vo_pi_state_change;
5344 
5345 	sync_flag = (flags << 8) & 0xf00;
5346 
5347 	/*
5348 	 * Do a callback into the mdi consumer to let it
5349 	 * know that path is about to get enabled/disabled.
5350 	 */
5351 	if (f != NULL) {
5352 		rv = (*f)(vh->vh_dip, pip, 0,
5353 			MDI_PI_EXT_STATE(pip),
5354 			MDI_EXT_STATE_CHANGE | sync_flag |
5355 			op | MDI_BEFORE_STATE_CHANGE);
5356 		if (rv != MDI_SUCCESS) {
5357 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5358 			"!vo_pi_state_change: failed rv = %x", rv));
5359 		}
5360 	}
5361 	MDI_PI_LOCK(pip);
5362 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5363 
5364 	switch (flags) {
5365 		case USER_DISABLE:
5366 			if (op == MDI_DISABLE_OP)
5367 				MDI_PI_SET_USER_DISABLE(pip);
5368 			else
5369 				MDI_PI_SET_USER_ENABLE(pip);
5370 			break;
5371 		case DRIVER_DISABLE:
5372 			if (op == MDI_DISABLE_OP)
5373 				MDI_PI_SET_DRV_DISABLE(pip);
5374 			else
5375 				MDI_PI_SET_DRV_ENABLE(pip);
5376 			break;
5377 		case DRIVER_DISABLE_TRANSIENT:
5378 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5379 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5380 			else
5381 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5382 			break;
5383 	}
5384 	MDI_PI_UNLOCK(pip);
5385 	/*
5386 	 * Do a callback into the mdi consumer to let it
5387 	 * know that path is now enabled/disabled.
5388 	 */
5389 	if (f != NULL) {
5390 		rv = (*f)(vh->vh_dip, pip, 0,
5391 			MDI_PI_EXT_STATE(pip),
5392 			MDI_EXT_STATE_CHANGE | sync_flag |
5393 			op | MDI_AFTER_STATE_CHANGE);
5394 		if (rv != MDI_SUCCESS) {
5395 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5396 			"!vo_pi_state_change: failed rv = %x", rv));
5397 		}
5398 	}
5399 	return (next);
5400 }
5401 
5402 /*
5403  * Common routine for doing enable/disable.
5404  * NOTE: this will be removed once the NWS files are changed to use the new
5405  * mdi_{enable,disable}_path has been putback
5406  */
5407 int
5408 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5409 {
5410 
5411 	mdi_phci_t	*ph;
5412 	mdi_vhci_t	*vh = NULL;
5413 	mdi_client_t	*ct;
5414 	mdi_pathinfo_t	*next, *pip;
5415 	int		found_it;
5416 
5417 	ph = i_devi_get_phci(pdip);
5418 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5419 		" Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip));
5420 	if (ph == NULL) {
5421 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5422 			" failed. ph = NULL operation = %d\n", op));
5423 		return (MDI_FAILURE);
5424 	}
5425 
5426 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5427 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5428 			" Invalid operation = %d\n", op));
5429 		return (MDI_FAILURE);
5430 	}
5431 
5432 	vh = ph->ph_vhci;
5433 
5434 	if (cdip == NULL) {
5435 		/*
5436 		 * Need to mark the Phci as enabled/disabled.
5437 		 */
5438 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5439 		"Operation %d for the phci\n", op));
5440 		MDI_PHCI_LOCK(ph);
5441 		switch (flags) {
5442 			case USER_DISABLE:
5443 				if (op == MDI_DISABLE_OP)
5444 					MDI_PHCI_SET_USER_DISABLE(ph);
5445 				else
5446 					MDI_PHCI_SET_USER_ENABLE(ph);
5447 				break;
5448 			case DRIVER_DISABLE:
5449 				if (op == MDI_DISABLE_OP)
5450 					MDI_PHCI_SET_DRV_DISABLE(ph);
5451 				else
5452 					MDI_PHCI_SET_DRV_ENABLE(ph);
5453 				break;
5454 			case DRIVER_DISABLE_TRANSIENT:
5455 				if (op == MDI_DISABLE_OP)
5456 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5457 				else
5458 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
5459 				break;
5460 			default:
5461 				MDI_PHCI_UNLOCK(ph);
5462 				MDI_DEBUG(1, (CE_NOTE, NULL,
5463 				"!i_mdi_pi_enable_disable:"
5464 				" Invalid flag argument= %d\n", flags));
5465 		}
5466 
5467 		/*
5468 		 * Phci has been disabled. Now try to enable/disable
5469 		 * path info's to each client.
5470 		 */
5471 		pip = ph->ph_path_head;
5472 		while (pip != NULL) {
5473 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
5474 		}
5475 		MDI_PHCI_UNLOCK(ph);
5476 	} else {
5477 
5478 		/*
5479 		 * Disable a specific client.
5480 		 */
5481 		ct = i_devi_get_client(cdip);
5482 		if (ct == NULL) {
5483 			MDI_DEBUG(1, (CE_NOTE, NULL,
5484 			"!i_mdi_pi_enable_disable:"
5485 			" failed. ct = NULL operation = %d\n", op));
5486 			return (MDI_FAILURE);
5487 		}
5488 
5489 		MDI_CLIENT_LOCK(ct);
5490 		pip = ct->ct_path_head;
5491 		found_it = 0;
5492 		while (pip != NULL) {
5493 			MDI_PI_LOCK(pip);
5494 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5495 			if (MDI_PI(pip)->pi_phci == ph) {
5496 				MDI_PI_UNLOCK(pip);
5497 				found_it = 1;
5498 				break;
5499 			}
5500 			MDI_PI_UNLOCK(pip);
5501 			pip = next;
5502 		}
5503 
5504 
5505 		MDI_CLIENT_UNLOCK(ct);
5506 		if (found_it == 0) {
5507 			MDI_DEBUG(1, (CE_NOTE, NULL,
5508 			"!i_mdi_pi_enable_disable:"
5509 			" failed. Could not find corresponding pip\n"));
5510 			return (MDI_FAILURE);
5511 		}
5512 
5513 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
5514 	}
5515 
5516 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5517 		" Returning success op: %x pdip = %p cdip = %p\n", op,
5518 			pdip, cdip));
5519 	return (MDI_SUCCESS);
5520 }
5521 
5522 /*ARGSUSED3*/
5523 int
5524 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
5525     int flags, clock_t timeout)
5526 {
5527 	mdi_pathinfo_t *pip;
5528 	dev_info_t *dip;
5529 	clock_t interval = drv_usectohz(100000);	/* 0.1 sec */
5530 	char *paddr;
5531 
5532 	MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm));
5533 
5534 	if (!MDI_PHCI(pdip))
5535 		return (MDI_FAILURE);
5536 
5537 	paddr = strchr(devnm, '@');
5538 	if (paddr == NULL)
5539 		return (MDI_FAILURE);
5540 
5541 	paddr++;	/* skip '@' */
5542 	pip = mdi_pi_find(pdip, NULL, paddr);
5543 	while (pip == NULL && timeout > 0) {
5544 		if (interval > timeout)
5545 			interval = timeout;
5546 		if (flags & NDI_DEVI_DEBUG) {
5547 			cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n",
5548 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
5549 			    paddr, interval, timeout);
5550 		}
5551 		delay(interval);
5552 		timeout -= interval;
5553 		interval += interval;
5554 		pip = mdi_pi_find(pdip, NULL, paddr);
5555 	}
5556 
5557 	if (pip == NULL)
5558 		return (MDI_FAILURE);
5559 	dip = mdi_pi_get_client(pip);
5560 	if (ndi_devi_online(dip, flags) != NDI_SUCCESS)
5561 		return (MDI_FAILURE);
5562 	*cdipp = dip;
5563 
5564 	/* TODO: holding should happen inside search functions */
5565 	ndi_hold_devi(dip);
5566 	return (MDI_SUCCESS);
5567 }
5568 
5569 /*
5570  * Ensure phci powered up
5571  */
5572 static void
5573 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
5574 {
5575 	dev_info_t	*ph_dip;
5576 
5577 	ASSERT(pip != NULL);
5578 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5579 
5580 	if (MDI_PI(pip)->pi_pm_held) {
5581 		return;
5582 	}
5583 
5584 	ph_dip = mdi_pi_get_phci(pip);
5585 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n",
5586 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5587 	if (ph_dip == NULL) {
5588 		return;
5589 	}
5590 
5591 	MDI_PI_UNLOCK(pip);
5592 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5593 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5594 	pm_hold_power(ph_dip);
5595 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5596 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5597 	MDI_PI_LOCK(pip);
5598 
5599 	MDI_PI(pip)->pi_pm_held = 1;
5600 }
5601 
5602 /*
5603  * Allow phci powered down
5604  */
5605 static void
5606 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
5607 {
5608 	dev_info_t	*ph_dip = NULL;
5609 
5610 	ASSERT(pip != NULL);
5611 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5612 
5613 	if (MDI_PI(pip)->pi_pm_held == 0) {
5614 		return;
5615 	}
5616 
5617 	ph_dip = mdi_pi_get_phci(pip);
5618 	ASSERT(ph_dip != NULL);
5619 
5620 	MDI_PI_UNLOCK(pip);
5621 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n",
5622 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5623 
5624 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5625 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5626 	pm_rele_power(ph_dip);
5627 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5628 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5629 
5630 	MDI_PI_LOCK(pip);
5631 	MDI_PI(pip)->pi_pm_held = 0;
5632 }
5633 
5634 static void
5635 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
5636 {
5637 	ASSERT(ct);
5638 
5639 	ct->ct_power_cnt += incr;
5640 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client "
5641 	    "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr));
5642 	ASSERT(ct->ct_power_cnt >= 0);
5643 }
5644 
5645 static void
5646 i_mdi_rele_all_phci(mdi_client_t *ct)
5647 {
5648 	mdi_pathinfo_t  *pip;
5649 
5650 	ASSERT(mutex_owned(&ct->ct_mutex));
5651 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5652 	while (pip != NULL) {
5653 		mdi_hold_path(pip);
5654 		MDI_PI_LOCK(pip);
5655 		i_mdi_pm_rele_pip(pip);
5656 		MDI_PI_UNLOCK(pip);
5657 		mdi_rele_path(pip);
5658 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5659 	}
5660 }
5661 
5662 static void
5663 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
5664 {
5665 	ASSERT(ct);
5666 
5667 	if (i_ddi_devi_attached(ct->ct_dip)) {
5668 		ct->ct_power_cnt -= decr;
5669 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
5670 		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
5671 	}
5672 
5673 	ASSERT(ct->ct_power_cnt >= 0);
5674 	if (ct->ct_power_cnt == 0) {
5675 		i_mdi_rele_all_phci(ct);
5676 		return;
5677 	}
5678 }
5679 
5680 static void
5681 i_mdi_pm_reset_client(mdi_client_t *ct)
5682 {
5683 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
5684 	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
5685 	ct->ct_power_cnt = 0;
5686 	i_mdi_rele_all_phci(ct);
5687 	ct->ct_powercnt_config = 0;
5688 	ct->ct_powercnt_unconfig = 0;
5689 	ct->ct_powercnt_reset = 1;
5690 }
5691 
5692 static void
5693 i_mdi_pm_hold_all_phci(mdi_client_t *ct)
5694 {
5695 	mdi_pathinfo_t  *pip;
5696 	ASSERT(mutex_owned(&ct->ct_mutex));
5697 
5698 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5699 	while (pip != NULL) {
5700 		mdi_hold_path(pip);
5701 		MDI_PI_LOCK(pip);
5702 		i_mdi_pm_hold_pip(pip);
5703 		MDI_PI_UNLOCK(pip);
5704 		mdi_rele_path(pip);
5705 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5706 	}
5707 }
5708 
5709 static int
5710 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
5711 {
5712 	int		ret;
5713 	dev_info_t	*ph_dip;
5714 
5715 	MDI_PI_LOCK(pip);
5716 	i_mdi_pm_hold_pip(pip);
5717 
5718 	ph_dip = mdi_pi_get_phci(pip);
5719 	MDI_PI_UNLOCK(pip);
5720 
5721 	/* bring all components of phci to full power */
5722 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5723 	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
5724 	    ddi_get_instance(ph_dip)));
5725 
5726 	ret = pm_powerup(ph_dip);
5727 
5728 	if (ret == DDI_FAILURE) {
5729 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5730 		    "pm_powerup FAILED for %s%d\n",
5731 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5732 
5733 		MDI_PI_LOCK(pip);
5734 		i_mdi_pm_rele_pip(pip);
5735 		MDI_PI_UNLOCK(pip);
5736 		return (MDI_FAILURE);
5737 	}
5738 
5739 	return (MDI_SUCCESS);
5740 }
5741 
5742 static int
5743 i_mdi_power_all_phci(mdi_client_t *ct)
5744 {
5745 	mdi_pathinfo_t  *pip;
5746 	int		succeeded = 0;
5747 
5748 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5749 	while (pip != NULL) {
5750 		mdi_hold_path(pip);
5751 		MDI_CLIENT_UNLOCK(ct);
5752 		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
5753 			succeeded = 1;
5754 
5755 		ASSERT(ct == MDI_PI(pip)->pi_client);
5756 		MDI_CLIENT_LOCK(ct);
5757 		mdi_rele_path(pip);
5758 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5759 	}
5760 
5761 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
5762 }
5763 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are processed here.
 *		BUS_POWER_NOINVOL is rejected and every other op is handed
 *		to pm_busop_bus_power().
 *
 *		For the notification ops 'arg' is a pm_bp_child_pwrchg_t,
 *		for BUS_POWER_HAS_CHANGED it is a pm_bp_has_changed_t; for
 *		BUS_POWER_POST_NOTIFICATION 'result' is read as an int
 *		holding the outcome of the power change.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/* cleared again in the BUS_POWER_POST_NOTIFICATION case */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* no holds outstanding: the phci(s) need powering up first */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new power state only if the change succeeded */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* successful power down of the client */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* transition finished: wake threads waiting to change power */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* power level was raised: power up and hold the phci(s) */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* dropped to level 0 from an old level other than -1 */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
5921 
5922 static int
5923 i_mdi_pm_pre_config_one(dev_info_t *child)
5924 {
5925 	int		ret = MDI_SUCCESS;
5926 	mdi_client_t	*ct;
5927 
5928 	ct = i_devi_get_client(child);
5929 	if (ct == NULL)
5930 		return (MDI_FAILURE);
5931 
5932 	MDI_CLIENT_LOCK(ct);
5933 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5934 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5935 
5936 	if (!MDI_CLIENT_IS_FAILED(ct)) {
5937 		MDI_CLIENT_UNLOCK(ct);
5938 		MDI_DEBUG(4, (CE_NOTE, child,
5939 		    "i_mdi_pm_pre_config_one already configured\n"));
5940 		return (MDI_SUCCESS);
5941 	}
5942 
5943 	if (ct->ct_powercnt_config) {
5944 		MDI_CLIENT_UNLOCK(ct);
5945 		MDI_DEBUG(4, (CE_NOTE, child,
5946 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
5947 		return (MDI_SUCCESS);
5948 	}
5949 
5950 	if (ct->ct_power_cnt == 0) {
5951 		ret = i_mdi_power_all_phci(ct);
5952 	}
5953 	MDI_DEBUG(4, (CE_NOTE, child,
5954 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
5955 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5956 	ct->ct_powercnt_config = 1;
5957 	ct->ct_powercnt_reset = 0;
5958 	MDI_CLIENT_UNLOCK(ct);
5959 	return (ret);
5960 }
5961 
5962 static int
5963 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
5964 {
5965 	int			ret = MDI_SUCCESS;
5966 	dev_info_t		*cdip;
5967 	int			circ;
5968 
5969 	ASSERT(MDI_VHCI(parent));
5970 
5971 	/* ndi_devi_config_one */
5972 	if (child) {
5973 		return (i_mdi_pm_pre_config_one(child));
5974 	}
5975 
5976 	/* devi_config_common */
5977 	ndi_devi_enter(parent, &circ);
5978 	cdip = ddi_get_child(parent);
5979 	while (cdip) {
5980 		dev_info_t *next = ddi_get_next_sibling(cdip);
5981 
5982 		ret = i_mdi_pm_pre_config_one(cdip);
5983 		if (ret != MDI_SUCCESS)
5984 			break;
5985 		cdip = next;
5986 	}
5987 	ndi_devi_exit(parent, circ);
5988 	return (ret);
5989 }
5990 
5991 static int
5992 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
5993 {
5994 	int		ret = MDI_SUCCESS;
5995 	mdi_client_t	*ct;
5996 
5997 	ct = i_devi_get_client(child);
5998 	if (ct == NULL)
5999 		return (MDI_FAILURE);
6000 
6001 	MDI_CLIENT_LOCK(ct);
6002 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6003 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6004 
6005 	if (!i_ddi_devi_attached(ct->ct_dip)) {
6006 		MDI_DEBUG(4, (CE_NOTE, child,
6007 		    "i_mdi_pm_pre_unconfig node detached already\n"));
6008 		MDI_CLIENT_UNLOCK(ct);
6009 		return (MDI_SUCCESS);
6010 	}
6011 
6012 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6013 	    (flags & NDI_AUTODETACH)) {
6014 		MDI_DEBUG(4, (CE_NOTE, child,
6015 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
6016 		MDI_CLIENT_UNLOCK(ct);
6017 		return (MDI_FAILURE);
6018 	}
6019 
6020 	if (ct->ct_powercnt_unconfig) {
6021 		MDI_DEBUG(4, (CE_NOTE, child,
6022 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
6023 		MDI_CLIENT_UNLOCK(ct);
6024 		*held = 1;
6025 		return (MDI_SUCCESS);
6026 	}
6027 
6028 	if (ct->ct_power_cnt == 0) {
6029 		ret = i_mdi_power_all_phci(ct);
6030 	}
6031 	MDI_DEBUG(4, (CE_NOTE, child,
6032 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
6033 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6034 	ct->ct_powercnt_unconfig = 1;
6035 	ct->ct_powercnt_reset = 0;
6036 	MDI_CLIENT_UNLOCK(ct);
6037 	if (ret == MDI_SUCCESS)
6038 		*held = 1;
6039 	return (ret);
6040 }
6041 
6042 static int
6043 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
6044     int flags)
6045 {
6046 	int			ret = MDI_SUCCESS;
6047 	dev_info_t		*cdip;
6048 	int			circ;
6049 
6050 	ASSERT(MDI_VHCI(parent));
6051 	*held = 0;
6052 
6053 	/* ndi_devi_unconfig_one */
6054 	if (child) {
6055 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6056 	}
6057 
6058 	/* devi_unconfig_common */
6059 	ndi_devi_enter(parent, &circ);
6060 	cdip = ddi_get_child(parent);
6061 	while (cdip) {
6062 		dev_info_t *next = ddi_get_next_sibling(cdip);
6063 
6064 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6065 		cdip = next;
6066 	}
6067 	ndi_devi_exit(parent, circ);
6068 
6069 	if (*held)
6070 		ret = MDI_SUCCESS;
6071 
6072 	return (ret);
6073 }
6074 
6075 static void
6076 i_mdi_pm_post_config_one(dev_info_t *child)
6077 {
6078 	mdi_client_t	*ct;
6079 
6080 	ct = i_devi_get_client(child);
6081 	if (ct == NULL)
6082 		return;
6083 
6084 	MDI_CLIENT_LOCK(ct);
6085 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6086 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6087 
6088 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6089 		MDI_DEBUG(4, (CE_NOTE, child,
6090 		    "i_mdi_pm_post_config_one NOT configured\n"));
6091 		MDI_CLIENT_UNLOCK(ct);
6092 		return;
6093 	}
6094 
6095 	/* client has not been updated */
6096 	if (MDI_CLIENT_IS_FAILED(ct)) {
6097 		MDI_DEBUG(4, (CE_NOTE, child,
6098 		    "i_mdi_pm_post_config_one NOT configured\n"));
6099 		MDI_CLIENT_UNLOCK(ct);
6100 		return;
6101 	}
6102 
6103 	/* another thread might have powered it down or detached it */
6104 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6105 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6106 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6107 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6108 		MDI_DEBUG(4, (CE_NOTE, child,
6109 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6110 		i_mdi_pm_reset_client(ct);
6111 	} else {
6112 		mdi_pathinfo_t  *pip, *next;
6113 		int	valid_path_count = 0;
6114 
6115 		MDI_DEBUG(4, (CE_NOTE, child,
6116 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6117 		pip = ct->ct_path_head;
6118 		while (pip != NULL) {
6119 			MDI_PI_LOCK(pip);
6120 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6121 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6122 				valid_path_count ++;
6123 			MDI_PI_UNLOCK(pip);
6124 			pip = next;
6125 		}
6126 		i_mdi_pm_rele_client(ct, valid_path_count);
6127 	}
6128 	ct->ct_powercnt_config = 0;
6129 	MDI_CLIENT_UNLOCK(ct);
6130 }
6131 
6132 static void
6133 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
6134 {
6135 	int		circ;
6136 	dev_info_t	*cdip;
6137 	ASSERT(MDI_VHCI(parent));
6138 
6139 	/* ndi_devi_config_one */
6140 	if (child) {
6141 		i_mdi_pm_post_config_one(child);
6142 		return;
6143 	}
6144 
6145 	/* devi_config_common */
6146 	ndi_devi_enter(parent, &circ);
6147 	cdip = ddi_get_child(parent);
6148 	while (cdip) {
6149 		dev_info_t *next = ddi_get_next_sibling(cdip);
6150 
6151 		i_mdi_pm_post_config_one(cdip);
6152 		cdip = next;
6153 	}
6154 	ndi_devi_exit(parent, circ);
6155 }
6156 
6157 static void
6158 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6159 {
6160 	mdi_client_t	*ct;
6161 
6162 	ct = i_devi_get_client(child);
6163 	if (ct == NULL)
6164 		return;
6165 
6166 	MDI_CLIENT_LOCK(ct);
6167 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6168 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6169 
6170 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6171 		MDI_DEBUG(4, (CE_NOTE, child,
6172 		    "i_mdi_pm_post_unconfig NOT held\n"));
6173 		MDI_CLIENT_UNLOCK(ct);
6174 		return;
6175 	}
6176 
6177 	/* failure detaching or another thread just attached it */
6178 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6179 	    i_ddi_devi_attached(ct->ct_dip)) ||
6180 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6181 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6182 		MDI_DEBUG(4, (CE_NOTE, child,
6183 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6184 		i_mdi_pm_reset_client(ct);
6185 	} else {
6186 		mdi_pathinfo_t  *pip, *next;
6187 		int	valid_path_count = 0;
6188 
6189 		MDI_DEBUG(4, (CE_NOTE, child,
6190 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6191 		pip = ct->ct_path_head;
6192 		while (pip != NULL) {
6193 			MDI_PI_LOCK(pip);
6194 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6195 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6196 				valid_path_count ++;
6197 			MDI_PI_UNLOCK(pip);
6198 			pip = next;
6199 		}
6200 		i_mdi_pm_rele_client(ct, valid_path_count);
6201 		ct->ct_powercnt_unconfig = 0;
6202 	}
6203 
6204 	MDI_CLIENT_UNLOCK(ct);
6205 }
6206 
6207 static void
6208 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
6209 {
6210 	int			circ;
6211 	dev_info_t		*cdip;
6212 
6213 	ASSERT(MDI_VHCI(parent));
6214 
6215 	if (!held) {
6216 		MDI_DEBUG(4, (CE_NOTE, parent,
6217 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6218 		return;
6219 	}
6220 
6221 	if (child) {
6222 		i_mdi_pm_post_unconfig_one(child);
6223 		return;
6224 	}
6225 
6226 	ndi_devi_enter(parent, &circ);
6227 	cdip = ddi_get_child(parent);
6228 	while (cdip) {
6229 		dev_info_t *next = ddi_get_next_sibling(cdip);
6230 
6231 		i_mdi_pm_post_unconfig_one(cdip);
6232 		cdip = next;
6233 	}
6234 	ndi_devi_exit(parent, circ);
6235 }
6236 
6237 int
6238 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6239 {
6240 	int			circ, ret = MDI_SUCCESS;
6241 	dev_info_t		*client_dip = NULL;
6242 	mdi_client_t		*ct;
6243 
6244 	/*
6245 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6246 	 * Power up pHCI for the named client device.
6247 	 * Note: Before the client is enumerated under vhci by phci,
6248 	 * client_dip can be NULL. Then proceed to power up all the
6249 	 * pHCIs.
6250 	 */
6251 	if (devnm != NULL) {
6252 		ndi_devi_enter(vdip, &circ);
6253 		client_dip = ndi_devi_findchild(vdip, devnm);
6254 		ndi_devi_exit(vdip, circ);
6255 	}
6256 
6257 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));
6258 
6259 	switch (op) {
6260 	case MDI_PM_PRE_CONFIG:
6261 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6262 
6263 		break;
6264 	case MDI_PM_PRE_UNCONFIG:
6265 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6266 		    flags);
6267 
6268 		break;
6269 	case MDI_PM_POST_CONFIG:
6270 		i_mdi_pm_post_config(vdip, client_dip);
6271 
6272 		break;
6273 	case MDI_PM_POST_UNCONFIG:
6274 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6275 
6276 		break;
6277 	case MDI_PM_HOLD_POWER:
6278 	case MDI_PM_RELE_POWER:
6279 		ASSERT(args);
6280 
6281 		client_dip = (dev_info_t *)args;
6282 		ASSERT(MDI_CLIENT(client_dip));
6283 
6284 		ct = i_devi_get_client(client_dip);
6285 		MDI_CLIENT_LOCK(ct);
6286 
6287 		if (op == MDI_PM_HOLD_POWER) {
6288 			if (ct->ct_power_cnt == 0) {
6289 				(void) i_mdi_power_all_phci(ct);
6290 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6291 				    "mdi_power i_mdi_pm_hold_client\n"));
6292 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6293 			}
6294 		} else {
6295 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6296 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6297 				    "mdi_power i_mdi_pm_rele_client\n"));
6298 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6299 			} else {
6300 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6301 				    "mdi_power i_mdi_pm_reset_client\n"));
6302 				i_mdi_pm_reset_client(ct);
6303 			}
6304 		}
6305 
6306 		MDI_CLIENT_UNLOCK(ct);
6307 		break;
6308 	default:
6309 		break;
6310 	}
6311 
6312 	return (ret);
6313 }
6314 
6315 int
6316 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6317 {
6318 	mdi_vhci_t *vhci;
6319 
6320 	if (!MDI_VHCI(dip))
6321 		return (MDI_FAILURE);
6322 
6323 	if (mdi_class) {
6324 		vhci = DEVI(dip)->devi_mdi_xhci;
6325 		ASSERT(vhci);
6326 		*mdi_class = vhci->vh_class;
6327 	}
6328 
6329 	return (MDI_SUCCESS);
6330 }
6331 
6332 int
6333 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6334 {
6335 	mdi_phci_t *phci;
6336 
6337 	if (!MDI_PHCI(dip))
6338 		return (MDI_FAILURE);
6339 
6340 	if (mdi_class) {
6341 		phci = DEVI(dip)->devi_mdi_xhci;
6342 		ASSERT(phci);
6343 		*mdi_class = phci->ph_vhci->vh_class;
6344 	}
6345 
6346 	return (MDI_SUCCESS);
6347 }
6348 
6349 int
6350 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6351 {
6352 	mdi_client_t *client;
6353 
6354 	if (!MDI_CLIENT(dip))
6355 		return (MDI_FAILURE);
6356 
6357 	if (mdi_class) {
6358 		client = DEVI(dip)->devi_mdi_client;
6359 		ASSERT(client);
6360 		*mdi_class = client->ct_vhci->vh_class;
6361 	}
6362 
6363 	return (MDI_SUCCESS);
6364 }
6365 
6366 void *
6367 mdi_client_get_vhci_private(dev_info_t *dip)
6368 {
6369 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6370 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6371 		mdi_client_t	*ct;
6372 		ct = i_devi_get_client(dip);
6373 		return (ct->ct_vprivate);
6374 	}
6375 	return (NULL);
6376 }
6377 
6378 void
6379 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6380 {
6381 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6382 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6383 		mdi_client_t	*ct;
6384 		ct = i_devi_get_client(dip);
6385 		ct->ct_vprivate = data;
6386 	}
6387 }
6388 /*
6389  * mdi_pi_get_vhci_private():
6390  *		Get the vhci private information associated with the
6391  *		mdi_pathinfo node
6392  */
6393 void *
6394 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6395 {
6396 	caddr_t	vprivate = NULL;
6397 	if (pip) {
6398 		vprivate = MDI_PI(pip)->pi_vprivate;
6399 	}
6400 	return (vprivate);
6401 }
6402 
6403 /*
6404  * mdi_pi_set_vhci_private():
6405  *		Set the vhci private information in the mdi_pathinfo node
6406  */
6407 void
6408 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6409 {
6410 	if (pip) {
6411 		MDI_PI(pip)->pi_vprivate = priv;
6412 	}
6413 }
6414 
6415 /*
6416  * mdi_phci_get_vhci_private():
6417  *		Get the vhci private information associated with the
6418  *		mdi_phci node
6419  */
6420 void *
6421 mdi_phci_get_vhci_private(dev_info_t *dip)
6422 {
6423 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6424 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6425 		mdi_phci_t	*ph;
6426 		ph = i_devi_get_phci(dip);
6427 		return (ph->ph_vprivate);
6428 	}
6429 	return (NULL);
6430 }
6431 
6432 /*
6433  * mdi_phci_set_vhci_private():
6434  *		Set the vhci private information in the mdi_phci node
6435  */
6436 void
6437 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6438 {
6439 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6440 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6441 		mdi_phci_t	*ph;
6442 		ph = i_devi_get_phci(dip);
6443 		ph->ph_vprivate = priv;
6444 	}
6445 }
6446 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 *
 * N_VHCI_CLASSES is the number of entries in the list.
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
6455 
/*
 * Built-in list of phci drivers for every vhci class.
 * All phci drivers except iscsi have root device support.
 * Each entry is { driver name, root device support flag }.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
	};
6465 
6466 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
6467 
/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * The array is indexed in parallel with vhci_class_list[]; an entry is
 * taken over (and set to NULL) by setup_vhci_cache() for the matching class.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
6473 
/*
 * nvpair names in vhci cache nvlist
 * (MDI_VHCI_CACHE_VERSION is a number rather than a name; presumably it
 * is the value stored under MDI_NVPNAME_VERSION - confirm at the users)
 */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
6479 
6480 /*
6481  * Given vhci class name, return its on-disk vhci cache filename.
6482  * Memory for the returned filename which includes the full path is allocated
6483  * by this function.
6484  */
6485 static char *
6486 vhclass2vhcache_filename(char *vhclass)
6487 {
6488 	char *filename;
6489 	int len;
6490 	static char *fmt = "/etc/devices/mdi_%s_cache";
6491 
6492 	/*
6493 	 * fmt contains the on-disk vhci cache file name format;
6494 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
6495 	 */
6496 
6497 	/* the -1 below is to account for "%s" in the format string */
6498 	len = strlen(fmt) + strlen(vhclass) - 1;
6499 	filename = kmem_alloc(len, KM_SLEEP);
6500 	(void) snprintf(filename, len, fmt, vhclass);
6501 	ASSERT(len == (strlen(filename) + 1));
6502 	return (filename);
6503 }
6504 
/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 *
 * Allocates the mdi_vhci_config_t for the vhci and attaches it to
 * vh->vh_config, sets up its locks, client hash and phci driver list,
 * loads the cached nvlist (if any) into the in-core cache and registers
 * the stop_vhcache_flush_thread() callback.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/* needs vh->vh_config, which was set above */
	setup_phci_driver_list(vh);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			/* take over the nvlist; it is freed below */
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of someone manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else  {
			/* only warn; the setup-done flag simply stays clear */
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate\n",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* the callback id is needed later for callb_delete() */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}
6577 
6578 /*
6579  * free all vhci cache related resources
6580  */
6581 static int
6582 destroy_vhci_cache(mdi_vhci_t *vh)
6583 {
6584 	mdi_vhci_config_t *vhc = vh->vh_config;
6585 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
6586 	mdi_vhcache_phci_t *cphci, *cphci_next;
6587 	mdi_vhcache_client_t *cct, *cct_next;
6588 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
6589 
6590 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
6591 		return (MDI_FAILURE);
6592 
6593 	kmem_free(vhc->vhc_vhcache_filename,
6594 	    strlen(vhc->vhc_vhcache_filename) + 1);
6595 
6596 	if (vhc->vhc_phci_driver_list)
6597 		free_phci_driver_list(vhc);
6598 
6599 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
6600 
6601 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
6602 	    cphci = cphci_next) {
6603 		cphci_next = cphci->cphci_next;
6604 		free_vhcache_phci(cphci);
6605 	}
6606 
6607 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
6608 		cct_next = cct->cct_next;
6609 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
6610 			cpi_next = cpi->cpi_next;
6611 			free_vhcache_pathinfo(cpi);
6612 		}
6613 		free_vhcache_client(cct);
6614 	}
6615 
6616 	rw_destroy(&vhcache->vhcache_lock);
6617 
6618 	mutex_destroy(&vhc->vhc_lock);
6619 	cv_destroy(&vhc->vhc_cv);
6620 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
6621 	return (MDI_SUCCESS);
6622 }
6623 
6624 /*
6625  * Setup the list of phci drivers associated with the specified vhci class.
6626  * MDI uses this information to rebuild bus config cache if in case the
6627  * cache is not available or corrupted.
6628  */
6629 static void
6630 setup_phci_driver_list(mdi_vhci_t *vh)
6631 {
6632 	mdi_vhci_config_t *vhc = vh->vh_config;
6633 	mdi_phci_driver_info_t *driver_list;
6634 	char **driver_list1;
6635 	uint_t ndrivers, ndrivers1;
6636 	int i, j;
6637 
6638 	if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
6639 		driver_list = scsi_phci_driver_list;
6640 		ndrivers = sizeof (scsi_phci_driver_list) /
6641 		    sizeof (mdi_phci_driver_info_t);
6642 	} else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) {
6643 		driver_list = ib_phci_driver_list;
6644 		ndrivers = sizeof (ib_phci_driver_list) /
6645 		    sizeof (mdi_phci_driver_info_t);
6646 	} else {
6647 		driver_list = NULL;
6648 		ndrivers = 0;
6649 	}
6650 
6651 	/*
6652 	 * The driver.conf file of a vhci driver can specify additional
6653 	 * phci drivers using a project private "phci-drivers" property.
6654 	 */
6655 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip,
6656 	    DDI_PROP_DONTPASS, "phci-drivers", &driver_list1,
6657 	    &ndrivers1) != DDI_PROP_SUCCESS)
6658 		ndrivers1 = 0;
6659 
6660 	vhc->vhc_nphci_drivers = ndrivers + ndrivers1;
6661 	if (vhc->vhc_nphci_drivers == 0)
6662 		return;
6663 
6664 	vhc->vhc_phci_driver_list = kmem_alloc(
6665 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP);
6666 
6667 	for (i = 0; i < ndrivers; i++) {
6668 		vhc->vhc_phci_driver_list[i].phdriver_name =
6669 		    i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP);
6670 		vhc->vhc_phci_driver_list[i].phdriver_root_support =
6671 		    driver_list[i].phdriver_root_support;
6672 	}
6673 
6674 	for (j = 0; j < ndrivers1; j++, i++) {
6675 		vhc->vhc_phci_driver_list[i].phdriver_name =
6676 		    i_ddi_strdup(driver_list1[j], KM_SLEEP);
6677 		vhc->vhc_phci_driver_list[i].phdriver_root_support = 1;
6678 	}
6679 
6680 	if (ndrivers1)
6681 		ddi_prop_free(driver_list1);
6682 }
6683 
6684 /*
6685  * Free the memory allocated for the phci driver list
6686  */
6687 static void
6688 free_phci_driver_list(mdi_vhci_config_t *vhc)
6689 {
6690 	int i;
6691 
6692 	if (vhc->vhc_phci_driver_list == NULL)
6693 		return;
6694 
6695 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
6696 		kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name,
6697 		    strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1);
6698 	}
6699 
6700 	kmem_free(vhc->vhc_phci_driver_list,
6701 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers);
6702 }
6703 
6704 /*
6705  * Stop all vhci cache related async threads and free their resources.
6706  */
6707 static int
6708 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
6709 {
6710 	mdi_async_client_config_t *acc, *acc_next;
6711 
6712 	mutex_enter(&vhc->vhc_lock);
6713 	vhc->vhc_flags |= MDI_VHC_EXIT;
6714 	ASSERT(vhc->vhc_acc_thrcount >= 0);
6715 	cv_broadcast(&vhc->vhc_cv);
6716 
6717 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
6718 	    vhc->vhc_acc_thrcount != 0) {
6719 		mutex_exit(&vhc->vhc_lock);
6720 		delay(1);
6721 		mutex_enter(&vhc->vhc_lock);
6722 	}
6723 
6724 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
6725 
6726 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
6727 		acc_next = acc->acc_next;
6728 		free_async_client_config(acc);
6729 	}
6730 	vhc->vhc_acc_list_head = NULL;
6731 	vhc->vhc_acc_list_tail = NULL;
6732 	vhc->vhc_acc_count = 0;
6733 
6734 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6735 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6736 		mutex_exit(&vhc->vhc_lock);
6737 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
6738 			vhcache_dirty(vhc);
6739 			return (MDI_FAILURE);
6740 		}
6741 	} else
6742 		mutex_exit(&vhc->vhc_lock);
6743 
6744 	if (callb_delete(vhc->vhc_cbid) != 0)
6745 		return (MDI_FAILURE);
6746 
6747 	return (MDI_SUCCESS);
6748 }
6749 
6750 /*
6751  * Stop vhci cache flush thread
6752  */
6753 /* ARGSUSED */
6754 static boolean_t
6755 stop_vhcache_flush_thread(void *arg, int code)
6756 {
6757 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
6758 
6759 	mutex_enter(&vhc->vhc_lock);
6760 	vhc->vhc_flags |= MDI_VHC_EXIT;
6761 	cv_broadcast(&vhc->vhc_cv);
6762 
6763 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
6764 		mutex_exit(&vhc->vhc_lock);
6765 		delay(1);
6766 		mutex_enter(&vhc->vhc_lock);
6767 	}
6768 
6769 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6770 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6771 		mutex_exit(&vhc->vhc_lock);
6772 		(void) flush_vhcache(vhc, 1);
6773 	} else
6774 		mutex_exit(&vhc->vhc_lock);
6775 
6776 	return (B_TRUE);
6777 }
6778 
6779 /*
6780  * Enqueue the vhcache phci (cphci) at the tail of the list
6781  */
6782 static void
6783 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
6784 {
6785 	cphci->cphci_next = NULL;
6786 	if (vhcache->vhcache_phci_head == NULL)
6787 		vhcache->vhcache_phci_head = cphci;
6788 	else
6789 		vhcache->vhcache_phci_tail->cphci_next = cphci;
6790 	vhcache->vhcache_phci_tail = cphci;
6791 }
6792 
6793 /*
6794  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
6795  */
6796 static void
6797 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6798     mdi_vhcache_pathinfo_t *cpi)
6799 {
6800 	cpi->cpi_next = NULL;
6801 	if (cct->cct_cpi_head == NULL)
6802 		cct->cct_cpi_head = cpi;
6803 	else
6804 		cct->cct_cpi_tail->cpi_next = cpi;
6805 	cct->cct_cpi_tail = cpi;
6806 }
6807 
6808 /*
6809  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
6810  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
6811  * flag set come at the beginning of the list. All cpis which have this
6812  * flag set come at the end of the list.
6813  */
6814 static void
6815 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6816     mdi_vhcache_pathinfo_t *newcpi)
6817 {
6818 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
6819 
6820 	if (cct->cct_cpi_head == NULL ||
6821 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
6822 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
6823 	else {
6824 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
6825 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
6826 		    prev_cpi = cpi, cpi = cpi->cpi_next)
6827 			;
6828 
6829 		if (prev_cpi == NULL)
6830 			cct->cct_cpi_head = newcpi;
6831 		else
6832 			prev_cpi->cpi_next = newcpi;
6833 
6834 		newcpi->cpi_next = cpi;
6835 
6836 		if (cpi == NULL)
6837 			cct->cct_cpi_tail = newcpi;
6838 	}
6839 }
6840 
6841 /*
6842  * Enqueue the vhcache client (cct) at the tail of the list
6843  */
6844 static void
6845 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
6846     mdi_vhcache_client_t *cct)
6847 {
6848 	cct->cct_next = NULL;
6849 	if (vhcache->vhcache_client_head == NULL)
6850 		vhcache->vhcache_client_head = cct;
6851 	else
6852 		vhcache->vhcache_client_tail->cct_next = cct;
6853 	vhcache->vhcache_client_tail = cct;
6854 }
6855 
/*
 * Free an array of nelem strings together with the strings themselves.
 * A NULL array pointer and NULL elements are tolerated. Each string is
 * freed with a size of its length plus one.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] == NULL)
			continue;
		kmem_free(str[idx], strlen(str[idx]) + 1);
	}

	kmem_free(str, sizeof (char *) * nelem);
}
6869 
6870 static void
6871 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
6872 {
6873 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
6874 	kmem_free(cphci, sizeof (*cphci));
6875 }
6876 
6877 static void
6878 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
6879 {
6880 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
6881 	kmem_free(cpi, sizeof (*cpi));
6882 }
6883 
6884 static void
6885 free_vhcache_client(mdi_vhcache_client_t *cct)
6886 {
6887 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
6888 	kmem_free(cct, sizeof (*cct));
6889 }
6890 
6891 static char *
6892 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
6893 {
6894 	char *name_addr;
6895 	int len;
6896 
6897 	len = strlen(ct_name) + strlen(ct_addr) + 2;
6898 	name_addr = kmem_alloc(len, KM_SLEEP);
6899 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
6900 
6901 	if (ret_len)
6902 		*ret_len = len;
6903 	return (name_addr);
6904 }
6905 
6906 /*
6907  * Copy the contents of paddrnvl to vhci cache.
6908  * paddrnvl nvlist contains path information for a vhci client.
6909  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
6910  */
6911 static void
6912 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
6913     mdi_vhcache_client_t *cct)
6914 {
6915 	nvpair_t *nvp = NULL;
6916 	mdi_vhcache_pathinfo_t *cpi;
6917 	uint_t nelem;
6918 	uint32_t *val;
6919 
6920 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6921 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
6922 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
6923 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6924 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
6925 		ASSERT(nelem == 2);
6926 		cpi->cpi_cphci = cphci_list[val[0]];
6927 		cpi->cpi_flags = val[1];
6928 		enqueue_tail_vhcache_pathinfo(cct, cpi);
6929 	}
6930 }
6931 
6932 /*
6933  * Copy the contents of caddrmapnvl to vhci cache.
6934  * caddrmapnvl nvlist contains vhci client address to phci client address
6935  * mappings. See the comment in mainnvl_to_vhcache() for the format of
6936  * this nvlist.
6937  */
6938 static void
6939 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
6940     mdi_vhcache_phci_t *cphci_list[])
6941 {
6942 	nvpair_t *nvp = NULL;
6943 	nvlist_t *paddrnvl;
6944 	mdi_vhcache_client_t *cct;
6945 
6946 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6947 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
6948 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
6949 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6950 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
6951 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
6952 		/* the client must contain at least one path */
6953 		ASSERT(cct->cct_cpi_head != NULL);
6954 
6955 		enqueue_vhcache_client(vhcache, cct);
6956 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
6957 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
6958 	}
6959 }
6960 
6961 /*
6962  * Copy the contents of the main nvlist to vhci cache.
6963  *
6964  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
6965  * The nvlist contains the mappings between the vhci client addresses and
6966  * their corresponding phci client addresses.
6967  *
6968  * The structure of the nvlist is as follows:
6969  *
6970  * Main nvlist:
6971  *	NAME		TYPE		DATA
6972  *	version		int32		version number
6973  *	phcis		string array	array of phci paths
6974  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
6975  *
6976  * structure of c2paddrs_nvl:
6977  *	NAME		TYPE		DATA
6978  *	caddr1		nvlist_t	paddrs_nvl1
6979  *	caddr2		nvlist_t	paddrs_nvl2
6980  *	...
6981  * where caddr1, caddr2, ... are vhci client name and addresses in the
6982  * form of "<clientname>@<clientaddress>".
6983  * (for example: "ssd@2000002037cd9f72");
6984  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
6985  *
6986  * structure of paddrs_nvl:
6987  *	NAME		TYPE		DATA
6988  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
6989  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
6990  *	...
6991  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
6992  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
6993  * phci-ids are integers that identify PHCIs to which the
6994  * the bus specific address belongs to. These integers are used as an index
6995  * into to the phcis string array in the main nvlist to get the PHCI path.
6996  */
6997 static int
6998 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
6999 {
7000 	char **phcis, **phci_namep;
7001 	uint_t nphcis;
7002 	mdi_vhcache_phci_t *cphci, **cphci_list;
7003 	nvlist_t *caddrmapnvl;
7004 	int32_t ver;
7005 	int i;
7006 	size_t cphci_list_size;
7007 
7008 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7009 
7010 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7011 	    ver != MDI_VHCI_CACHE_VERSION)
7012 		return (MDI_FAILURE);
7013 
7014 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7015 	    &nphcis) != 0)
7016 		return (MDI_SUCCESS);
7017 
7018 	ASSERT(nphcis > 0);
7019 
7020 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7021 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7022 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7023 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7024 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7025 		enqueue_vhcache_phci(vhcache, cphci);
7026 		cphci_list[i] = cphci;
7027 	}
7028 
7029 	ASSERT(vhcache->vhcache_phci_head != NULL);
7030 
7031 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7032 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7033 
7034 	kmem_free(cphci_list, cphci_list_size);
7035 	return (MDI_SUCCESS);
7036 }
7037 
7038 /*
7039  * Build paddrnvl for the specified client using the information in the
7040  * vhci cache and add it to the caddrmapnnvl.
7041  * Returns 0 on success, errno on failure.
7042  */
7043 static int
7044 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7045     nvlist_t *caddrmapnvl)
7046 {
7047 	mdi_vhcache_pathinfo_t *cpi;
7048 	nvlist_t *nvl;
7049 	int err;
7050 	uint32_t val[2];
7051 
7052 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7053 
7054 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7055 		return (err);
7056 
7057 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7058 		val[0] = cpi->cpi_cphci->cphci_id;
7059 		val[1] = cpi->cpi_flags;
7060 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7061 		    != 0)
7062 			goto out;
7063 	}
7064 
7065 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7066 out:
7067 	nvlist_free(nvl);
7068 	return (err);
7069 }
7070 
7071 /*
7072  * Build caddrmapnvl using the information in the vhci cache
7073  * and add it to the mainnvl.
7074  * Returns 0 on success, errno on failure.
7075  */
7076 static int
7077 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7078 {
7079 	mdi_vhcache_client_t *cct;
7080 	nvlist_t *nvl;
7081 	int err;
7082 
7083 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7084 
7085 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7086 		return (err);
7087 
7088 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7089 	    cct = cct->cct_next) {
7090 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7091 			goto out;
7092 	}
7093 
7094 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7095 out:
7096 	nvlist_free(nvl);
7097 	return (err);
7098 }
7099 
7100 /*
7101  * Build nvlist using the information in the vhci cache.
7102  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7103  * Returns nvl on success, NULL on failure.
7104  */
7105 static nvlist_t *
7106 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7107 {
7108 	mdi_vhcache_phci_t *cphci;
7109 	uint_t phci_count;
7110 	char **phcis;
7111 	nvlist_t *nvl;
7112 	int err, i;
7113 
7114 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7115 		nvl = NULL;
7116 		goto out;
7117 	}
7118 
7119 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7120 	    MDI_VHCI_CACHE_VERSION)) != 0)
7121 		goto out;
7122 
7123 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7124 	if (vhcache->vhcache_phci_head == NULL) {
7125 		rw_exit(&vhcache->vhcache_lock);
7126 		return (nvl);
7127 	}
7128 
7129 	phci_count = 0;
7130 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7131 	    cphci = cphci->cphci_next)
7132 		cphci->cphci_id = phci_count++;
7133 
7134 	/* build phci pathname list */
7135 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7136 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7137 	    cphci = cphci->cphci_next, i++)
7138 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7139 
7140 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7141 	    phci_count);
7142 	free_string_array(phcis, phci_count);
7143 
7144 	if (err == 0 &&
7145 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7146 		rw_exit(&vhcache->vhcache_lock);
7147 		return (nvl);
7148 	}
7149 
7150 	rw_exit(&vhcache->vhcache_lock);
7151 out:
7152 	if (nvl)
7153 		nvlist_free(nvl);
7154 	return (NULL);
7155 }
7156 
7157 /*
7158  * Lookup vhcache phci structure for the specified phci path.
7159  */
7160 static mdi_vhcache_phci_t *
7161 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7162 {
7163 	mdi_vhcache_phci_t *cphci;
7164 
7165 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7166 
7167 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7168 	    cphci = cphci->cphci_next) {
7169 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7170 			return (cphci);
7171 	}
7172 
7173 	return (NULL);
7174 }
7175 
7176 /*
7177  * Lookup vhcache phci structure for the specified phci.
7178  */
7179 static mdi_vhcache_phci_t *
7180 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7181 {
7182 	mdi_vhcache_phci_t *cphci;
7183 
7184 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7185 
7186 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7187 	    cphci = cphci->cphci_next) {
7188 		if (cphci->cphci_phci == ph)
7189 			return (cphci);
7190 	}
7191 
7192 	return (NULL);
7193 }
7194 
7195 /*
7196  * Add the specified phci to the vhci cache if not already present.
7197  */
7198 static void
7199 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7200 {
7201 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7202 	mdi_vhcache_phci_t *cphci;
7203 	char *pathname;
7204 	int cache_updated;
7205 
7206 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7207 
7208 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7209 	(void) ddi_pathname(ph->ph_dip, pathname);
7210 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7211 	    != NULL) {
7212 		cphci->cphci_phci = ph;
7213 		cache_updated = 0;
7214 	} else {
7215 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7216 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7217 		cphci->cphci_phci = ph;
7218 		enqueue_vhcache_phci(vhcache, cphci);
7219 		cache_updated = 1;
7220 	}
7221 
7222 	rw_exit(&vhcache->vhcache_lock);
7223 
7224 	/*
7225 	 * Since a new phci has been added, reset
7226 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7227 	 * during next vhcache_discover_paths().
7228 	 */
7229 	mutex_enter(&vhc->vhc_lock);
7230 	vhc->vhc_path_discovery_cutoff_time = 0;
7231 	mutex_exit(&vhc->vhc_lock);
7232 
7233 	kmem_free(pathname, MAXPATHLEN);
7234 	if (cache_updated)
7235 		vhcache_dirty(vhc);
7236 }
7237 
7238 /*
7239  * Remove the reference to the specified phci from the vhci cache.
7240  */
7241 static void
7242 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7243 {
7244 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7245 	mdi_vhcache_phci_t *cphci;
7246 
7247 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7248 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7249 		/* do not remove the actual mdi_vhcache_phci structure */
7250 		cphci->cphci_phci = NULL;
7251 	}
7252 	rw_exit(&vhcache->vhcache_lock);
7253 }
7254 
7255 static void
7256 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7257     mdi_vhcache_lookup_token_t *src)
7258 {
7259 	if (src == NULL) {
7260 		dst->lt_cct = NULL;
7261 		dst->lt_cct_lookup_time = 0;
7262 	} else {
7263 		dst->lt_cct = src->lt_cct;
7264 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7265 	}
7266 }
7267 
7268 /*
7269  * Look up vhcache client for the specified client.
7270  */
7271 static mdi_vhcache_client_t *
7272 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7273     mdi_vhcache_lookup_token_t *token)
7274 {
7275 	mod_hash_val_t hv;
7276 	char *name_addr;
7277 	int len;
7278 
7279 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7280 
7281 	/*
7282 	 * If no vhcache clean occurred since the last lookup, we can
7283 	 * simply return the cct from the last lookup operation.
7284 	 * It works because ccts are never freed except during the vhcache
7285 	 * cleanup operation.
7286 	 */
7287 	if (token != NULL &&
7288 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7289 		return (token->lt_cct);
7290 
7291 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7292 	if (mod_hash_find(vhcache->vhcache_client_hash,
7293 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7294 		if (token) {
7295 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7296 			token->lt_cct_lookup_time = lbolt64;
7297 		}
7298 	} else {
7299 		if (token) {
7300 			token->lt_cct = NULL;
7301 			token->lt_cct_lookup_time = 0;
7302 		}
7303 		hv = NULL;
7304 	}
7305 	kmem_free(name_addr, len);
7306 	return ((mdi_vhcache_client_t *)hv);
7307 }
7308 
7309 /*
7310  * Add the specified path to the vhci cache if not already present.
7311  * Also add the vhcache client for the client corresponding to this path
7312  * if it doesn't already exist.
7313  */
7314 static void
7315 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7316 {
7317 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7318 	mdi_vhcache_client_t *cct;
7319 	mdi_vhcache_pathinfo_t *cpi;
7320 	mdi_phci_t *ph = pip->pi_phci;
7321 	mdi_client_t *ct = pip->pi_client;
7322 	int cache_updated = 0;
7323 
7324 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7325 
7326 	/* if vhcache client for this pip doesn't already exist, add it */
7327 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7328 	    NULL)) == NULL) {
7329 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7330 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7331 		    ct->ct_guid, NULL);
7332 		enqueue_vhcache_client(vhcache, cct);
7333 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7334 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7335 		cache_updated = 1;
7336 	}
7337 
7338 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7339 		if (cpi->cpi_cphci->cphci_phci == ph &&
7340 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7341 			cpi->cpi_pip = pip;
7342 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7343 				cpi->cpi_flags &=
7344 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7345 				sort_vhcache_paths(cct);
7346 				cache_updated = 1;
7347 			}
7348 			break;
7349 		}
7350 	}
7351 
7352 	if (cpi == NULL) {
7353 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7354 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7355 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7356 		ASSERT(cpi->cpi_cphci != NULL);
7357 		cpi->cpi_pip = pip;
7358 		enqueue_vhcache_pathinfo(cct, cpi);
7359 		cache_updated = 1;
7360 	}
7361 
7362 	rw_exit(&vhcache->vhcache_lock);
7363 
7364 	if (cache_updated)
7365 		vhcache_dirty(vhc);
7366 }
7367 
7368 /*
7369  * Remove the reference to the specified path from the vhci cache.
7370  */
7371 static void
7372 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7373 {
7374 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7375 	mdi_client_t *ct = pip->pi_client;
7376 	mdi_vhcache_client_t *cct;
7377 	mdi_vhcache_pathinfo_t *cpi;
7378 
7379 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7380 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7381 	    NULL)) != NULL) {
7382 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7383 		    cpi = cpi->cpi_next) {
7384 			if (cpi->cpi_pip == pip) {
7385 				cpi->cpi_pip = NULL;
7386 				break;
7387 			}
7388 		}
7389 	}
7390 	rw_exit(&vhcache->vhcache_lock);
7391 }
7392 
7393 /*
7394  * Flush the vhci cache to disk.
7395  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7396  */
7397 static int
7398 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7399 {
7400 	nvlist_t *nvl;
7401 	int err;
7402 	int rv;
7403 
7404 	/*
7405 	 * It is possible that the system may shutdown before
7406 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7407 	 * flushing the cache in this case do not check for
7408 	 * i_ddi_io_initialized when force flag is set.
7409 	 */
7410 	if (force_flag == 0 && !i_ddi_io_initialized())
7411 		return (MDI_FAILURE);
7412 
7413 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7414 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7415 		nvlist_free(nvl);
7416 	} else
7417 		err = EFAULT;
7418 
7419 	rv = MDI_SUCCESS;
7420 	mutex_enter(&vhc->vhc_lock);
7421 	if (err != 0) {
7422 		if (err == EROFS) {
7423 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7424 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7425 			    MDI_VHC_VHCACHE_DIRTY);
7426 		} else {
7427 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7428 				cmn_err(CE_CONT, "%s: update failed\n",
7429 				    vhc->vhc_vhcache_filename);
7430 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7431 			}
7432 			rv = MDI_FAILURE;
7433 		}
7434 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7435 		cmn_err(CE_CONT,
7436 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7437 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7438 	}
7439 	mutex_exit(&vhc->vhc_lock);
7440 
7441 	return (rv);
7442 }
7443 
7444 /*
7445  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7446  * Exits itself if left idle for the idle timeout period.
7447  */
7448 static void
7449 vhcache_flush_thread(void *arg)
7450 {
7451 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7452 	clock_t idle_time, quit_at_ticks;
7453 	callb_cpr_t cprinfo;
7454 
7455 	/* number of seconds to sleep idle before exiting */
7456 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7457 
7458 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7459 	    "mdi_vhcache_flush");
7460 	mutex_enter(&vhc->vhc_lock);
7461 	for (; ; ) {
7462 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7463 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7464 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7465 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7466 				(void) cv_timedwait(&vhc->vhc_cv,
7467 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7468 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7469 			} else {
7470 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7471 				mutex_exit(&vhc->vhc_lock);
7472 
7473 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7474 					vhcache_dirty(vhc);
7475 
7476 				mutex_enter(&vhc->vhc_lock);
7477 			}
7478 		}
7479 
7480 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7481 
7482 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7483 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7484 		    ddi_get_lbolt() < quit_at_ticks) {
7485 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7486 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7487 			    quit_at_ticks);
7488 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7489 		}
7490 
7491 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7492 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7493 			goto out;
7494 	}
7495 
7496 out:
7497 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7498 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7499 	CALLB_CPR_EXIT(&cprinfo);
7500 }
7501 
7502 /*
7503  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7504  */
7505 static void
7506 vhcache_dirty(mdi_vhci_config_t *vhc)
7507 {
7508 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7509 	int create_thread;
7510 
7511 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7512 	/* do not flush cache until the cache is fully built */
7513 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7514 		rw_exit(&vhcache->vhcache_lock);
7515 		return;
7516 	}
7517 	rw_exit(&vhcache->vhcache_lock);
7518 
7519 	mutex_enter(&vhc->vhc_lock);
7520 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7521 		mutex_exit(&vhc->vhc_lock);
7522 		return;
7523 	}
7524 
7525 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7526 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
7527 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
7528 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7529 		cv_broadcast(&vhc->vhc_cv);
7530 		create_thread = 0;
7531 	} else {
7532 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
7533 		create_thread = 1;
7534 	}
7535 	mutex_exit(&vhc->vhc_lock);
7536 
7537 	if (create_thread)
7538 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
7539 		    0, &p0, TS_RUN, minclsyspri);
7540 }
7541 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci. Built by bus_config_all_phcis() and
 * consumed (and freed) by bus_config_phci().
 */
typedef struct mdi_phci_bus_config_s {
	/* private copy of the phci path; freed by bus_config_phci() */
	char *phbc_phci_path;
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	/* next entry on the list built by bus_config_all_phcis() */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;
7551 
/*
 * vhci bus config structure - one for each vhci bus config operation.
 * Shared by all the phci bus config structures of that operation.
 */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	/* held while vhbc_thr_count is decremented or waited on */
	kmutex_t vhbc_lock;
	/* broadcast when vhbc_thr_count drops to zero */
	kcondvar_t vhbc_cv;
	/* number of bus_config_phci() calls that have not finished yet */
	int vhbc_thr_count;
} mdi_vhci_bus_config_t;
7561 
7562 /*
7563  * bus config the specified phci
7564  */
7565 static void
7566 bus_config_phci(void *arg)
7567 {
7568 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
7569 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
7570 	dev_info_t *ph_dip;
7571 
7572 	/*
7573 	 * first configure all path components upto phci and then configure
7574 	 * the phci children.
7575 	 */
7576 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
7577 	    != NULL) {
7578 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
7579 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
7580 			(void) ndi_devi_config_driver(ph_dip,
7581 			    vhbc->vhbc_op_flags,
7582 			    vhbc->vhbc_op_major);
7583 		} else
7584 			(void) ndi_devi_config(ph_dip,
7585 			    vhbc->vhbc_op_flags);
7586 
7587 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7588 		ndi_rele_devi(ph_dip);
7589 	}
7590 
7591 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
7592 	kmem_free(phbc, sizeof (*phbc));
7593 
7594 	mutex_enter(&vhbc->vhbc_lock);
7595 	vhbc->vhbc_thr_count--;
7596 	if (vhbc->vhbc_thr_count == 0)
7597 		cv_broadcast(&vhbc->vhbc_cv);
7598 	mutex_exit(&vhbc->vhbc_lock);
7599 }
7600 
7601 /*
7602  * Bus config all phcis associated with the vhci in parallel.
7603  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
7604  */
7605 static void
7606 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
7607     ddi_bus_config_op_t op, major_t maj)
7608 {
7609 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
7610 	mdi_vhci_bus_config_t *vhbc;
7611 	mdi_vhcache_phci_t *cphci;
7612 
7613 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7614 	if (vhcache->vhcache_phci_head == NULL) {
7615 		rw_exit(&vhcache->vhcache_lock);
7616 		return;
7617 	}
7618 
7619 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
7620 
7621 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7622 	    cphci = cphci->cphci_next) {
7623 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
7624 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
7625 		    KM_SLEEP);
7626 		phbc->phbc_vhbusconfig = vhbc;
7627 		phbc->phbc_next = phbc_head;
7628 		phbc_head = phbc;
7629 		vhbc->vhbc_thr_count++;
7630 	}
7631 	rw_exit(&vhcache->vhcache_lock);
7632 
7633 	vhbc->vhbc_op = op;
7634 	vhbc->vhbc_op_major = maj;
7635 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
7636 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
7637 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
7638 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
7639 
7640 	/* now create threads to initiate bus config on all phcis in parallel */
7641 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
7642 		phbc_next = phbc->phbc_next;
7643 		if (mdi_mtc_off)
7644 			bus_config_phci((void *)phbc);
7645 		else
7646 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
7647 			    0, &p0, TS_RUN, minclsyspri);
7648 	}
7649 
7650 	mutex_enter(&vhbc->vhbc_lock);
7651 	/* wait until all threads exit */
7652 	while (vhbc->vhbc_thr_count > 0)
7653 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
7654 	mutex_exit(&vhbc->vhbc_lock);
7655 
7656 	mutex_destroy(&vhbc->vhbc_lock);
7657 	cv_destroy(&vhbc->vhbc_cv);
7658 	kmem_free(vhbc, sizeof (*vhbc));
7659 }
7660 
7661 /*
7662  * Single threaded version of bus_config_all_phcis()
7663  */
7664 static void
7665 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
7666     ddi_bus_config_op_t op, major_t maj)
7667 {
7668 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7669 
7670 	single_threaded_vhconfig_enter(vhc);
7671 	bus_config_all_phcis(vhcache, flags, op, maj);
7672 	single_threaded_vhconfig_exit(vhc);
7673 }
7674 
7675 /*
7676  * Perform BUS_CONFIG_ONE on the specified child of the phci.
7677  * The path includes the child component in addition to the phci path.
7678  */
7679 static int
7680 bus_config_one_phci_child(char *path)
7681 {
7682 	dev_info_t *ph_dip, *child;
7683 	char *devnm;
7684 	int rv = MDI_FAILURE;
7685 
7686 	/* extract the child component of the phci */
7687 	devnm = strrchr(path, '/');
7688 	*devnm++ = '\0';
7689 
7690 	/*
7691 	 * first configure all path components upto phci and then
7692 	 * configure the phci child.
7693 	 */
7694 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
7695 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
7696 		    NDI_SUCCESS) {
7697 			/*
7698 			 * release the hold that ndi_devi_config_one() placed
7699 			 */
7700 			ndi_rele_devi(child);
7701 			rv = MDI_SUCCESS;
7702 		}
7703 
7704 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7705 		ndi_rele_devi(ph_dip);
7706 	}
7707 
7708 	devnm--;
7709 	*devnm = '/';
7710 	return (rv);
7711 }
7712 
7713 /*
7714  * Build a list of phci client paths for the specified vhci client.
7715  * The list includes only those phci client paths which aren't configured yet.
7716  */
7717 static mdi_phys_path_t *
7718 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
7719 {
7720 	mdi_vhcache_pathinfo_t *cpi;
7721 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
7722 	int config_path, len;
7723 
7724 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7725 		/*
7726 		 * include only those paths that aren't configured.
7727 		 */
7728 		config_path = 0;
7729 		if (cpi->cpi_pip == NULL)
7730 			config_path = 1;
7731 		else {
7732 			MDI_PI_LOCK(cpi->cpi_pip);
7733 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
7734 				config_path = 1;
7735 			MDI_PI_UNLOCK(cpi->cpi_pip);
7736 		}
7737 
7738 		if (config_path) {
7739 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
7740 			len = strlen(cpi->cpi_cphci->cphci_path) +
7741 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
7742 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
7743 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
7744 			    cpi->cpi_cphci->cphci_path, ct_name,
7745 			    cpi->cpi_addr);
7746 			pp->phys_path_next = NULL;
7747 
7748 			if (pp_head == NULL)
7749 				pp_head = pp;
7750 			else
7751 				pp_tail->phys_path_next = pp;
7752 			pp_tail = pp;
7753 		}
7754 	}
7755 
7756 	return (pp_head);
7757 }
7758 
7759 /*
7760  * Free the memory allocated for phci client path list.
7761  */
7762 static void
7763 free_phclient_path_list(mdi_phys_path_t *pp_head)
7764 {
7765 	mdi_phys_path_t *pp, *pp_next;
7766 
7767 	for (pp = pp_head; pp != NULL; pp = pp_next) {
7768 		pp_next = pp->phys_path_next;
7769 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
7770 		kmem_free(pp, sizeof (*pp));
7771 	}
7772 }
7773 
7774 /*
7775  * Allocated async client structure and initialize with the specified values.
7776  */
7777 static mdi_async_client_config_t *
7778 alloc_async_client_config(char *ct_name, char *ct_addr,
7779     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7780 {
7781 	mdi_async_client_config_t *acc;
7782 
7783 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
7784 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
7785 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
7786 	acc->acc_phclient_path_list_head = pp_head;
7787 	init_vhcache_lookup_token(&acc->acc_token, tok);
7788 	acc->acc_next = NULL;
7789 	return (acc);
7790 }
7791 
7792 /*
7793  * Free the memory allocated for the async client structure and their members.
7794  */
7795 static void
7796 free_async_client_config(mdi_async_client_config_t *acc)
7797 {
7798 	if (acc->acc_phclient_path_list_head)
7799 		free_phclient_path_list(acc->acc_phclient_path_list_head);
7800 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
7801 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
7802 	kmem_free(acc, sizeof (*acc));
7803 }
7804 
7805 /*
7806  * Sort vhcache pathinfos (cpis) of the specified client.
7807  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7808  * flag set come at the beginning of the list. All cpis which have this
7809  * flag set come at the end of the list.
7810  */
7811 static void
7812 sort_vhcache_paths(mdi_vhcache_client_t *cct)
7813 {
7814 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
7815 
7816 	cpi_head = cct->cct_cpi_head;
7817 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
7818 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
7819 		cpi_next = cpi->cpi_next;
7820 		enqueue_vhcache_pathinfo(cct, cpi);
7821 	}
7822 }
7823 
7824 /*
7825  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
7826  * every vhcache pathinfo of the specified client. If not adjust the flag
7827  * setting appropriately.
7828  *
7829  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
7830  * on-disk vhci cache. So every time this flag is updated the cache must be
7831  * flushed.
7832  */
7833 static void
7834 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7835     mdi_vhcache_lookup_token_t *tok)
7836 {
7837 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7838 	mdi_vhcache_client_t *cct;
7839 	mdi_vhcache_pathinfo_t *cpi;
7840 
7841 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7842 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
7843 	    == NULL) {
7844 		rw_exit(&vhcache->vhcache_lock);
7845 		return;
7846 	}
7847 
7848 	/*
7849 	 * to avoid unnecessary on-disk cache updates, first check if an
7850 	 * update is really needed. If no update is needed simply return.
7851 	 */
7852 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7853 		if ((cpi->cpi_pip != NULL &&
7854 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
7855 		    (cpi->cpi_pip == NULL &&
7856 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
7857 			break;
7858 		}
7859 	}
7860 	if (cpi == NULL) {
7861 		rw_exit(&vhcache->vhcache_lock);
7862 		return;
7863 	}
7864 
7865 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
7866 		rw_exit(&vhcache->vhcache_lock);
7867 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7868 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
7869 		    tok)) == NULL) {
7870 			rw_exit(&vhcache->vhcache_lock);
7871 			return;
7872 		}
7873 	}
7874 
7875 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7876 		if (cpi->cpi_pip != NULL)
7877 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7878 		else
7879 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7880 	}
7881 	sort_vhcache_paths(cct);
7882 
7883 	rw_exit(&vhcache->vhcache_lock);
7884 	vhcache_dirty(vhc);
7885 }
7886 
7887 /*
7888  * Configure all specified paths of the client.
7889  */
7890 static void
7891 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7892     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7893 {
7894 	mdi_phys_path_t *pp;
7895 
7896 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
7897 		(void) bus_config_one_phci_child(pp->phys_path);
7898 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
7899 }
7900 
7901 /*
7902  * Dequeue elements from vhci async client config list and bus configure
7903  * their corresponding phci clients.
7904  */
7905 static void
7906 config_client_paths_thread(void *arg)
7907 {
7908 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7909 	mdi_async_client_config_t *acc;
7910 	clock_t quit_at_ticks;
7911 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
7912 	callb_cpr_t cprinfo;
7913 
7914 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7915 	    "mdi_config_client_paths");
7916 
7917 	for (; ; ) {
7918 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7919 
7920 		mutex_enter(&vhc->vhc_lock);
7921 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7922 		    vhc->vhc_acc_list_head == NULL &&
7923 		    ddi_get_lbolt() < quit_at_ticks) {
7924 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7925 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7926 			    quit_at_ticks);
7927 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7928 		}
7929 
7930 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7931 		    vhc->vhc_acc_list_head == NULL)
7932 			goto out;
7933 
7934 		acc = vhc->vhc_acc_list_head;
7935 		vhc->vhc_acc_list_head = acc->acc_next;
7936 		if (vhc->vhc_acc_list_head == NULL)
7937 			vhc->vhc_acc_list_tail = NULL;
7938 		vhc->vhc_acc_count--;
7939 		mutex_exit(&vhc->vhc_lock);
7940 
7941 		config_client_paths_sync(vhc, acc->acc_ct_name,
7942 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
7943 		    &acc->acc_token);
7944 
7945 		free_async_client_config(acc);
7946 	}
7947 
7948 out:
7949 	vhc->vhc_acc_thrcount--;
7950 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7951 	CALLB_CPR_EXIT(&cprinfo);
7952 }
7953 
7954 /*
7955  * Arrange for all the phci client paths (pp_head) for the specified client
7956  * to be bus configured asynchronously by a thread.
7957  */
7958 static void
7959 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7960     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7961 {
7962 	mdi_async_client_config_t *acc, *newacc;
7963 	int create_thread;
7964 
7965 	if (pp_head == NULL)
7966 		return;
7967 
7968 	if (mdi_mtc_off) {
7969 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
7970 		free_phclient_path_list(pp_head);
7971 		return;
7972 	}
7973 
7974 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
7975 	ASSERT(newacc);
7976 
7977 	mutex_enter(&vhc->vhc_lock);
7978 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
7979 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
7980 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
7981 			free_async_client_config(newacc);
7982 			mutex_exit(&vhc->vhc_lock);
7983 			return;
7984 		}
7985 	}
7986 
7987 	if (vhc->vhc_acc_list_head == NULL)
7988 		vhc->vhc_acc_list_head = newacc;
7989 	else
7990 		vhc->vhc_acc_list_tail->acc_next = newacc;
7991 	vhc->vhc_acc_list_tail = newacc;
7992 	vhc->vhc_acc_count++;
7993 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
7994 		cv_broadcast(&vhc->vhc_cv);
7995 		create_thread = 0;
7996 	} else {
7997 		vhc->vhc_acc_thrcount++;
7998 		create_thread = 1;
7999 	}
8000 	mutex_exit(&vhc->vhc_lock);
8001 
8002 	if (create_thread)
8003 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8004 		    0, &p0, TS_RUN, minclsyspri);
8005 }
8006 
8007 /*
8008  * Return number of online paths for the specified client.
8009  */
8010 static int
8011 nonline_paths(mdi_vhcache_client_t *cct)
8012 {
8013 	mdi_vhcache_pathinfo_t *cpi;
8014 	int online_count = 0;
8015 
8016 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8017 		if (cpi->cpi_pip != NULL) {
8018 			MDI_PI_LOCK(cpi->cpi_pip);
8019 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8020 				online_count++;
8021 			MDI_PI_UNLOCK(cpi->cpi_pip);
8022 		}
8023 	}
8024 
8025 	return (online_count);
8026 }
8027 
8028 /*
8029  * Bus configure all paths for the specified vhci client.
8030  * If at least one path for the client is already online, the remaining paths
8031  * will be configured asynchronously. Otherwise, it synchronously configures
8032  * the paths until at least one path is online and then rest of the paths
8033  * will be configured asynchronously.
8034  */
8035 static void
8036 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8037 {
8038 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8039 	mdi_phys_path_t *pp_head, *pp;
8040 	mdi_vhcache_client_t *cct;
8041 	mdi_vhcache_lookup_token_t tok;
8042 
8043 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8044 
8045 	init_vhcache_lookup_token(&tok, NULL);
8046 
8047 	if (ct_name == NULL || ct_addr == NULL ||
8048 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8049 	    == NULL ||
8050 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8051 		rw_exit(&vhcache->vhcache_lock);
8052 		return;
8053 	}
8054 
8055 	/* if at least one path is online, configure the rest asynchronously */
8056 	if (nonline_paths(cct) > 0) {
8057 		rw_exit(&vhcache->vhcache_lock);
8058 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8059 		return;
8060 	}
8061 
8062 	rw_exit(&vhcache->vhcache_lock);
8063 
8064 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8065 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8066 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8067 
8068 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8069 			    ct_addr, &tok)) == NULL) {
8070 				rw_exit(&vhcache->vhcache_lock);
8071 				goto out;
8072 			}
8073 
8074 			if (nonline_paths(cct) > 0 &&
8075 			    pp->phys_path_next != NULL) {
8076 				rw_exit(&vhcache->vhcache_lock);
8077 				config_client_paths_async(vhc, ct_name, ct_addr,
8078 				    pp->phys_path_next, &tok);
8079 				pp->phys_path_next = NULL;
8080 				goto out;
8081 			}
8082 
8083 			rw_exit(&vhcache->vhcache_lock);
8084 		}
8085 	}
8086 
8087 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8088 out:
8089 	free_phclient_path_list(pp_head);
8090 }
8091 
8092 static void
8093 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8094 {
8095 	mutex_enter(&vhc->vhc_lock);
8096 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8097 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8098 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8099 	mutex_exit(&vhc->vhc_lock);
8100 }
8101 
8102 static void
8103 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8104 {
8105 	mutex_enter(&vhc->vhc_lock);
8106 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8107 	cv_broadcast(&vhc->vhc_cv);
8108 	mutex_exit(&vhc->vhc_lock);
8109 }
8110 
8111 /*
8112  * Attach the phci driver instances associated with the vhci:
8113  * If root is mounted attach all phci driver instances.
8114  * If root is not mounted, attach the instances of only those phci
8115  * drivers that have the root support.
8116  */
8117 static void
8118 attach_phci_drivers(mdi_vhci_config_t *vhc)
8119 {
8120 	int  i;
8121 	major_t m;
8122 
8123 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
8124 		if (modrootloaded == 0 &&
8125 		    vhc->vhc_phci_driver_list[i].phdriver_root_support == 0)
8126 			continue;
8127 
8128 		m = ddi_name_to_major(
8129 		    vhc->vhc_phci_driver_list[i].phdriver_name);
8130 		if (m != (major_t)-1) {
8131 			if (ddi_hold_installed_driver(m) != NULL)
8132 				ddi_rele_driver(m);
8133 		}
8134 	}
8135 }
8136 
8137 /*
8138  * Build vhci cache:
8139  *
8140  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8141  * the phci driver instances. During this process the cache gets built.
8142  *
8143  * Cache is built fully if the root is mounted.
8144  * If the root is not mounted, phci drivers that do not have root support
8145  * are not attached. As a result the cache is built partially. The entries
8146  * in the cache reflect only those phci drivers that have root support.
8147  */
8148 static int
8149 build_vhci_cache(mdi_vhci_config_t *vhc)
8150 {
8151 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8152 
8153 	single_threaded_vhconfig_enter(vhc);
8154 
8155 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8156 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8157 		rw_exit(&vhcache->vhcache_lock);
8158 		single_threaded_vhconfig_exit(vhc);
8159 		return (0);
8160 	}
8161 	rw_exit(&vhcache->vhcache_lock);
8162 
8163 	attach_phci_drivers(vhc);
8164 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8165 	    BUS_CONFIG_ALL, (major_t)-1);
8166 
8167 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8168 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8169 	rw_exit(&vhcache->vhcache_lock);
8170 
8171 	single_threaded_vhconfig_exit(vhc);
8172 	vhcache_dirty(vhc);
8173 	return (1);
8174 }
8175 
8176 /*
8177  * Determine if discovery of paths is needed.
8178  */
8179 static int
8180 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8181 {
8182 	int rv = 1;
8183 
8184 	mutex_enter(&vhc->vhc_lock);
8185 	if (i_ddi_io_initialized() == 0) {
8186 		if (vhc->vhc_path_discovery_boot > 0) {
8187 			vhc->vhc_path_discovery_boot--;
8188 			goto out;
8189 		}
8190 	} else {
8191 		if (vhc->vhc_path_discovery_postboot > 0) {
8192 			vhc->vhc_path_discovery_postboot--;
8193 			goto out;
8194 		}
8195 	}
8196 
8197 	/*
8198 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8199 	 * This is to avoid a series of full path discoveries when opening
8200 	 * stale /dev/[r]dsk links.
8201 	 */
8202 	if (mdi_path_discovery_interval != -1 &&
8203 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8204 		goto out;
8205 
8206 	rv = 0;
8207 out:
8208 	mutex_exit(&vhc->vhc_lock);
8209 	return (rv);
8210 }
8211 
8212 /*
8213  * Discover all paths:
8214  *
8215  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8216  * driver instances. During this process all paths will be discovered.
8217  */
8218 static int
8219 vhcache_discover_paths(mdi_vhci_config_t *vhc)
8220 {
8221 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8222 	int rv = 0;
8223 
8224 	single_threaded_vhconfig_enter(vhc);
8225 
8226 	if (vhcache_do_discovery(vhc)) {
8227 		attach_phci_drivers(vhc);
8228 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8229 		    NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1);
8230 
8231 		mutex_enter(&vhc->vhc_lock);
8232 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8233 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8234 		mutex_exit(&vhc->vhc_lock);
8235 		rv = 1;
8236 	}
8237 
8238 	single_threaded_vhconfig_exit(vhc);
8239 	return (rv);
8240 }
8241 
8242 /*
8243  * Generic vhci bus config implementation:
8244  *
8245  * Parameters
8246  *	vdip	vhci dip
8247  *	flags	bus config flags
8248  *	op	bus config operation
8249  *	The remaining parameters are bus config operation specific
8250  *
8251  * for BUS_CONFIG_ONE
8252  *	arg	pointer to name@addr
8253  *	child	upon successful return from this function, *child will be
8254  *		set to the configured and held devinfo child node of vdip.
8255  *	ct_addr	pointer to client address (i.e. GUID)
8256  *
8257  * for BUS_CONFIG_DRIVER
8258  *	arg	major number of the driver
8259  *	child and ct_addr parameters are ignored
8260  *
8261  * for BUS_CONFIG_ALL
8262  *	arg, child, and ct_addr parameters are ignored
8263  *
8264  * Note that for the rest of the bus config operations, this function simply
8265  * calls the framework provided default bus config routine.
8266  */
8267 int
8268 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8269     void *arg, dev_info_t **child, char *ct_addr)
8270 {
8271 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8272 	mdi_vhci_config_t *vhc = vh->vh_config;
8273 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8274 	int rv = 0;
8275 	int params_valid = 0;
8276 	char *cp;
8277 
8278 	/*
8279 	 * While bus configuring phcis, the phci driver interactions with MDI
8280 	 * cause child nodes to be enumerated under the vhci node for which
8281 	 * they need to ndi_devi_enter the vhci node.
8282 	 *
8283 	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
8284 	 * for the bus config operations on phcis to finish while holding the
8285 	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
8286 	 * phcis and call the default framework provided bus config function
8287 	 * if we are called with ndi_devi_enter lock held.
8288 	 */
8289 	if (DEVI_BUSY_OWNED(vdip)) {
8290 		MDI_DEBUG(2, (CE_NOTE, vdip,
8291 		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
8292 		goto default_bus_config;
8293 	}
8294 
8295 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8296 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8297 		rw_exit(&vhcache->vhcache_lock);
8298 		rv = build_vhci_cache(vhc);
8299 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8300 	}
8301 
8302 	switch (op) {
8303 	case BUS_CONFIG_ONE:
8304 		if (arg != NULL && ct_addr != NULL) {
8305 			/* extract node name */
8306 			cp = (char *)arg;
8307 			while (*cp != '\0' && *cp != '@')
8308 				cp++;
8309 			if (*cp == '@') {
8310 				params_valid = 1;
8311 				*cp = '\0';
8312 				config_client_paths(vhc, (char *)arg, ct_addr);
8313 				/* config_client_paths() releases cache_lock */
8314 				*cp = '@';
8315 				break;
8316 			}
8317 		}
8318 
8319 		rw_exit(&vhcache->vhcache_lock);
8320 		break;
8321 
8322 	case BUS_CONFIG_DRIVER:
8323 		rw_exit(&vhcache->vhcache_lock);
8324 		if (rv == 0)
8325 			st_bus_config_all_phcis(vhc, flags, op,
8326 			    (major_t)(uintptr_t)arg);
8327 		break;
8328 
8329 	case BUS_CONFIG_ALL:
8330 		rw_exit(&vhcache->vhcache_lock);
8331 		if (rv == 0)
8332 			st_bus_config_all_phcis(vhc, flags, op, -1);
8333 		break;
8334 
8335 	default:
8336 		rw_exit(&vhcache->vhcache_lock);
8337 		break;
8338 	}
8339 
8340 
8341 default_bus_config:
8342 	/*
8343 	 * All requested child nodes are enumerated under the vhci.
8344 	 * Now configure them.
8345 	 */
8346 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8347 	    NDI_SUCCESS) {
8348 		return (MDI_SUCCESS);
8349 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8350 		/* discover all paths and try configuring again */
8351 		if (vhcache_discover_paths(vhc) &&
8352 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8353 		    NDI_SUCCESS)
8354 			return (MDI_SUCCESS);
8355 	}
8356 
8357 	return (MDI_FAILURE);
8358 }
8359 
8360 /*
8361  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8362  */
8363 static nvlist_t *
8364 read_on_disk_vhci_cache(char *vhci_class)
8365 {
8366 	nvlist_t *nvl;
8367 	int err;
8368 	char *filename;
8369 
8370 	filename = vhclass2vhcache_filename(vhci_class);
8371 
8372 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8373 		kmem_free(filename, strlen(filename) + 1);
8374 		return (nvl);
8375 	} else if (err == EIO)
8376 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8377 	else if (err == EINVAL)
8378 		cmn_err(CE_WARN,
8379 		    "%s: data file corrupted, will recreate\n", filename);
8380 
8381 	kmem_free(filename, strlen(filename) + 1);
8382 	return (NULL);
8383 }
8384 
8385 /*
8386  * Read on-disk vhci cache into nvlists for all vhci classes.
8387  * Called during booting by i_ddi_read_devices_files().
8388  */
8389 void
8390 mdi_read_devices_files(void)
8391 {
8392 	int i;
8393 
8394 	for (i = 0; i < N_VHCI_CLASSES; i++)
8395 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
8396 }
8397 
8398 /*
8399  * Remove all stale entries from vhci cache.
8400  */
8401 static void
8402 clean_vhcache(mdi_vhci_config_t *vhc)
8403 {
8404 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8405 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
8406 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
8407 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
8408 
8409 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8410 
8411 	cct_head = vhcache->vhcache_client_head;
8412 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
8413 	for (cct = cct_head; cct != NULL; cct = cct_next) {
8414 		cct_next = cct->cct_next;
8415 
8416 		cpi_head = cct->cct_cpi_head;
8417 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8418 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8419 			cpi_next = cpi->cpi_next;
8420 			if (cpi->cpi_pip != NULL) {
8421 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
8422 				enqueue_tail_vhcache_pathinfo(cct, cpi);
8423 			} else
8424 				free_vhcache_pathinfo(cpi);
8425 		}
8426 
8427 		if (cct->cct_cpi_head != NULL)
8428 			enqueue_vhcache_client(vhcache, cct);
8429 		else {
8430 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
8431 			    (mod_hash_key_t)cct->cct_name_addr);
8432 			free_vhcache_client(cct);
8433 		}
8434 	}
8435 
8436 	cphci_head = vhcache->vhcache_phci_head;
8437 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
8438 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
8439 		cphci_next = cphci->cphci_next;
8440 		if (cphci->cphci_phci != NULL)
8441 			enqueue_vhcache_phci(vhcache, cphci);
8442 		else
8443 			free_vhcache_phci(cphci);
8444 	}
8445 
8446 	vhcache->vhcache_clean_time = lbolt64;
8447 	rw_exit(&vhcache->vhcache_lock);
8448 	vhcache_dirty(vhc);
8449 }
8450 
8451 /*
8452  * Remove all stale entries from vhci cache.
8453  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
8454  */
8455 void
8456 mdi_clean_vhcache(void)
8457 {
8458 	mdi_vhci_t *vh;
8459 
8460 	mutex_enter(&mdi_mutex);
8461 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8462 		vh->vh_refcnt++;
8463 		mutex_exit(&mdi_mutex);
8464 		clean_vhcache(vh->vh_config);
8465 		mutex_enter(&mdi_mutex);
8466 		vh->vh_refcnt--;
8467 	}
8468 	mutex_exit(&mdi_mutex);
8469 }
8470 
8471 /*
8472  * mdi_vhci_walk_clients():
8473  *		Walker routine to traverse client dev_info nodes
8474  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
8475  * below the client, including nexus devices, which we dont want.
8476  * So we just traverse the immediate siblings, starting from 1st client.
8477  */
8478 void
8479 mdi_vhci_walk_clients(dev_info_t *vdip,
8480     int (*f)(dev_info_t *, void *), void *arg)
8481 {
8482 	dev_info_t	*cdip;
8483 	mdi_client_t	*ct;
8484 
8485 	mutex_enter(&mdi_mutex);
8486 
8487 	cdip = ddi_get_child(vdip);
8488 
8489 	while (cdip) {
8490 		ct = i_devi_get_client(cdip);
8491 		MDI_CLIENT_LOCK(ct);
8492 
8493 		switch ((*f)(cdip, arg)) {
8494 		case DDI_WALK_CONTINUE:
8495 			cdip = ddi_get_next_sibling(cdip);
8496 			MDI_CLIENT_UNLOCK(ct);
8497 			break;
8498 
8499 		default:
8500 			MDI_CLIENT_UNLOCK(ct);
8501 			mutex_exit(&mdi_mutex);
8502 			return;
8503 		}
8504 	}
8505 
8506 	mutex_exit(&mdi_mutex);
8507 }
8508 
8509 /*
8510  * mdi_vhci_walk_phcis():
8511  *		Walker routine to traverse phci dev_info nodes
8512  */
8513 void
8514 mdi_vhci_walk_phcis(dev_info_t *vdip,
8515     int (*f)(dev_info_t *, void *), void *arg)
8516 {
8517 	mdi_vhci_t	*vh = NULL;
8518 	mdi_phci_t	*ph = NULL;
8519 
8520 	mutex_enter(&mdi_mutex);
8521 
8522 	vh = i_devi_get_vhci(vdip);
8523 	ph = vh->vh_phci_head;
8524 
8525 	while (ph) {
8526 		MDI_PHCI_LOCK(ph);
8527 
8528 		switch ((*f)(ph->ph_dip, arg)) {
8529 		case DDI_WALK_CONTINUE:
8530 			MDI_PHCI_UNLOCK(ph);
8531 			ph = ph->ph_next;
8532 			break;
8533 
8534 		default:
8535 			MDI_PHCI_UNLOCK(ph);
8536 			mutex_exit(&mdi_mutex);
8537 			return;
8538 		}
8539 	}
8540 
8541 	mutex_exit(&mdi_mutex);
8542 }
8543 
8544 
8545 /*
8546  * mdi_walk_vhcis():
8547  *		Walker routine to traverse vhci dev_info nodes
8548  */
8549 void
8550 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
8551 {
8552 	mdi_vhci_t	*vh = NULL;
8553 
8554 	mutex_enter(&mdi_mutex);
8555 	/*
8556 	 * Scan for already registered vhci
8557 	 */
8558 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8559 		vh->vh_refcnt++;
8560 		mutex_exit(&mdi_mutex);
8561 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
8562 			mutex_enter(&mdi_mutex);
8563 			vh->vh_refcnt--;
8564 			break;
8565 		} else {
8566 			mutex_enter(&mdi_mutex);
8567 			vh->vh_refcnt--;
8568 		}
8569 	}
8570 
8571 	mutex_exit(&mdi_mutex);
8572 }
8573 
8574 /*
8575  * i_mdi_log_sysevent():
8576  *		Logs events for pickup by syseventd
8577  */
8578 static void
8579 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
8580 {
8581 	char		*path_name;
8582 	nvlist_t	*attr_list;
8583 
8584 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
8585 	    KM_SLEEP) != DDI_SUCCESS) {
8586 		goto alloc_failed;
8587 	}
8588 
8589 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
8590 	(void) ddi_pathname(dip, path_name);
8591 
8592 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
8593 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
8594 		goto error;
8595 	}
8596 
8597 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
8598 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
8599 		goto error;
8600 	}
8601 
8602 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
8603 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
8604 		goto error;
8605 	}
8606 
8607 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
8608 	    path_name) != DDI_SUCCESS) {
8609 		goto error;
8610 	}
8611 
8612 	if (nvlist_add_string(attr_list, DDI_CLASS,
8613 	    ph_vh_class) != DDI_SUCCESS) {
8614 		goto error;
8615 	}
8616 
8617 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
8618 	    attr_list, NULL, DDI_SLEEP);
8619 
8620 error:
8621 	kmem_free(path_name, MAXPATHLEN);
8622 	nvlist_free(attr_list);
8623 	return;
8624 
8625 alloc_failed:
8626 	MDI_DEBUG(1, (CE_WARN, dip,
8627 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
8628 }
8629