xref: /titanic_41/usr/src/uts/common/os/sunmdi.c (revision 724365f7556fc4201fdb11766ebc6bd918523130)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 /*
28  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
29  * detailed discussion of the overall mpxio architecture.
30  *
31  * Default locking order:
32  *
33  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
35  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
37  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
38  */
39 
40 #include <sys/note.h>
41 #include <sys/types.h>
42 #include <sys/varargs.h>
43 #include <sys/param.h>
44 #include <sys/errno.h>
45 #include <sys/uio.h>
46 #include <sys/buf.h>
47 #include <sys/modctl.h>
48 #include <sys/open.h>
49 #include <sys/kmem.h>
50 #include <sys/poll.h>
51 #include <sys/conf.h>
52 #include <sys/bootconf.h>
53 #include <sys/cmn_err.h>
54 #include <sys/stat.h>
55 #include <sys/ddi.h>
56 #include <sys/sunddi.h>
57 #include <sys/ddipropdefs.h>
58 #include <sys/sunndi.h>
59 #include <sys/ndi_impldefs.h>
60 #include <sys/promif.h>
61 #include <sys/sunmdi.h>
62 #include <sys/mdi_impldefs.h>
63 #include <sys/taskq.h>
64 #include <sys/epm.h>
65 #include <sys/sunpm.h>
66 #include <sys/modhash.h>
67 #include <sys/disp.h>
68 #include <sys/autoconf.h>
69 
/*
 * Debug logging support.
 *
 * MDI_DEBUG(level, (args)) hands the parenthesised argument list to
 * i_mdi_log() when the mdi_debug variable is at least `level'.  In non-DEBUG
 * builds the macro expands to nothing.
 *
 * The DEBUG expansion is wrapped in do { } while (0) so that it forms a
 * single statement.  A bare `if' here would silently capture the `else' of
 * an enclosing unbraced if/else at the call site.
 */
#ifdef	DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
		_NOTE(CONSTCOND) \
	} while (0)
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
79 
80 extern pri_t	minclsyspri;
81 extern int	modrootloaded;
82 
83 /*
84  * Global mutex:
85  * Protects vHCI list and structure members, pHCI and Client lists.
86  */
87 kmutex_t	mdi_mutex;
88 
89 /*
90  * Registered vHCI class driver lists
91  */
92 int		mdi_vhci_count;
93 mdi_vhci_t	*mdi_vhci_head;
94 mdi_vhci_t	*mdi_vhci_tail;
95 
96 /*
97  * Client Hash Table size
98  */
99 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
100 
101 /*
102  * taskq interface definitions
103  */
104 #define	MDI_TASKQ_N_THREADS	8
105 #define	MDI_TASKQ_PRI		minclsyspri
106 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
107 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
108 
109 taskq_t				*mdi_taskq;
110 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
111 
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * vhci cache flushing:
 *
 * mdi_vhcache_flush_delay
 *	Number of seconds the data must remain "quiet" before the vhci
 *	cached data is flushed to the disk.
 *
 * mdi_vhcache_flush_daemon_idle_time
 *	Number of seconds the vhcache flush daemon sleeps idle before it
 *	exits.
 */
static int mdi_vhcache_flush_delay = 10;
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * Path discovery:
 *
 * When a bus_config_one fails, MDI falls back to discovering all paths.
 * That fallback is tuned with the following parameters.
 *
 * mdi_path_discovery_boot
 *	How many times path discovery is attempted during early boot.
 *	There is probably never a reason to raise this above one.
 *
 * mdi_path_discovery_postboot
 *	How many times path discovery is attempted after early boot.
 *	Keep it at two or more so that iscsi paths, which may show up very
 *	late during booting, can still be discovered.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds between successive discoveries of all
 *	paths.  A value of -1 disables discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * Number of seconds the asynchronous configuration thread sleeps idle
 * before it exits.
 */
static int mdi_async_config_idle_time = 600;

static int mdi_bus_config_cache_hash_size = 256;

/* set to turn off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;
154 
/*
 * Name of the MDI component property and the values it may take.
 */
const char *mdi_component_prop = "mpxio-component";
const char *mdi_component_prop_vhci = "vhci";
const char *mdi_component_prop_phci = "phci";
const char *mdi_component_prop_client = "client";

/*
 * Name of the property that carries a client's global unique identifier.
 */
const char *mdi_client_guid_prop = "client-guid";

/*
 * Name of the client load balancing property and the values it may take.
 */
const char *mdi_load_balance = "load-balance";
const char *mdi_load_balance_none = "none";
const char *mdi_load_balance_rr = "round-robin";
const char *mdi_load_balance_lba = "logical-block";
175 
176 /*
177  * Obsolete vHCI class definition; to be removed after Leadville update
178  */
179 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
180 
181 static char vhci_greeting[] =
182 	"\tThere already exists one vHCI driver for class %s\n"
183 	"\tOnly one vHCI driver for each class is allowed\n";
184 
185 /*
186  * Static function prototypes
187  */
188 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
189 static int		i_mdi_client_offline(dev_info_t *, uint_t);
190 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
191 static void		i_mdi_phci_post_detach(dev_info_t *,
192 			    ddi_detach_cmd_t, int);
193 static int		i_mdi_client_pre_detach(dev_info_t *,
194 			    ddi_detach_cmd_t);
195 static void		i_mdi_client_post_detach(dev_info_t *,
196 			    ddi_detach_cmd_t, int);
197 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
198 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
199 static int 		i_mdi_lba_lb(mdi_client_t *ct,
200 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
201 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
202 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
203 static void		i_mdi_pm_reset_client(mdi_client_t *);
204 static void		i_mdi_pm_hold_all_phci(mdi_client_t *);
205 static int		i_mdi_power_all_phci(mdi_client_t *);
206 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
207 
208 
209 /*
210  * Internal mdi_pathinfo node functions
211  */
212 static int		i_mdi_pi_kstat_create(mdi_pathinfo_t *);
213 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
214 
215 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
216 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
217 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
218 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
219 static void		i_mdi_phci_get_client_lock(mdi_phci_t *,
220 			    mdi_client_t *);
221 static void		i_mdi_phci_unlock(mdi_phci_t *);
222 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
223 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
224 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
225 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
226 			    mdi_client_t *);
227 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
228 static void		i_mdi_client_remove_path(mdi_client_t *,
229 			    mdi_pathinfo_t *);
230 
231 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
232 			    mdi_pathinfo_state_t, int);
233 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
234 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
235 			    char **, int);
236 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
237 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
238 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
239 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
240 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
241 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
242 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
243 static void		i_mdi_client_update_state(mdi_client_t *);
244 static int		i_mdi_client_compute_state(mdi_client_t *,
245 			    mdi_phci_t *);
246 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
247 static void		i_mdi_client_unlock(mdi_client_t *);
248 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
249 static mdi_client_t	*i_devi_get_client(dev_info_t *);
250 /*
251  * NOTE: this will be removed once the NWS files are changed to use the new
252  * mdi_{enable,disable}_path interfaces
253  */
254 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
255 				int, int);
256 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
257 				mdi_vhci_t *vh, int flags, int op);
258 /*
259  * Failover related function prototypes
260  */
261 static int		i_mdi_failover(void *);
262 
263 /*
264  * misc internal functions
265  */
266 static int		i_mdi_get_hash_key(char *);
267 static int		i_map_nvlist_error_to_mdi(int);
268 static void		i_mdi_report_path_state(mdi_client_t *,
269 			    mdi_pathinfo_t *);
270 
271 static void		setup_vhci_cache(mdi_vhci_t *);
272 static int		destroy_vhci_cache(mdi_vhci_t *);
273 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
274 static boolean_t	stop_vhcache_flush_thread(void *, int);
275 static void		free_string_array(char **, int);
276 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
277 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
278 static void		free_vhcache_client(mdi_vhcache_client_t *);
279 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
280 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
281 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
282 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
283 static void		vhcache_pi_add(mdi_vhci_config_t *,
284 			    struct mdi_pathinfo *);
285 static void		vhcache_pi_remove(mdi_vhci_config_t *,
286 			    struct mdi_pathinfo *);
287 static void		free_phclient_path_list(mdi_phys_path_t *);
288 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
289 static int		flush_vhcache(mdi_vhci_config_t *, int);
290 static void		vhcache_dirty(mdi_vhci_config_t *);
291 static void		free_async_client_config(mdi_async_client_config_t *);
292 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
293 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
294 static nvlist_t		*read_on_disk_vhci_cache(char *);
295 extern int		fread_nvlist(char *, nvlist_t **);
296 extern int		fwrite_nvlist(char *, nvlist_t *);
297 
298 /* called once when first vhci registers with mdi */
299 static void
300 i_mdi_init()
301 {
302 	static int initialized = 0;
303 
304 	if (initialized)
305 		return;
306 	initialized = 1;
307 
308 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
309 	/*
310 	 * Create our taskq resources
311 	 */
312 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
313 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
314 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
315 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
316 }
317 
318 /*
319  * mdi_get_component_type():
320  *		Return mpxio component type
321  * Return Values:
322  *		MDI_COMPONENT_NONE
323  *		MDI_COMPONENT_VHCI
324  *		MDI_COMPONENT_PHCI
325  *		MDI_COMPONENT_CLIENT
326  * XXX This doesn't work under multi-level MPxIO and should be
327  *	removed when clients migrate mdi_is_*() interfaces.
328  */
329 int
330 mdi_get_component_type(dev_info_t *dip)
331 {
332 	return (DEVI(dip)->devi_mdi_component);
333 }
334 
335 /*
336  * mdi_vhci_register():
337  *		Register a vHCI module with the mpxio framework
338  *		mdi_vhci_register() is called by vHCI drivers to register the
339  *		'class_driver' vHCI driver and its MDI entrypoints with the
340  *		mpxio framework.  The vHCI driver must call this interface as
341  *		part of its attach(9e) handler.
342  *		Competing threads may try to attach mdi_vhci_register() as
343  *		the vHCI drivers are loaded and attached as a result of pHCI
344  *		driver instance registration (mdi_phci_register()) with the
345  *		framework.
346  * Return Values:
347  *		MDI_SUCCESS
348  *		MDI_FAILURE
349  */
350 
351 /*ARGSUSED*/
352 int
353 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
354     int flags)
355 {
356 	mdi_vhci_t		*vh = NULL;
357 
358 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
359 
360 	i_mdi_init();
361 
362 	mutex_enter(&mdi_mutex);
363 	/*
364 	 * Scan for already registered vhci
365 	 */
366 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
367 		if (strcmp(vh->vh_class, class) == 0) {
368 			/*
369 			 * vHCI has already been created.  Check for valid
370 			 * vHCI ops registration.  We only support one vHCI
371 			 * module per class
372 			 */
373 			if (vh->vh_ops != NULL) {
374 				mutex_exit(&mdi_mutex);
375 				cmn_err(CE_NOTE, vhci_greeting, class);
376 				return (MDI_FAILURE);
377 			}
378 			break;
379 		}
380 	}
381 
382 	/*
383 	 * if not yet created, create the vHCI component
384 	 */
385 	if (vh == NULL) {
386 		struct client_hash	*hash = NULL;
387 		char			*load_balance;
388 
389 		/*
390 		 * Allocate and initialize the mdi extensions
391 		 */
392 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
393 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
394 		    KM_SLEEP);
395 		vh->vh_client_table = hash;
396 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
397 		(void) strcpy(vh->vh_class, class);
398 		vh->vh_lb = LOAD_BALANCE_RR;
399 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
400 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
401 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
402 				vh->vh_lb = LOAD_BALANCE_NONE;
403 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
404 				    == 0) {
405 				vh->vh_lb = LOAD_BALANCE_LBA;
406 			}
407 			ddi_prop_free(load_balance);
408 		}
409 
410 		/*
411 		 * Store the vHCI ops vectors
412 		 */
413 		vh->vh_dip = vdip;
414 		vh->vh_ops = vops;
415 
416 		setup_vhci_cache(vh);
417 
418 		if (mdi_vhci_head == NULL) {
419 			mdi_vhci_head = vh;
420 		}
421 		if (mdi_vhci_tail) {
422 			mdi_vhci_tail->vh_next = vh;
423 		}
424 		mdi_vhci_tail = vh;
425 		mdi_vhci_count++;
426 	}
427 
428 	/*
429 	 * Claim the devfs node as a vhci component
430 	 */
431 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
432 
433 	/*
434 	 * Initialize our back reference from dev_info node
435 	 */
436 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
437 	mutex_exit(&mdi_mutex);
438 	return (MDI_SUCCESS);
439 }
440 
441 /*
442  * mdi_vhci_unregister():
443  *		Unregister a vHCI module from mpxio framework
444  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
445  * 		of a vhci to unregister it from the framework.
446  * Return Values:
447  *		MDI_SUCCESS
448  *		MDI_FAILURE
449  */
450 
451 /*ARGSUSED*/
452 int
453 mdi_vhci_unregister(dev_info_t *vdip, int flags)
454 {
455 	mdi_vhci_t	*found, *vh, *prev = NULL;
456 
457 	/*
458 	 * Check for invalid VHCI
459 	 */
460 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
461 		return (MDI_FAILURE);
462 
463 	mutex_enter(&mdi_mutex);
464 
465 	/*
466 	 * Scan the list of registered vHCIs for a match
467 	 */
468 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
469 		if (found == vh)
470 			break;
471 		prev = found;
472 	}
473 
474 	if (found == NULL) {
475 		mutex_exit(&mdi_mutex);
476 		return (MDI_FAILURE);
477 	}
478 
479 	/*
480 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
481 	 * should have been unregistered, before a vHCI can be
482 	 * unregistered.
483 	 */
484 	if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) {
485 		mutex_exit(&mdi_mutex);
486 		return (MDI_FAILURE);
487 	}
488 
489 	/*
490 	 * Remove the vHCI from the global list
491 	 */
492 	if (vh == mdi_vhci_head) {
493 		mdi_vhci_head = vh->vh_next;
494 	} else {
495 		prev->vh_next = vh->vh_next;
496 	}
497 	if (vh == mdi_vhci_tail) {
498 		mdi_vhci_tail = prev;
499 	}
500 
501 	mdi_vhci_count--;
502 	mutex_exit(&mdi_mutex);
503 
504 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
505 		/* add vhci to the global list */
506 		mutex_enter(&mdi_mutex);
507 		if (mdi_vhci_head == NULL)
508 			mdi_vhci_head = vh;
509 		else
510 			mdi_vhci_tail->vh_next = vh;
511 		mdi_vhci_tail = vh;
512 		mdi_vhci_count++;
513 		mutex_exit(&mdi_mutex);
514 		return (MDI_FAILURE);
515 	}
516 
517 	vh->vh_ops = NULL;
518 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
519 	DEVI(vdip)->devi_mdi_xhci = NULL;
520 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
521 	kmem_free(vh->vh_client_table,
522 	    mdi_client_table_size * sizeof (struct client_hash));
523 
524 	kmem_free(vh, sizeof (mdi_vhci_t));
525 	return (MDI_SUCCESS);
526 }
527 
528 /*
529  * i_mdi_vhci_class2vhci():
530  *		Look for a matching vHCI module given a vHCI class name
531  * Return Values:
532  *		Handle to a vHCI component
533  *		NULL
534  */
535 static mdi_vhci_t *
536 i_mdi_vhci_class2vhci(char *class)
537 {
538 	mdi_vhci_t	*vh = NULL;
539 
540 	ASSERT(!MUTEX_HELD(&mdi_mutex));
541 
542 	mutex_enter(&mdi_mutex);
543 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
544 		if (strcmp(vh->vh_class, class) == 0) {
545 			break;
546 		}
547 	}
548 	mutex_exit(&mdi_mutex);
549 	return (vh);
550 }
551 
552 /*
553  * i_devi_get_vhci():
554  *		Utility function to get the handle to a vHCI component
555  * Return Values:
556  *		Handle to a vHCI component
557  *		NULL
558  */
559 mdi_vhci_t *
560 i_devi_get_vhci(dev_info_t *vdip)
561 {
562 	mdi_vhci_t	*vh = NULL;
563 	if (MDI_VHCI(vdip)) {
564 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
565 	}
566 	return (vh);
567 }
568 
569 /*
570  * mdi_phci_register():
571  *		Register a pHCI module with mpxio framework
572  *		mdi_phci_register() is called by pHCI drivers to register with
573  *		the mpxio framework and a specific 'class_driver' vHCI.  The
574  *		pHCI driver must call this interface as part of its attach(9e)
575  *		handler.
576  * Return Values:
577  *		MDI_SUCCESS
578  *		MDI_FAILURE
579  */
580 
581 /*ARGSUSED*/
582 int
583 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
584 {
585 	mdi_phci_t		*ph;
586 	mdi_vhci_t		*vh;
587 	char			*data;
588 	char			*pathname;
589 
590 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
591 	(void) ddi_pathname(pdip, pathname);
592 
593 	/*
594 	 * Check for mpxio-disable property. Enable mpxio if the property is
595 	 * missing or not set to "yes".
596 	 * If the property is set to "yes" then emit a brief message.
597 	 */
598 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
599 	    &data) == DDI_SUCCESS)) {
600 		if (strcmp(data, "yes") == 0) {
601 			MDI_DEBUG(1, (CE_CONT, pdip,
602 			    "?%s (%s%d) multipath capabilities "
603 			    "disabled via %s.conf.\n", pathname,
604 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
605 			    ddi_driver_name(pdip)));
606 			ddi_prop_free(data);
607 			kmem_free(pathname, MAXPATHLEN);
608 			return (MDI_FAILURE);
609 		}
610 		ddi_prop_free(data);
611 	}
612 
613 	kmem_free(pathname, MAXPATHLEN);
614 
615 	/*
616 	 * Search for a matching vHCI
617 	 */
618 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
619 	if (vh == NULL) {
620 		return (MDI_FAILURE);
621 	}
622 
623 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
624 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
625 	ph->ph_dip = pdip;
626 	ph->ph_vhci = vh;
627 	ph->ph_next = NULL;
628 	ph->ph_unstable = 0;
629 	ph->ph_vprivate = 0;
630 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
631 	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);
632 
633 	MDI_PHCI_SET_POWER_UP(ph);
634 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
635 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
636 
637 	vhcache_phci_add(vh->vh_config, ph);
638 
639 	mutex_enter(&mdi_mutex);
640 	if (vh->vh_phci_head == NULL) {
641 		vh->vh_phci_head = ph;
642 	}
643 	if (vh->vh_phci_tail) {
644 		vh->vh_phci_tail->ph_next = ph;
645 	}
646 	vh->vh_phci_tail = ph;
647 	vh->vh_phci_count++;
648 	mutex_exit(&mdi_mutex);
649 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
650 	return (MDI_SUCCESS);
651 }
652 
653 /*
654  * mdi_phci_unregister():
655  *		Unregister a pHCI module from mpxio framework
656  *		mdi_phci_unregister() is called by the pHCI drivers from their
657  *		detach(9E) handler to unregister their instances from the
658  *		framework.
659  * Return Values:
660  *		MDI_SUCCESS
661  *		MDI_FAILURE
662  */
663 
664 /*ARGSUSED*/
665 int
666 mdi_phci_unregister(dev_info_t *pdip, int flags)
667 {
668 	mdi_vhci_t		*vh;
669 	mdi_phci_t		*ph;
670 	mdi_phci_t		*tmp;
671 	mdi_phci_t		*prev = NULL;
672 
673 	ph = i_devi_get_phci(pdip);
674 	if (ph == NULL) {
675 		MDI_DEBUG(1, (CE_WARN, pdip,
676 		    "!pHCI unregister: Not a valid pHCI"));
677 		return (MDI_FAILURE);
678 	}
679 
680 	vh = ph->ph_vhci;
681 	ASSERT(vh != NULL);
682 	if (vh == NULL) {
683 		MDI_DEBUG(1, (CE_WARN, pdip,
684 		    "!pHCI unregister: Not a valid vHCI"));
685 		return (MDI_FAILURE);
686 	}
687 
688 	mutex_enter(&mdi_mutex);
689 	tmp = vh->vh_phci_head;
690 	while (tmp) {
691 		if (tmp == ph) {
692 			break;
693 		}
694 		prev = tmp;
695 		tmp = tmp->ph_next;
696 	}
697 
698 	if (ph == vh->vh_phci_head) {
699 		vh->vh_phci_head = ph->ph_next;
700 	} else {
701 		prev->ph_next = ph->ph_next;
702 	}
703 
704 	if (ph == vh->vh_phci_tail) {
705 		vh->vh_phci_tail = prev;
706 	}
707 
708 	vh->vh_phci_count--;
709 
710 	mutex_exit(&mdi_mutex);
711 
712 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
713 	    ESC_DDI_INITIATOR_UNREGISTER);
714 	vhcache_phci_remove(vh->vh_config, ph);
715 	cv_destroy(&ph->ph_unstable_cv);
716 	cv_destroy(&ph->ph_powerchange_cv);
717 	mutex_destroy(&ph->ph_mutex);
718 	kmem_free(ph, sizeof (mdi_phci_t));
719 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
720 	DEVI(pdip)->devi_mdi_xhci = NULL;
721 	return (MDI_SUCCESS);
722 }
723 
724 /*
725  * i_devi_get_phci():
726  * 		Utility function to return the phci extensions.
727  */
728 static mdi_phci_t *
729 i_devi_get_phci(dev_info_t *pdip)
730 {
731 	mdi_phci_t	*ph = NULL;
732 	if (MDI_PHCI(pdip)) {
733 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
734 	}
735 	return (ph);
736 }
737 
738 /*
739  * mdi_phci_path2devinfo():
740  * 		Utility function to search for a valid phci device given
741  *		the devfs pathname.
742  */
743 
744 dev_info_t *
745 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
746 {
747 	char		*temp_pathname;
748 	mdi_vhci_t	*vh;
749 	mdi_phci_t	*ph;
750 	dev_info_t 	*pdip = NULL;
751 
752 	vh = i_devi_get_vhci(vdip);
753 	ASSERT(vh != NULL);
754 
755 	if (vh == NULL) {
756 		/*
757 		 * Invalid vHCI component, return failure
758 		 */
759 		return (NULL);
760 	}
761 
762 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
763 	mutex_enter(&mdi_mutex);
764 	ph = vh->vh_phci_head;
765 	while (ph != NULL) {
766 		pdip = ph->ph_dip;
767 		ASSERT(pdip != NULL);
768 		*temp_pathname = '\0';
769 		(void) ddi_pathname(pdip, temp_pathname);
770 		if (strcmp(temp_pathname, pathname) == 0) {
771 			break;
772 		}
773 		ph = ph->ph_next;
774 	}
775 	if (ph == NULL) {
776 		pdip = NULL;
777 	}
778 	mutex_exit(&mdi_mutex);
779 	kmem_free(temp_pathname, MAXPATHLEN);
780 	return (pdip);
781 }
782 
783 /*
784  * mdi_phci_get_path_count():
785  * 		get number of path information nodes associated with a given
786  *		pHCI device.
787  */
788 int
789 mdi_phci_get_path_count(dev_info_t *pdip)
790 {
791 	mdi_phci_t	*ph;
792 	int		count = 0;
793 
794 	ph = i_devi_get_phci(pdip);
795 	if (ph != NULL) {
796 		count = ph->ph_path_count;
797 	}
798 	return (count);
799 }
800 
801 /*
802  * i_mdi_phci_lock():
803  *		Lock a pHCI device
804  * Return Values:
805  *		None
806  * Note:
807  *		The default locking order is:
808  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
809  *		But there are number of situations where locks need to be
810  *		grabbed in reverse order.  This routine implements try and lock
811  *		mechanism depending on the requested parameter option.
812  */
813 static void
814 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
815 {
816 	if (pip) {
817 		/* Reverse locking is requested. */
818 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
819 			/*
820 			 * tryenter failed. Try to grab again
821 			 * after a small delay
822 			 */
823 			MDI_PI_HOLD(pip);
824 			MDI_PI_UNLOCK(pip);
825 			delay(1);
826 			MDI_PI_LOCK(pip);
827 			MDI_PI_RELE(pip);
828 		}
829 	} else {
830 		MDI_PHCI_LOCK(ph);
831 	}
832 }
833 
834 /*
835  * i_mdi_phci_get_client_lock():
836  *		Lock a pHCI device
837  * Return Values:
838  *		None
839  * Note:
840  *		The default locking order is:
841  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
842  *		But there are number of situations where locks need to be
843  *		grabbed in reverse order.  This routine implements try and lock
844  *		mechanism depending on the requested parameter option.
845  */
846 static void
847 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
848 {
849 	if (ct) {
850 		/* Reverse locking is requested. */
851 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
852 			/*
853 			 * tryenter failed. Try to grab again
854 			 * after a small delay
855 			 */
856 			MDI_CLIENT_UNLOCK(ct);
857 			delay(1);
858 			MDI_CLIENT_LOCK(ct);
859 		}
860 	} else {
861 		MDI_PHCI_LOCK(ph);
862 	}
863 }
864 
865 /*
866  * i_mdi_phci_unlock():
867  *		Unlock the pHCI component
868  */
869 static void
870 i_mdi_phci_unlock(mdi_phci_t *ph)
871 {
872 	MDI_PHCI_UNLOCK(ph);
873 }
874 
875 /*
876  * i_mdi_devinfo_create():
877  *		create client device's devinfo node
878  * Return Values:
879  *		dev_info
880  *		NULL
881  * Notes:
882  */
883 static dev_info_t *
884 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
885 	char **compatible, int ncompatible)
886 {
887 	dev_info_t *cdip = NULL;
888 
889 	ASSERT(MUTEX_HELD(&mdi_mutex));
890 
891 	/* Verify for duplicate entry */
892 	cdip = i_mdi_devinfo_find(vh, name, guid);
893 	ASSERT(cdip == NULL);
894 	if (cdip) {
895 		cmn_err(CE_WARN,
896 		    "i_mdi_devinfo_create: client dip %p already exists",
897 			(void *)cdip);
898 	}
899 
900 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
901 	if (cdip == NULL)
902 		goto fail;
903 
904 	/*
905 	 * Create component type and Global unique identifier
906 	 * properties
907 	 */
908 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
909 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
910 		goto fail;
911 	}
912 
913 	/* Decorate the node with compatible property */
914 	if (compatible &&
915 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
916 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
917 		goto fail;
918 	}
919 
920 	return (cdip);
921 
922 fail:
923 	if (cdip) {
924 		(void) ndi_prop_remove_all(cdip);
925 		(void) ndi_devi_free(cdip);
926 	}
927 	return (NULL);
928 }
929 
930 /*
931  * i_mdi_devinfo_find():
932  *		Find a matching devinfo node for given client node name
933  *		and its guid.
934  * Return Values:
935  *		Handle to a dev_info node or NULL
936  */
937 
938 static dev_info_t *
939 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
940 {
941 	char			*data;
942 	dev_info_t 		*cdip = NULL;
943 	dev_info_t 		*ndip = NULL;
944 	int			circular;
945 
946 	ndi_devi_enter(vh->vh_dip, &circular);
947 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
948 	while ((cdip = ndip) != NULL) {
949 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
950 
951 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
952 			continue;
953 		}
954 
955 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
956 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
957 		    &data) != DDI_PROP_SUCCESS) {
958 			continue;
959 		}
960 
961 		if (strcmp(data, guid) != 0) {
962 			ddi_prop_free(data);
963 			continue;
964 		}
965 		ddi_prop_free(data);
966 		break;
967 	}
968 	ndi_devi_exit(vh->vh_dip, circular);
969 	return (cdip);
970 }
971 
972 /*
973  * i_mdi_devinfo_remove():
974  *		Remove a client device node
975  */
976 static int
977 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
978 {
979 	int	rv = MDI_SUCCESS;
980 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
981 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
982 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
983 		if (rv != NDI_SUCCESS) {
984 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
985 			    " failed. cdip = %p\n", cdip));
986 		}
987 		/*
988 		 * Convert to MDI error code
989 		 */
990 		switch (rv) {
991 		case NDI_SUCCESS:
992 			rv = MDI_SUCCESS;
993 			break;
994 		case NDI_BUSY:
995 			rv = MDI_BUSY;
996 			break;
997 		default:
998 			rv = MDI_FAILURE;
999 			break;
1000 		}
1001 	}
1002 	return (rv);
1003 }
1004 
1005 /*
1006  * i_devi_get_client()
1007  *		Utility function to get mpxio component extensions
1008  */
1009 static mdi_client_t *
1010 i_devi_get_client(dev_info_t *cdip)
1011 {
1012 	mdi_client_t	*ct = NULL;
1013 	if (MDI_CLIENT(cdip)) {
1014 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1015 	}
1016 	return (ct);
1017 }
1018 
1019 /*
1020  * i_mdi_is_child_present():
1021  *		Search for the presence of client device dev_info node
1022  */
1023 
1024 static int
1025 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1026 {
1027 	int		rv = MDI_FAILURE;
1028 	struct dev_info	*dip;
1029 	int		circular;
1030 
1031 	ndi_devi_enter(vdip, &circular);
1032 	dip = DEVI(vdip)->devi_child;
1033 	while (dip) {
1034 		if (dip == DEVI(cdip)) {
1035 			rv = MDI_SUCCESS;
1036 			break;
1037 		}
1038 		dip = dip->devi_sibling;
1039 	}
1040 	ndi_devi_exit(vdip, circular);
1041 	return (rv);
1042 }
1043 
1044 
1045 /*
1046  * i_mdi_client_lock():
1047  *		Grab client component lock
1048  * Return Values:
1049  *		None
1050  * Note:
1051  *		The default locking order is:
1052  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1053  *		But there are number of situations where locks need to be
1054  *		grabbed in reverse order.  This routine implements try and lock
1055  *		mechanism depending on the requested parameter option.
1056  */
1057 
1058 static void
1059 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1060 {
1061 	if (pip) {
1062 		/*
1063 		 * Reverse locking is requested.
1064 		 */
1065 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1066 			/*
1067 			 * tryenter failed. Try to grab again
1068 			 * after a small delay
1069 			 */
1070 			MDI_PI_HOLD(pip);
1071 			MDI_PI_UNLOCK(pip);
1072 			delay(1);
1073 			MDI_PI_LOCK(pip);
1074 			MDI_PI_RELE(pip);
1075 		}
1076 	} else {
1077 		MDI_CLIENT_LOCK(ct);
1078 	}
1079 }
1080 
1081 /*
1082  * i_mdi_client_unlock():
1083  *		Unlock a client component
1084  */
1085 
1086 static void
1087 i_mdi_client_unlock(mdi_client_t *ct)
1088 {
1089 	MDI_CLIENT_UNLOCK(ct);
1090 }
1091 
1092 /*
1093  * i_mdi_client_alloc():
1094  * 		Allocate and initialize a client structure.  Caller should
1095  *		hold the global mdi_mutex.
1096  * Return Values:
1097  *		Handle to a client component
1098  */
1099 /*ARGSUSED*/
1100 static mdi_client_t *
1101 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1102 {
1103 	mdi_client_t	*ct;
1104 
1105 	ASSERT(MUTEX_HELD(&mdi_mutex));
1106 
1107 	/*
1108 	 * Allocate and initialize a component structure.
1109 	 */
1110 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1111 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1112 	ct->ct_hnext = NULL;
1113 	ct->ct_hprev = NULL;
1114 	ct->ct_dip = NULL;
1115 	ct->ct_vhci = vh;
1116 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1117 	(void) strcpy(ct->ct_drvname, name);
1118 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1119 	(void) strcpy(ct->ct_guid, lguid);
1120 	ct->ct_cprivate = NULL;
1121 	ct->ct_vprivate = NULL;
1122 	ct->ct_flags = 0;
1123 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1124 	MDI_CLIENT_SET_OFFLINE(ct);
1125 	MDI_CLIENT_SET_DETACH(ct);
1126 	MDI_CLIENT_SET_POWER_UP(ct);
1127 	ct->ct_failover_flags = 0;
1128 	ct->ct_failover_status = 0;
1129 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1130 	ct->ct_unstable = 0;
1131 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1132 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1133 	ct->ct_lb = vh->vh_lb;
1134 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1135 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1136 	ct->ct_path_count = 0;
1137 	ct->ct_path_head = NULL;
1138 	ct->ct_path_tail = NULL;
1139 	ct->ct_path_last = NULL;
1140 
1141 	/*
1142 	 * Add this client component to our client hash queue
1143 	 */
1144 	i_mdi_client_enlist_table(vh, ct);
1145 	return (ct);
1146 }
1147 
1148 /*
1149  * i_mdi_client_enlist_table():
1150  *		Attach the client device to the client hash table. Caller
1151  *		should hold the mdi_mutex
1152  */
1153 
1154 static void
1155 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1156 {
1157 	int 			index;
1158 	struct client_hash	*head;
1159 
1160 	ASSERT(MUTEX_HELD(&mdi_mutex));
1161 	index = i_mdi_get_hash_key(ct->ct_guid);
1162 	head = &vh->vh_client_table[index];
1163 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1164 	head->ct_hash_head = ct;
1165 	head->ct_hash_count++;
1166 	vh->vh_client_count++;
1167 }
1168 
1169 /*
1170  * i_mdi_client_delist_table():
1171  *		Attach the client device to the client hash table.
1172  *		Caller should hold the mdi_mutex
1173  */
1174 
1175 static void
1176 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1177 {
1178 	int			index;
1179 	char			*guid;
1180 	struct client_hash 	*head;
1181 	mdi_client_t		*next;
1182 	mdi_client_t		*last;
1183 
1184 	ASSERT(MUTEX_HELD(&mdi_mutex));
1185 	guid = ct->ct_guid;
1186 	index = i_mdi_get_hash_key(guid);
1187 	head = &vh->vh_client_table[index];
1188 
1189 	last = NULL;
1190 	next = (mdi_client_t *)head->ct_hash_head;
1191 	while (next != NULL) {
1192 		if (next == ct) {
1193 			break;
1194 		}
1195 		last = next;
1196 		next = next->ct_hnext;
1197 	}
1198 
1199 	if (next) {
1200 		head->ct_hash_count--;
1201 		if (last == NULL) {
1202 			head->ct_hash_head = ct->ct_hnext;
1203 		} else {
1204 			last->ct_hnext = ct->ct_hnext;
1205 		}
1206 		ct->ct_hnext = NULL;
1207 		vh->vh_client_count--;
1208 	}
1209 }
1210 
1211 
1212 /*
1213  * i_mdi_client_free():
1214  *		Free a client component
1215  */
1216 static int
1217 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1218 {
1219 	int		rv = MDI_SUCCESS;
1220 	int		flags = ct->ct_flags;
1221 	dev_info_t	*cdip;
1222 	dev_info_t	*vdip;
1223 
1224 	ASSERT(MUTEX_HELD(&mdi_mutex));
1225 	vdip = vh->vh_dip;
1226 	cdip = ct->ct_dip;
1227 
1228 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1229 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1230 	DEVI(cdip)->devi_mdi_client = NULL;
1231 
1232 	/*
1233 	 * Clear out back ref. to dev_info_t node
1234 	 */
1235 	ct->ct_dip = NULL;
1236 
1237 	/*
1238 	 * Remove this client from our hash queue
1239 	 */
1240 	i_mdi_client_delist_table(vh, ct);
1241 
1242 	/*
1243 	 * Uninitialize and free the component
1244 	 */
1245 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1246 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1247 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1248 	cv_destroy(&ct->ct_failover_cv);
1249 	cv_destroy(&ct->ct_unstable_cv);
1250 	cv_destroy(&ct->ct_powerchange_cv);
1251 	mutex_destroy(&ct->ct_mutex);
1252 	kmem_free(ct, sizeof (*ct));
1253 
1254 	if (cdip != NULL) {
1255 		mutex_exit(&mdi_mutex);
1256 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1257 		mutex_enter(&mdi_mutex);
1258 	}
1259 	return (rv);
1260 }
1261 
1262 /*
1263  * i_mdi_client_find():
1264  * 		Find the client structure corresponding to a given guid
1265  *		Caller should hold the mdi_mutex
1266  */
1267 static mdi_client_t *
1268 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1269 {
1270 	int			index;
1271 	struct client_hash	*head;
1272 	mdi_client_t		*ct;
1273 
1274 	ASSERT(MUTEX_HELD(&mdi_mutex));
1275 	index = i_mdi_get_hash_key(guid);
1276 	head = &vh->vh_client_table[index];
1277 
1278 	ct = head->ct_hash_head;
1279 	while (ct != NULL) {
1280 		if (strcmp(ct->ct_guid, guid) == 0 &&
1281 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1282 			break;
1283 		}
1284 		ct = ct->ct_hnext;
1285 	}
1286 	return (ct);
1287 }
1288 
1289 
1290 
1291 /*
1292  * i_mdi_client_update_state():
1293  *		Compute and update client device state
1294  * Notes:
1295  *		A client device can be in any of three possible states:
1296  *
1297  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1298  *		one online/standby paths. Can tolerate failures.
1299  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1300  *		no alternate paths available as standby. A failure on the online
1301  *		would result in loss of access to device data.
1302  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1303  *		no paths available to access the device.
1304  */
1305 static void
1306 i_mdi_client_update_state(mdi_client_t *ct)
1307 {
1308 	int state;
1309 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1310 	state = i_mdi_client_compute_state(ct, NULL);
1311 	MDI_CLIENT_SET_STATE(ct, state);
1312 }
1313 
1314 /*
1315  * i_mdi_client_compute_state():
1316  *		Compute client device state
1317  *
1318  *		mdi_phci_t *	Pointer to pHCI structure which should
1319  *				while computing the new value.  Used by
1320  *				i_mdi_phci_offline() to find the new
1321  *				client state after DR of a pHCI.
1322  */
1323 static int
1324 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1325 {
1326 	int		state;
1327 	int		online_count = 0;
1328 	int		standby_count = 0;
1329 	mdi_pathinfo_t	*pip, *next;
1330 
1331 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1332 	pip = ct->ct_path_head;
1333 	while (pip != NULL) {
1334 		MDI_PI_LOCK(pip);
1335 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1336 		if (MDI_PI(pip)->pi_phci == ph) {
1337 			MDI_PI_UNLOCK(pip);
1338 			pip = next;
1339 			continue;
1340 		}
1341 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1342 				== MDI_PATHINFO_STATE_ONLINE)
1343 			online_count++;
1344 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1345 				== MDI_PATHINFO_STATE_STANDBY)
1346 			standby_count++;
1347 		MDI_PI_UNLOCK(pip);
1348 		pip = next;
1349 	}
1350 
1351 	if (online_count == 0) {
1352 		if (standby_count == 0) {
1353 			state = MDI_CLIENT_STATE_FAILED;
1354 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1355 			    " ct = %p\n", ct));
1356 		} else if (standby_count == 1) {
1357 			state = MDI_CLIENT_STATE_DEGRADED;
1358 		} else {
1359 			state = MDI_CLIENT_STATE_OPTIMAL;
1360 		}
1361 	} else if (online_count == 1) {
1362 		if (standby_count == 0) {
1363 			state = MDI_CLIENT_STATE_DEGRADED;
1364 		} else {
1365 			state = MDI_CLIENT_STATE_OPTIMAL;
1366 		}
1367 	} else {
1368 		state = MDI_CLIENT_STATE_OPTIMAL;
1369 	}
1370 	return (state);
1371 }
1372 
1373 /*
1374  * i_mdi_client2devinfo():
1375  *		Utility function
1376  */
1377 dev_info_t *
1378 i_mdi_client2devinfo(mdi_client_t *ct)
1379 {
1380 	return (ct->ct_dip);
1381 }
1382 
1383 /*
1384  * mdi_client_path2_devinfo():
1385  * 		Given the parent devinfo and child devfs pathname, search for
1386  *		a valid devfs node handle.
1387  */
1388 dev_info_t *
1389 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1390 {
1391 	dev_info_t 	*cdip = NULL;
1392 	dev_info_t 	*ndip = NULL;
1393 	char		*temp_pathname;
1394 	int		circular;
1395 
1396 	/*
1397 	 * Allocate temp buffer
1398 	 */
1399 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1400 
1401 	/*
1402 	 * Lock parent against changes
1403 	 */
1404 	ndi_devi_enter(vdip, &circular);
1405 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1406 	while ((cdip = ndip) != NULL) {
1407 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1408 
1409 		*temp_pathname = '\0';
1410 		(void) ddi_pathname(cdip, temp_pathname);
1411 		if (strcmp(temp_pathname, pathname) == 0) {
1412 			break;
1413 		}
1414 	}
1415 	/*
1416 	 * Release devinfo lock
1417 	 */
1418 	ndi_devi_exit(vdip, circular);
1419 
1420 	/*
1421 	 * Free the temp buffer
1422 	 */
1423 	kmem_free(temp_pathname, MAXPATHLEN);
1424 	return (cdip);
1425 }
1426 
1427 
1428 /*
1429  * mdi_client_get_path_count():
1430  * 		Utility function to get number of path information nodes
1431  *		associated with a given client device.
1432  */
1433 int
1434 mdi_client_get_path_count(dev_info_t *cdip)
1435 {
1436 	mdi_client_t	*ct;
1437 	int		count = 0;
1438 
1439 	ct = i_devi_get_client(cdip);
1440 	if (ct != NULL) {
1441 		count = ct->ct_path_count;
1442 	}
1443 	return (count);
1444 }
1445 
1446 
1447 /*
1448  * i_mdi_get_hash_key():
1449  * 		Create a hash using strings as keys
1450  *
1451  */
1452 static int
1453 i_mdi_get_hash_key(char *str)
1454 {
1455 	uint32_t	g, hash = 0;
1456 	char		*p;
1457 
1458 	for (p = str; *p != '\0'; p++) {
1459 		g = *p;
1460 		hash += g;
1461 	}
1462 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1463 }
1464 
1465 /*
1466  * mdi_get_lb_policy():
1467  * 		Get current load balancing policy for a given client device
1468  */
1469 client_lb_t
1470 mdi_get_lb_policy(dev_info_t *cdip)
1471 {
1472 	client_lb_t	lb = LOAD_BALANCE_NONE;
1473 	mdi_client_t	*ct;
1474 
1475 	ct = i_devi_get_client(cdip);
1476 	if (ct != NULL) {
1477 		lb = ct->ct_lb;
1478 	}
1479 	return (lb);
1480 }
1481 
1482 /*
1483  * mdi_set_lb_region_size():
1484  * 		Set current region size for the load-balance
1485  */
1486 int
1487 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1488 {
1489 	mdi_client_t	*ct;
1490 	int		rv = MDI_FAILURE;
1491 
1492 	ct = i_devi_get_client(cdip);
1493 	if (ct != NULL && ct->ct_lb_args != NULL) {
1494 		ct->ct_lb_args->region_size = region_size;
1495 		rv = MDI_SUCCESS;
1496 	}
1497 	return (rv);
1498 }
1499 
1500 /*
1501  * mdi_Set_lb_policy():
1502  * 		Set current load balancing policy for a given client device
1503  */
1504 int
1505 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1506 {
1507 	mdi_client_t	*ct;
1508 	int		rv = MDI_FAILURE;
1509 
1510 	ct = i_devi_get_client(cdip);
1511 	if (ct != NULL) {
1512 		ct->ct_lb = lb;
1513 		rv = MDI_SUCCESS;
1514 	}
1515 	return (rv);
1516 }
1517 
1518 /*
1519  * mdi_failover():
1520  *		failover function called by the vHCI drivers to initiate
1521  *		a failover operation.  This is typically due to non-availability
1522  *		of online paths to route I/O requests.  Failover can be
1523  *		triggered through user application also.
1524  *
1525  *		The vHCI driver calls mdi_failover() to initiate a failover
1526  *		operation. mdi_failover() calls back into the vHCI driver's
1527  *		vo_failover() entry point to perform the actual failover
1528  *		operation.  The reason for requiring the vHCI driver to
1529  *		initiate failover by calling mdi_failover(), instead of directly
1530  *		executing vo_failover() itself, is to ensure that the mdi
1531  *		framework can keep track of the client state properly.
1532  *		Additionally, mdi_failover() provides as a convenience the
1533  *		option of performing the failover operation synchronously or
1534  *		asynchronously
1535  *
1536  *		Upon successful completion of the failover operation, the
1537  *		paths that were previously ONLINE will be in the STANDBY state,
1538  *		and the newly activated paths will be in the ONLINE state.
1539  *
1540  *		The flags modifier determines whether the activation is done
1541  *		synchronously: MDI_FAILOVER_SYNC
1542  * Return Values:
1543  *		MDI_SUCCESS
1544  *		MDI_FAILURE
1545  *		MDI_BUSY
1546  */
1547 /*ARGSUSED*/
1548 int
1549 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1550 {
1551 	int			rv;
1552 	mdi_client_t		*ct;
1553 
1554 	ct = i_devi_get_client(cdip);
1555 	ASSERT(ct != NULL);
1556 	if (ct == NULL) {
1557 		/* cdip is not a valid client device. Nothing more to do. */
1558 		return (MDI_FAILURE);
1559 	}
1560 
1561 	MDI_CLIENT_LOCK(ct);
1562 
1563 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1564 		/* A path to the client is being freed */
1565 		MDI_CLIENT_UNLOCK(ct);
1566 		return (MDI_BUSY);
1567 	}
1568 
1569 
1570 	if (MDI_CLIENT_IS_FAILED(ct)) {
1571 		/*
1572 		 * Client is in failed state. Nothing more to do.
1573 		 */
1574 		MDI_CLIENT_UNLOCK(ct);
1575 		return (MDI_FAILURE);
1576 	}
1577 
1578 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1579 		/*
1580 		 * Failover is already in progress; return BUSY
1581 		 */
1582 		MDI_CLIENT_UNLOCK(ct);
1583 		return (MDI_BUSY);
1584 	}
1585 	/*
1586 	 * Make sure that mdi_pathinfo node state changes are processed.
1587 	 * We do not allow failovers to progress while client path state
1588 	 * changes are in progress
1589 	 */
1590 	if (ct->ct_unstable) {
1591 		if (flags == MDI_FAILOVER_ASYNC) {
1592 			MDI_CLIENT_UNLOCK(ct);
1593 			return (MDI_BUSY);
1594 		} else {
1595 			while (ct->ct_unstable)
1596 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1597 		}
1598 	}
1599 
1600 	/*
1601 	 * Client device is in stable state. Before proceeding, perform sanity
1602 	 * checks again.
1603 	 */
1604 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1605 	    (!i_ddi_devi_attached(ct->ct_dip))) {
1606 		/*
1607 		 * Client is in failed state. Nothing more to do.
1608 		 */
1609 		MDI_CLIENT_UNLOCK(ct);
1610 		return (MDI_FAILURE);
1611 	}
1612 
1613 	/*
1614 	 * Set the client state as failover in progress.
1615 	 */
1616 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1617 	ct->ct_failover_flags = flags;
1618 	MDI_CLIENT_UNLOCK(ct);
1619 
1620 	if (flags == MDI_FAILOVER_ASYNC) {
1621 		/*
1622 		 * Submit the initiate failover request via CPR safe
1623 		 * taskq threads.
1624 		 */
1625 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1626 		    ct, KM_SLEEP);
1627 		return (MDI_ACCEPT);
1628 	} else {
1629 		/*
1630 		 * Synchronous failover mode.  Typically invoked from the user
1631 		 * land.
1632 		 */
1633 		rv = i_mdi_failover(ct);
1634 	}
1635 	return (rv);
1636 }
1637 
1638 /*
1639  * i_mdi_failover():
1640  *		internal failover function. Invokes vHCI drivers failover
1641  *		callback function and process the failover status
1642  * Return Values:
1643  *		None
1644  *
1645  * Note: A client device in failover state can not be detached or freed.
1646  */
1647 static int
1648 i_mdi_failover(void *arg)
1649 {
1650 	int		rv = MDI_SUCCESS;
1651 	mdi_client_t	*ct = (mdi_client_t *)arg;
1652 	mdi_vhci_t	*vh = ct->ct_vhci;
1653 
1654 	ASSERT(!MUTEX_HELD(&ct->ct_mutex));
1655 
1656 	if (vh->vh_ops->vo_failover != NULL) {
1657 		/*
1658 		 * Call vHCI drivers callback routine
1659 		 */
1660 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1661 		    ct->ct_failover_flags);
1662 	}
1663 
1664 	MDI_CLIENT_LOCK(ct);
1665 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1666 
1667 	/*
1668 	 * Save the failover return status
1669 	 */
1670 	ct->ct_failover_status = rv;
1671 
1672 	/*
1673 	 * As a result of failover, client status would have been changed.
1674 	 * Update the client state and wake up anyone waiting on this client
1675 	 * device.
1676 	 */
1677 	i_mdi_client_update_state(ct);
1678 
1679 	cv_broadcast(&ct->ct_failover_cv);
1680 	MDI_CLIENT_UNLOCK(ct);
1681 	return (rv);
1682 }
1683 
1684 /*
1685  * Load balancing is logical block.
1686  * IOs within the range described by region_size
1687  * would go on the same path. This would improve the
1688  * performance by cache-hit on some of the RAID devices.
1689  * Search only for online paths(At some point we
1690  * may want to balance across target ports).
1691  * If no paths are found then default to round-robin.
1692  */
1693 static int
1694 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1695 {
1696 	int		path_index = -1;
1697 	int		online_path_count = 0;
1698 	int		online_nonpref_path_count = 0;
1699 	int 		region_size = ct->ct_lb_args->region_size;
1700 	mdi_pathinfo_t	*pip;
1701 	mdi_pathinfo_t	*next;
1702 	int		preferred, path_cnt;
1703 
1704 	pip = ct->ct_path_head;
1705 	while (pip) {
1706 		MDI_PI_LOCK(pip);
1707 		if (MDI_PI(pip)->pi_state ==
1708 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1709 			online_path_count++;
1710 		} else if (MDI_PI(pip)->pi_state ==
1711 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1712 			online_nonpref_path_count++;
1713 		}
1714 		next = (mdi_pathinfo_t *)
1715 		    MDI_PI(pip)->pi_client_link;
1716 		MDI_PI_UNLOCK(pip);
1717 		pip = next;
1718 	}
1719 	/* if found any online/preferred then use this type */
1720 	if (online_path_count > 0) {
1721 		path_cnt = online_path_count;
1722 		preferred = 1;
1723 	} else if (online_nonpref_path_count > 0) {
1724 		path_cnt = online_nonpref_path_count;
1725 		preferred = 0;
1726 	} else {
1727 		path_cnt = 0;
1728 	}
1729 	if (path_cnt) {
1730 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1731 		pip = ct->ct_path_head;
1732 		while (pip && path_index != -1) {
1733 			MDI_PI_LOCK(pip);
1734 			if (path_index == 0 &&
1735 			    (MDI_PI(pip)->pi_state ==
1736 			    MDI_PATHINFO_STATE_ONLINE) &&
1737 				MDI_PI(pip)->pi_preferred == preferred) {
1738 				MDI_PI_HOLD(pip);
1739 				MDI_PI_UNLOCK(pip);
1740 				*ret_pip = pip;
1741 				return (MDI_SUCCESS);
1742 			}
1743 			path_index --;
1744 			next = (mdi_pathinfo_t *)
1745 			    MDI_PI(pip)->pi_client_link;
1746 			MDI_PI_UNLOCK(pip);
1747 			pip = next;
1748 		}
1749 		if (pip == NULL) {
1750 			MDI_DEBUG(4, (CE_NOTE, NULL,
1751 			    "!lba %p, no pip !!\n",
1752 				bp->b_blkno));
1753 		} else {
1754 			MDI_DEBUG(4, (CE_NOTE, NULL,
1755 			    "!lba %p, no pip for path_index, "
1756 			    "pip %p\n", pip));
1757 		}
1758 	}
1759 	return (MDI_FAILURE);
1760 }
1761 
/*
 * mdi_select_path():
 *		select a path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is
 *		presumably set through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  If start_pip is specified (non NULL) it is
 *		used as start point to walk and find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
 *		Both of them combined with MDI_SELECT_USER_DISABLE_PATH also
 *		accept ONLINE/STANDBY paths that carry the USER_DISABLE state
 *		bit.  Any other non-zero flags value matches no path.
 *
 *		With non-zero flags the client state checks (failed, failover
 *		in progress, detached) are skipped and the paths are always
 *		walked round-robin, regardless of the client's policy.
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a held state (ref_cnt).
 *		Caller should release the hold by calling mdi_rele_path().
 *		*ret_pip is set to NULL on entry and stays NULL unless
 *		MDI_SUCCESS is returned.
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path matched but at least one path examined
 *				  was in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;	/* path matches / keep walking */
	int		retry = 0;	/* saw a transient path: report BUSY */

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	/* The client lock is held until one of the returns below. */
	MDI_CLIENT_LOCK(ct);

	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * The list is walked circularly at most twice from 'start':
		 * once looking for preferred paths, once for non-preferred.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE) &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * Back at the start: switch to the non-preferred
			 * pass, or stop if that pass is over as well.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Standard behavior resumes after the last selected path;
		 * otherwise the search resumes after the caller's start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond: does this path satisfy the request?  Every
			 * variant also requires pi_preferred to equal the
			 * preference of the current pass.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					/* unrecognized flags match nothing */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * do_again is entered with pip locked, both when
			 * falling through from above and via the goto below.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report BUSY rather than NOPATH when a
	 * transient path was seen during the walk.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}
2180 
2181 /*
2182  * For a client, return the next available path to any phci
2183  *
2184  * Note:
2185  *		Caller should hold the branch's devinfo node to get a consistent
2186  *		snap shot of the mdi_pathinfo nodes.
2187  *
2188  *		Please note that even the list is stable the mdi_pathinfo
2189  *		node state and properties are volatile.  The caller should lock
2190  *		and unlock the nodes by calling mdi_pi_lock() and
2191  *		mdi_pi_unlock() functions to get a stable properties.
2192  *
2193  *		If there is a need to use the nodes beyond the hold of the
2194  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2195  *		need to be held against unexpected removal by calling
2196  *		mdi_hold_path() and should be released by calling
2197  *		mdi_rele_path() on completion.
2198  */
2199 mdi_pathinfo_t *
2200 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2201 {
2202 	mdi_client_t *ct;
2203 
2204 	if (!MDI_CLIENT(ct_dip))
2205 		return (NULL);
2206 
2207 	/*
2208 	 * Walk through client link
2209 	 */
2210 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2211 	ASSERT(ct != NULL);
2212 
2213 	if (pip == NULL)
2214 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2215 
2216 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2217 }
2218 
2219 /*
2220  * For a phci, return the next available path to any client
2221  * Note: ditto mdi_get_next_phci_path()
2222  */
2223 mdi_pathinfo_t *
2224 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2225 {
2226 	mdi_phci_t *ph;
2227 
2228 	if (!MDI_PHCI(ph_dip))
2229 		return (NULL);
2230 
2231 	/*
2232 	 * Walk through pHCI link
2233 	 */
2234 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2235 	ASSERT(ph != NULL);
2236 
2237 	if (pip == NULL)
2238 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2239 
2240 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2241 }
2242 
2243 /*
2244  * mdi_get_nextpath():
2245  *		mdi_pathinfo node walker function.  Get the next node from the
2246  *		client or pHCI device list.
2247  *
2248  * XXX This is wrapper function for compatibility purposes only.
2249  *
2250  *	It doesn't work under Multi-level MPxIO, where a dip
2251  *	is both client and phci (which link should next_path follow?).
2252  *	Once Leadville is modified to call mdi_get_next_phci/client_path,
2253  *	this interface should be removed.
2254  */
2255 void
2256 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip,
2257     mdi_pathinfo_t **ret_pip)
2258 {
2259 	if (MDI_CLIENT(dip)) {
2260 		*ret_pip = mdi_get_next_phci_path(dip, pip);
2261 	} else if (MDI_PHCI(dip)) {
2262 		*ret_pip = mdi_get_next_client_path(dip, pip);
2263 	} else {
2264 		*ret_pip = NULL;
2265 	}
2266 }
2267 
2268 /*
2269  * mdi_hold_path():
2270  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2271  * Return Values:
2272  *		None
2273  */
2274 void
2275 mdi_hold_path(mdi_pathinfo_t *pip)
2276 {
2277 	if (pip) {
2278 		MDI_PI_LOCK(pip);
2279 		MDI_PI_HOLD(pip);
2280 		MDI_PI_UNLOCK(pip);
2281 	}
2282 }
2283 
2284 
2285 /*
2286  * mdi_rele_path():
2287  *		Release the mdi_pathinfo node which was selected
2288  *		through mdi_select_path() mechanism or manually held by
2289  *		calling mdi_hold_path().
2290  * Return Values:
2291  *		None
2292  */
2293 void
2294 mdi_rele_path(mdi_pathinfo_t *pip)
2295 {
2296 	if (pip) {
2297 		MDI_PI_LOCK(pip);
2298 		MDI_PI_RELE(pip);
2299 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2300 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2301 		}
2302 		MDI_PI_UNLOCK(pip);
2303 	}
2304 }
2305 
2306 
2307 /*
2308  * mdi_pi_lock():
2309  * 		Lock the mdi_pathinfo node.
2310  * Note:
2311  *		The caller should release the lock by calling mdi_pi_unlock()
2312  */
2313 void
2314 mdi_pi_lock(mdi_pathinfo_t *pip)
2315 {
2316 	ASSERT(pip != NULL);
2317 	if (pip) {
2318 		MDI_PI_LOCK(pip);
2319 	}
2320 }
2321 
2322 
2323 /*
2324  * mdi_pi_unlock():
2325  * 		Unlock the mdi_pathinfo node.
2326  * Note:
2327  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2328  */
2329 void
2330 mdi_pi_unlock(mdi_pathinfo_t *pip)
2331 {
2332 	ASSERT(pip != NULL);
2333 	if (pip) {
2334 		MDI_PI_UNLOCK(pip);
2335 	}
2336 }
2337 
2338 /*
2339  * mdi_pi_find():
2340  *		Search the list of mdi_pathinfo nodes attached to the
2341  *		pHCI/Client device node whose path address matches "paddr".
2342  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2343  *		found.
2344  * Return Values:
2345  *		mdi_pathinfo node handle
2346  *		NULL
2347  * Notes:
2348  *		Caller need not hold any locks to call this function.
2349  */
2350 mdi_pathinfo_t *
2351 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2352 {
2353 	mdi_phci_t		*ph;
2354 	mdi_vhci_t		*vh;
2355 	mdi_client_t		*ct;
2356 	mdi_pathinfo_t		*pip = NULL;
2357 
2358 	if ((pdip == NULL) || (paddr == NULL)) {
2359 		return (NULL);
2360 	}
2361 	ph = i_devi_get_phci(pdip);
2362 	if (ph == NULL) {
2363 		/*
2364 		 * Invalid pHCI device, Nothing more to do.
2365 		 */
2366 		MDI_DEBUG(2, (CE_WARN, NULL,
2367 		    "!mdi_pi_find: invalid phci"));
2368 		return (NULL);
2369 	}
2370 
2371 	vh = ph->ph_vhci;
2372 	if (vh == NULL) {
2373 		/*
2374 		 * Invalid vHCI device, Nothing more to do.
2375 		 */
2376 		MDI_DEBUG(2, (CE_WARN, NULL,
2377 		    "!mdi_pi_find: invalid phci"));
2378 		return (NULL);
2379 	}
2380 
2381 	/*
2382 	 * Look for client device identified by caddr (guid)
2383 	 */
2384 	if (caddr == NULL) {
2385 		/*
2386 		 * Find a mdi_pathinfo node under pHCI list for a matching
2387 		 * unit address.
2388 		 */
2389 		mutex_enter(&ph->ph_mutex);
2390 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2391 
2392 		while (pip != NULL) {
2393 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2394 				break;
2395 			}
2396 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2397 		}
2398 		mutex_exit(&ph->ph_mutex);
2399 		return (pip);
2400 	}
2401 
2402 	/*
2403 	 * XXX - Is the rest of the code in this function really necessary?
2404 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2405 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2406 	 * whether the search is based on the pathinfo nodes attached to
2407 	 * the pHCI or the client node, the result will be the same.
2408 	 */
2409 
2410 	/*
2411 	 * Find the client device corresponding to 'caddr'
2412 	 */
2413 	mutex_enter(&mdi_mutex);
2414 
2415 	/*
2416 	 * XXX - Passing NULL to the following function works as long as the
2417 	 * the client addresses (caddr) are unique per vhci basis.
2418 	 */
2419 	ct = i_mdi_client_find(vh, NULL, caddr);
2420 	if (ct == NULL) {
2421 		/*
2422 		 * Client not found, Obviously mdi_pathinfo node has not been
2423 		 * created yet.
2424 		 */
2425 		mutex_exit(&mdi_mutex);
2426 		return (pip);
2427 	}
2428 
2429 	/*
2430 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2431 	 * pHCI and paddr
2432 	 */
2433 	MDI_CLIENT_LOCK(ct);
2434 
2435 	/*
2436 	 * Release the global mutex as it is no more needed. Note: We always
2437 	 * respect the locking order while acquiring.
2438 	 */
2439 	mutex_exit(&mdi_mutex);
2440 
2441 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2442 	while (pip != NULL) {
2443 		/*
2444 		 * Compare the unit address
2445 		 */
2446 		if ((MDI_PI(pip)->pi_phci == ph) &&
2447 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2448 			break;
2449 		}
2450 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2451 	}
2452 	MDI_CLIENT_UNLOCK(ct);
2453 	return (pip);
2454 }
2455 
2456 /*
2457  * mdi_pi_alloc():
2458  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2459  *		The mdi_pathinfo node returned by this function identifies a
2460  *		unique device path is capable of having properties attached
2461  *		and passed to mdi_pi_online() to fully attach and online the
2462  *		path and client device node.
2463  *		The mdi_pathinfo node returned by this function must be
2464  *		destroyed using mdi_pi_free() if the path is no longer
2465  *		operational or if the caller fails to attach a client device
2466  *		node when calling mdi_pi_online(). The framework will not free
2467  *		the resources allocated.
2468  *		This function can be called from both interrupt and kernel
2469  *		contexts.  DDI_NOSLEEP flag should be used while calling
2470  *		from interrupt contexts.
2471  * Return Values:
2472  *		MDI_SUCCESS
2473  *		MDI_FAILURE
2474  *		MDI_NOMEM
2475  */
2476 /*ARGSUSED*/
2477 int
2478 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2479     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2480 {
2481 	mdi_vhci_t	*vh;
2482 	mdi_phci_t	*ph;
2483 	mdi_client_t	*ct;
2484 	mdi_pathinfo_t	*pip = NULL;
2485 	dev_info_t	*cdip;
2486 	int		rv = MDI_NOMEM;
2487 	int		path_allocated = 0;
2488 
2489 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2490 	    ret_pip == NULL) {
2491 		/* Nothing more to do */
2492 		return (MDI_FAILURE);
2493 	}
2494 
2495 	*ret_pip = NULL;
2496 	ph = i_devi_get_phci(pdip);
2497 	ASSERT(ph != NULL);
2498 	if (ph == NULL) {
2499 		/* Invalid pHCI device, return failure */
2500 		MDI_DEBUG(1, (CE_WARN, NULL,
2501 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2502 		return (MDI_FAILURE);
2503 	}
2504 
2505 	MDI_PHCI_LOCK(ph);
2506 	vh = ph->ph_vhci;
2507 	if (vh == NULL) {
2508 		/* Invalid vHCI device, return failure */
2509 		MDI_DEBUG(1, (CE_WARN, NULL,
2510 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2511 		MDI_PHCI_UNLOCK(ph);
2512 		return (MDI_FAILURE);
2513 	}
2514 
2515 	if (MDI_PHCI_IS_READY(ph) == 0) {
2516 		/*
2517 		 * Do not allow new node creation when pHCI is in
2518 		 * offline/suspended states
2519 		 */
2520 		MDI_DEBUG(1, (CE_WARN, NULL,
2521 		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
2522 		MDI_PHCI_UNLOCK(ph);
2523 		return (MDI_BUSY);
2524 	}
2525 	MDI_PHCI_UNSTABLE(ph);
2526 	MDI_PHCI_UNLOCK(ph);
2527 
2528 	/* look for a matching client, create one if not found */
2529 	mutex_enter(&mdi_mutex);
2530 	ct = i_mdi_client_find(vh, cname, caddr);
2531 	if (ct == NULL) {
2532 		ct = i_mdi_client_alloc(vh, cname, caddr);
2533 		ASSERT(ct != NULL);
2534 	}
2535 
2536 	if (ct->ct_dip == NULL) {
2537 		/*
2538 		 * Allocate a devinfo node
2539 		 */
2540 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2541 		    compatible, ncompatible);
2542 		if (ct->ct_dip == NULL) {
2543 			(void) i_mdi_client_free(vh, ct);
2544 			goto fail;
2545 		}
2546 	}
2547 	cdip = ct->ct_dip;
2548 
2549 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2550 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2551 
2552 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2553 	while (pip != NULL) {
2554 		/*
2555 		 * Compare the unit address
2556 		 */
2557 		if ((MDI_PI(pip)->pi_phci == ph) &&
2558 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2559 			break;
2560 		}
2561 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2562 	}
2563 
2564 	if (pip == NULL) {
2565 		/*
2566 		 * This is a new path for this client device.  Allocate and
2567 		 * initialize a new pathinfo node
2568 		 */
2569 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2570 		ASSERT(pip != NULL);
2571 		path_allocated = 1;
2572 	}
2573 	rv = MDI_SUCCESS;
2574 
2575 fail:
2576 	/*
2577 	 * Release the global mutex.
2578 	 */
2579 	mutex_exit(&mdi_mutex);
2580 
2581 	/*
2582 	 * Mark the pHCI as stable
2583 	 */
2584 	MDI_PHCI_LOCK(ph);
2585 	MDI_PHCI_STABLE(ph);
2586 	MDI_PHCI_UNLOCK(ph);
2587 	*ret_pip = pip;
2588 
2589 	if (path_allocated)
2590 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2591 
2592 	return (rv);
2593 }
2594 
2595 /*ARGSUSED*/
2596 int
2597 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2598     int flags, mdi_pathinfo_t **ret_pip)
2599 {
2600 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2601 	    flags, ret_pip));
2602 }
2603 
2604 /*
2605  * i_mdi_pi_alloc():
2606  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2607  * Return Values:
2608  *		mdi_pathinfo
2609  */
2610 
2611 /*ARGSUSED*/
2612 static mdi_pathinfo_t *
2613 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2614 {
2615 	mdi_pathinfo_t	*pip;
2616 	int		ct_circular;
2617 	int		ph_circular;
2618 	int		se_flag;
2619 	int		kmem_flag;
2620 
2621 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2622 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2623 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2624 	    MDI_PATHINFO_STATE_TRANSIENT;
2625 
2626 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2627 		MDI_PI_SET_USER_DISABLE(pip);
2628 
2629 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2630 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2631 
2632 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2633 		MDI_PI_SET_DRV_DISABLE(pip);
2634 
2635 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2636 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2637 	MDI_PI(pip)->pi_client = ct;
2638 	MDI_PI(pip)->pi_phci = ph;
2639 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2640 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2641 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2642 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2643 	MDI_PI(pip)->pi_pprivate = NULL;
2644 	MDI_PI(pip)->pi_cprivate = NULL;
2645 	MDI_PI(pip)->pi_vprivate = NULL;
2646 	MDI_PI(pip)->pi_client_link = NULL;
2647 	MDI_PI(pip)->pi_phci_link = NULL;
2648 	MDI_PI(pip)->pi_ref_cnt = 0;
2649 	MDI_PI(pip)->pi_kstats = NULL;
2650 	MDI_PI(pip)->pi_preferred = 1;
2651 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2652 
2653 	/*
2654 	 * Lock both dev_info nodes against changes in parallel.
2655 	 */
2656 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2657 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2658 
2659 	i_mdi_phci_add_path(ph, pip);
2660 	i_mdi_client_add_path(ct, pip);
2661 
2662 	ndi_devi_exit(ph->ph_dip, ph_circular);
2663 	ndi_devi_exit(ct->ct_dip, ct_circular);
2664 
2665 	/* determine interrupt context */
2666 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2667 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2668 
2669 	i_ddi_di_cache_invalidate(kmem_flag);
2670 
2671 	return (pip);
2672 }
2673 
2674 /*
2675  * i_mdi_phci_add_path():
2676  * 		Add a mdi_pathinfo node to pHCI list.
2677  * Notes:
2678  *		Caller should per-pHCI mutex
2679  */
2680 
2681 static void
2682 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2683 {
2684 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2685 
2686 	if (ph->ph_path_head == NULL) {
2687 		ph->ph_path_head = pip;
2688 	} else {
2689 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2690 	}
2691 	ph->ph_path_tail = pip;
2692 	ph->ph_path_count++;
2693 }
2694 
2695 /*
2696  * i_mdi_client_add_path():
2697  *		Add mdi_pathinfo node to client list
2698  */
2699 
2700 static void
2701 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2702 {
2703 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2704 
2705 	if (ct->ct_path_head == NULL) {
2706 		ct->ct_path_head = pip;
2707 	} else {
2708 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2709 	}
2710 	ct->ct_path_tail = pip;
2711 	ct->ct_path_count++;
2712 }
2713 
/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device.
 *
 *		The node must be in the offline, init or initing state.  If
 *		references are still outstanding the call waits (up to 60
 *		seconds per wait) for them to drain.  The vHCI's vo_pi_uninit
 *		entry point, if any, is called unless the node is still in the
 *		initing state; the node is only freed if that call succeeds.
 *
 *		"flags" is not referenced by this function.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	node has no pHCI, vHCI or client
 *		MDI_BUSY	node is in the wrong state, or references did
 *				not drain before the timeout
 *		Otherwise, the value returned by vo_pi_uninit.
 */

/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is offline, init or initing and the reference
	 * count is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.  pi_ref_cv is
		 * broadcast by mdi_rele_path() when the count drops to zero.
		 * Each wait is bounded to 60 seconds.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout expired without the reference count
			 * reaching zero being signaled; give up.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	/*
	 * Remember whether this path holds a power-management reference so
	 * it can be dropped on the client once the node is freed.
	 */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	/* Re-added at the end of this function if rv is MDI_FAILURE. */
	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers until mdi_mutex is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock so that mdi_mutex can be taken first, per the
	 * default locking order (mdi_mutex before ct_mutex), then retake the
	 * client lock and clear the in-progress marker.
	 */
	MDI_CLIENT_UNLOCK(ct);
	mutex_enter(&mdi_mutex);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * A node still in the initing state is not passed to vo_pi_uninit;
	 * rv then stays MDI_SUCCESS.
	 */
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip is freed here and must not be referenced afterwards. */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			mutex_exit(&mdi_mutex);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	mutex_exit(&mdi_mutex);

	/*
	 * The node was not freed; put it back into the vhci cache.
	 * NOTE(review): only MDI_FAILURE restores the cache entry; any other
	 * non-success code from vo_pi_uninit leaves it removed -- confirm
	 * that this is intended.
	 */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}
2870 
2871 /*
2872  * i_mdi_pi_free():
2873  *		Free the mdi_pathinfo node
2874  */
2875 static void
2876 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
2877 {
2878 	int	ct_circular;
2879 	int	ph_circular;
2880 	int	se_flag;
2881 	int	kmem_flag;
2882 
2883 	/*
2884 	 * remove any per-path kstats
2885 	 */
2886 	i_mdi_pi_kstat_destroy(pip);
2887 
2888 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2889 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2890 
2891 	i_mdi_client_remove_path(ct, pip);
2892 	i_mdi_phci_remove_path(ph, pip);
2893 
2894 	ndi_devi_exit(ph->ph_dip, ph_circular);
2895 	ndi_devi_exit(ct->ct_dip, ct_circular);
2896 
2897 	/* determine interrupt context */
2898 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2899 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2900 
2901 	i_ddi_di_cache_invalidate(kmem_flag);
2902 
2903 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
2904 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
2905 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
2906 	if (MDI_PI(pip)->pi_addr) {
2907 		kmem_free(MDI_PI(pip)->pi_addr,
2908 		    strlen(MDI_PI(pip)->pi_addr) + 1);
2909 		MDI_PI(pip)->pi_addr = NULL;
2910 	}
2911 
2912 	if (MDI_PI(pip)->pi_prop) {
2913 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
2914 		MDI_PI(pip)->pi_prop = NULL;
2915 	}
2916 	kmem_free(pip, sizeof (struct mdi_pathinfo));
2917 }
2918 
2919 
2920 /*
2921  * i_mdi_phci_remove_path():
2922  * 		Remove a mdi_pathinfo node from pHCI list.
2923  * Notes:
2924  *		Caller should hold per-pHCI mutex
2925  */
2926 
2927 static void
2928 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2929 {
2930 	mdi_pathinfo_t	*prev = NULL;
2931 	mdi_pathinfo_t	*path = NULL;
2932 
2933 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2934 
2935 	path = ph->ph_path_head;
2936 	while (path != NULL) {
2937 		if (path == pip) {
2938 			break;
2939 		}
2940 		prev = path;
2941 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2942 	}
2943 
2944 	if (path) {
2945 		ph->ph_path_count--;
2946 		if (prev) {
2947 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
2948 		} else {
2949 			ph->ph_path_head =
2950 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2951 		}
2952 		if (ph->ph_path_tail == path) {
2953 			ph->ph_path_tail = prev;
2954 		}
2955 	}
2956 
2957 	/*
2958 	 * Clear the pHCI link
2959 	 */
2960 	MDI_PI(pip)->pi_phci_link = NULL;
2961 	MDI_PI(pip)->pi_phci = NULL;
2962 }
2963 
2964 /*
2965  * i_mdi_client_remove_path():
2966  * 		Remove a mdi_pathinfo node from client path list.
2967  */
2968 
2969 static void
2970 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2971 {
2972 	mdi_pathinfo_t	*prev = NULL;
2973 	mdi_pathinfo_t	*path;
2974 
2975 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2976 
2977 	path = ct->ct_path_head;
2978 	while (path != NULL) {
2979 		if (path == pip) {
2980 			break;
2981 		}
2982 		prev = path;
2983 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2984 	}
2985 
2986 	if (path) {
2987 		ct->ct_path_count--;
2988 		if (prev) {
2989 			MDI_PI(prev)->pi_client_link =
2990 			    MDI_PI(path)->pi_client_link;
2991 		} else {
2992 			ct->ct_path_head =
2993 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2994 		}
2995 		if (ct->ct_path_tail == path) {
2996 			ct->ct_path_tail = prev;
2997 		}
2998 		if (ct->ct_path_last == path) {
2999 			ct->ct_path_last = ct->ct_path_head;
3000 		}
3001 	}
3002 	MDI_PI(pip)->pi_client_link = NULL;
3003 	MDI_PI(pip)->pi_client = NULL;
3004 }
3005 
/*
 * i_mdi_pi_state_change():
 *		Move a mdi_pathinfo node to the requested state (online,
 *		standby, fault or offline).
 *
 *		The sequence is: initialize the node through the vHCI's
 *		vo_pi_init if it is still initing; refuse the request if the
 *		pHCI is not ready; wait for any transient state on the node
 *		and any client failover to finish; mark the node as
 *		transitioning; call the vHCI's vo_pi_state_change; then commit
 *		or roll back the node state and, when the client has no other
 *		unstable operations outstanding, update the client state and
 *		online/offline the client devinfo node as needed.
 *
 *		pip	node to change
 *		state	target MDI_PATHINFO_STATE_* value
 *		flag	NDI flags; NDI_DEVI_REMOVE is examined here, and the
 *			value is passed through to vo_pi_state_change
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		Otherwise, a non-success value returned by vo_pi_state_change.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point.  The node lock is dropped
	 * across the callback and retaken afterwards.
	 */

	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Marked stable again at state_change_exit. */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code.  Neither the
				 * client lock nor the node lock is held on
				 * this path to state_change_exit.
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Ask the vHCI driver to perform the state change; no MDI locks are
	 * held across the callback.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	/*
	 * Commit the new state on success; otherwise fall back to the state
	 * the node had before the transition was started.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly.  From here until the conversion
			 * switch below, rv carries an NDI_* code rather than
			 * an MDI_* code.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3323 
3324 /*
3325  * mdi_pi_online():
3326  *		Place the path_info node in the online state.  The path is
3327  *		now available to be selected by mdi_select_path() for
3328  *		transporting I/O requests to client devices.
3329  * Return Values:
3330  *		MDI_SUCCESS
3331  *		MDI_FAILURE
3332  */
3333 int
3334 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3335 {
3336 	mdi_client_t *ct = MDI_PI(pip)->pi_client;
3337 	dev_info_t *cdip;
3338 	int		client_held = 0;
3339 	int rv;
3340 
3341 	ASSERT(ct != NULL);
3342 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3343 	if (rv != MDI_SUCCESS)
3344 		return (rv);
3345 
3346 	MDI_PI_LOCK(pip);
3347 	if (MDI_PI(pip)->pi_pm_held == 0) {
3348 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3349 		    "i_mdi_pm_hold_pip\n"));
3350 		i_mdi_pm_hold_pip(pip);
3351 		client_held = 1;
3352 	}
3353 	MDI_PI_UNLOCK(pip);
3354 
3355 	if (client_held) {
3356 		MDI_CLIENT_LOCK(ct);
3357 		if (ct->ct_power_cnt == 0) {
3358 			rv = i_mdi_power_all_phci(ct);
3359 		}
3360 
3361 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3362 		    "i_mdi_pm_hold_client\n"));
3363 		i_mdi_pm_hold_client(ct, 1);
3364 		MDI_CLIENT_UNLOCK(ct);
3365 	}
3366 
3367 	/*
3368 	 * Create the per-path (pathinfo) IO and error kstats which
3369 	 * are reported via iostat(1m).
3370 	 *
3371 	 * Defer creating the per-path kstats if device is not yet
3372 	 * attached;  the names of the kstats are constructed in part
3373 	 * using the devices instance number which is assigned during
3374 	 * process of attaching the client device.
3375 	 *
3376 	 * The framework post_attach handler, mdi_post_attach(), is
3377 	 * is responsible for initializing the client's pathinfo list
3378 	 * once successfully attached.
3379 	 */
3380 	cdip = ct->ct_dip;
3381 	ASSERT(cdip);
3382 	if (cdip == NULL || !i_ddi_devi_attached(cdip))
3383 		return (rv);
3384 
3385 	MDI_CLIENT_LOCK(ct);
3386 	rv = i_mdi_pi_kstat_create(pip);
3387 	MDI_CLIENT_UNLOCK(ct);
3388 	return (rv);
3389 }
3390 
3391 /*
3392  * mdi_pi_standby():
3393  *		Place the mdi_pathinfo node in standby state
3394  *
3395  * Return Values:
3396  *		MDI_SUCCESS
3397  *		MDI_FAILURE
3398  */
3399 int
3400 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3401 {
3402 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3403 }
3404 
3405 /*
3406  * mdi_pi_fault():
3407  *		Place the mdi_pathinfo node in fault'ed state
3408  * Return Values:
3409  *		MDI_SUCCESS
3410  *		MDI_FAILURE
3411  */
3412 int
3413 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3414 {
3415 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3416 }
3417 
3418 /*
3419  * mdi_pi_offline():
3420  *		Offline a mdi_pathinfo node.
3421  * Return Values:
3422  *		MDI_SUCCESS
3423  *		MDI_FAILURE
3424  */
3425 int
3426 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3427 {
3428 	int	ret, client_held = 0;
3429 	mdi_client_t	*ct;
3430 
3431 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3432 
3433 	if (ret == MDI_SUCCESS) {
3434 		MDI_PI_LOCK(pip);
3435 		if (MDI_PI(pip)->pi_pm_held) {
3436 			client_held = 1;
3437 		}
3438 		MDI_PI_UNLOCK(pip);
3439 
3440 		if (client_held) {
3441 			ct = MDI_PI(pip)->pi_client;
3442 			MDI_CLIENT_LOCK(ct);
3443 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3444 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3445 			i_mdi_pm_rele_client(ct, 1);
3446 			MDI_CLIENT_UNLOCK(ct);
3447 		}
3448 	}
3449 
3450 	return (ret);
3451 }
3452 
3453 /*
3454  * i_mdi_pi_offline():
3455  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3456  */
3457 static int
3458 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3459 {
3460 	dev_info_t	*vdip = NULL;
3461 	mdi_vhci_t	*vh = NULL;
3462 	mdi_client_t	*ct = NULL;
3463 	int		(*f)();
3464 	int		rv;
3465 
3466 	MDI_PI_LOCK(pip);
3467 	ct = MDI_PI(pip)->pi_client;
3468 	ASSERT(ct != NULL);
3469 
3470 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3471 		/*
3472 		 * Give a chance for pending I/Os to complete.
3473 		 */
3474 		MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3475 		    "%d cmds still pending on path: %p\n",
3476 		    MDI_PI(pip)->pi_ref_cnt, pip));
3477 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3478 		    &MDI_PI(pip)->pi_mutex,
3479 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3480 			/*
3481 			 * The timeout time reached without ref_cnt being zero
3482 			 * being signaled.
3483 			 */
3484 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3485 			    "Timeout reached on path %p without the cond\n",
3486 			    pip));
3487 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3488 			    "%d cmds still pending on path: %p\n",
3489 			    MDI_PI(pip)->pi_ref_cnt, pip));
3490 		}
3491 	}
3492 	vh = ct->ct_vhci;
3493 	vdip = vh->vh_dip;
3494 
3495 	/*
3496 	 * Notify vHCI that has registered this event
3497 	 */
3498 	ASSERT(vh->vh_ops);
3499 	f = vh->vh_ops->vo_pi_state_change;
3500 
3501 	if (f != NULL) {
3502 		MDI_PI_UNLOCK(pip);
3503 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3504 		    flags)) != MDI_SUCCESS) {
3505 			MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed "
3506 			    "vdip 0x%x, pip 0x%x", vdip, pip));
3507 		}
3508 		MDI_PI_LOCK(pip);
3509 	}
3510 
3511 	/*
3512 	 * Set the mdi_pathinfo node state and clear the transient condition
3513 	 */
3514 	MDI_PI_SET_OFFLINE(pip);
3515 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3516 	MDI_PI_UNLOCK(pip);
3517 
3518 	MDI_CLIENT_LOCK(ct);
3519 	if (rv == MDI_SUCCESS) {
3520 		if (ct->ct_unstable == 0) {
3521 			dev_info_t	*cdip = ct->ct_dip;
3522 
3523 			/*
3524 			 * Onlining the mdi_pathinfo node will impact the
3525 			 * client state Update the client and dev_info node
3526 			 * state accordingly
3527 			 */
3528 			i_mdi_client_update_state(ct);
3529 			rv = NDI_SUCCESS;
3530 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3531 				if (cdip &&
3532 				    (i_ddi_node_state(cdip) >=
3533 				    DS_INITIALIZED)) {
3534 					MDI_CLIENT_UNLOCK(ct);
3535 					rv = ndi_devi_offline(cdip, 0);
3536 					MDI_CLIENT_LOCK(ct);
3537 					if (rv != NDI_SUCCESS) {
3538 						/*
3539 						 * ndi_devi_offline failed.
3540 						 * Reset client flags to
3541 						 * online.
3542 						 */
3543 						MDI_DEBUG(4, (CE_WARN, cdip,
3544 						    "!ndi_devi_offline: failed "
3545 						    " Error: %x", rv));
3546 						MDI_CLIENT_SET_ONLINE(ct);
3547 					}
3548 				}
3549 			}
3550 			/*
3551 			 * Convert to MDI error code
3552 			 */
3553 			switch (rv) {
3554 			case NDI_SUCCESS:
3555 				rv = MDI_SUCCESS;
3556 				break;
3557 			case NDI_BUSY:
3558 				rv = MDI_BUSY;
3559 				break;
3560 			default:
3561 				rv = MDI_FAILURE;
3562 				break;
3563 			}
3564 		}
3565 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3566 		i_mdi_report_path_state(ct, pip);
3567 	}
3568 
3569 	MDI_CLIENT_UNLOCK(ct);
3570 
3571 	/*
3572 	 * Change in the mdi_pathinfo node state will impact the client state
3573 	 */
3574 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3575 	    ct, pip));
3576 	return (rv);
3577 }
3578 
3579 
3580 /*
3581  * mdi_pi_get_addr():
3582  *		Get the unit address associated with a mdi_pathinfo node
3583  *
3584  * Return Values:
3585  *		char *
3586  */
3587 char *
3588 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3589 {
3590 	if (pip == NULL)
3591 		return (NULL);
3592 
3593 	return (MDI_PI(pip)->pi_addr);
3594 }
3595 
3596 /*
3597  * mdi_pi_get_client():
3598  *		Get the client devinfo associated with a mdi_pathinfo node
3599  *
3600  * Return Values:
3601  *		Handle to client device dev_info node
3602  */
3603 dev_info_t *
3604 mdi_pi_get_client(mdi_pathinfo_t *pip)
3605 {
3606 	dev_info_t	*dip = NULL;
3607 	if (pip) {
3608 		dip = MDI_PI(pip)->pi_client->ct_dip;
3609 	}
3610 	return (dip);
3611 }
3612 
3613 /*
3614  * mdi_pi_get_phci():
3615  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3616  * Return Values:
3617  *		Handle to dev_info node
3618  */
3619 dev_info_t *
3620 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3621 {
3622 	dev_info_t	*dip = NULL;
3623 	if (pip) {
3624 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3625 	}
3626 	return (dip);
3627 }
3628 
3629 /*
3630  * mdi_pi_get_client_private():
3631  *		Get the client private information associated with the
3632  *		mdi_pathinfo node
3633  */
3634 void *
3635 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3636 {
3637 	void *cprivate = NULL;
3638 	if (pip) {
3639 		cprivate = MDI_PI(pip)->pi_cprivate;
3640 	}
3641 	return (cprivate);
3642 }
3643 
3644 /*
3645  * mdi_pi_set_client_private():
3646  *		Set the client private information in the mdi_pathinfo node
3647  */
3648 void
3649 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3650 {
3651 	if (pip) {
3652 		MDI_PI(pip)->pi_cprivate = priv;
3653 	}
3654 }
3655 
3656 /*
3657  * mdi_pi_get_phci_private():
3658  *		Get the pHCI private information associated with the
3659  *		mdi_pathinfo node
3660  */
3661 caddr_t
3662 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3663 {
3664 	caddr_t	pprivate = NULL;
3665 	if (pip) {
3666 		pprivate = MDI_PI(pip)->pi_pprivate;
3667 	}
3668 	return (pprivate);
3669 }
3670 
3671 /*
3672  * mdi_pi_set_phci_private():
3673  *		Set the pHCI private information in the mdi_pathinfo node
3674  */
3675 void
3676 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3677 {
3678 	if (pip) {
3679 		MDI_PI(pip)->pi_pprivate = priv;
3680 	}
3681 }
3682 
3683 /*
3684  * mdi_pi_get_state():
3685  *		Get the mdi_pathinfo node state. Transient states are internal
3686  *		and not provided to the users
3687  */
3688 mdi_pathinfo_state_t
3689 mdi_pi_get_state(mdi_pathinfo_t *pip)
3690 {
3691 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3692 
3693 	if (pip) {
3694 		if (MDI_PI_IS_TRANSIENT(pip)) {
3695 			/*
3696 			 * mdi_pathinfo is in state transition.  Return the
3697 			 * last good state.
3698 			 */
3699 			state = MDI_PI_OLD_STATE(pip);
3700 		} else {
3701 			state = MDI_PI_STATE(pip);
3702 		}
3703 	}
3704 	return (state);
3705 }
3706 
3707 /*
3708  * Note that the following function needs to be the new interface for
3709  * mdi_pi_get_state when mpxio gets integrated to ON.
3710  */
3711 int
3712 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
3713 		uint32_t *ext_state)
3714 {
3715 	*state = MDI_PATHINFO_STATE_INIT;
3716 
3717 	if (pip) {
3718 		if (MDI_PI_IS_TRANSIENT(pip)) {
3719 			/*
3720 			 * mdi_pathinfo is in state transition.  Return the
3721 			 * last good state.
3722 			 */
3723 			*state = MDI_PI_OLD_STATE(pip);
3724 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
3725 		} else {
3726 			*state = MDI_PI_STATE(pip);
3727 			*ext_state = MDI_PI_EXT_STATE(pip);
3728 		}
3729 	}
3730 	return (MDI_SUCCESS);
3731 }
3732 
3733 /*
3734  * mdi_pi_get_preferred:
3735  *	Get the preferred path flag
3736  */
3737 int
3738 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
3739 {
3740 	if (pip) {
3741 		return (MDI_PI(pip)->pi_preferred);
3742 	}
3743 	return (0);
3744 }
3745 
3746 /*
3747  * mdi_pi_set_preferred:
3748  *	Set the preferred path flag
3749  */
3750 void
3751 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
3752 {
3753 	if (pip) {
3754 		MDI_PI(pip)->pi_preferred = preferred;
3755 	}
3756 }
3757 
3758 
3759 /*
3760  * mdi_pi_set_state():
3761  *		Set the mdi_pathinfo node state
3762  */
3763 void
3764 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
3765 {
3766 	uint32_t	ext_state;
3767 
3768 	if (pip) {
3769 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
3770 		MDI_PI(pip)->pi_state = state;
3771 		MDI_PI(pip)->pi_state |= ext_state;
3772 	}
3773 }
3774 
3775 /*
3776  * Property functions:
3777  */
3778 
3779 int
3780 i_map_nvlist_error_to_mdi(int val)
3781 {
3782 	int rv;
3783 
3784 	switch (val) {
3785 	case 0:
3786 		rv = DDI_PROP_SUCCESS;
3787 		break;
3788 	case EINVAL:
3789 	case ENOTSUP:
3790 		rv = DDI_PROP_INVAL_ARG;
3791 		break;
3792 	case ENOMEM:
3793 		rv = DDI_PROP_NO_MEMORY;
3794 		break;
3795 	default:
3796 		rv = DDI_PROP_NOT_FOUND;
3797 		break;
3798 	}
3799 	return (rv);
3800 }
3801 
3802 /*
3803  * mdi_pi_get_next_prop():
3804  * 		Property walk function.  The caller should hold mdi_pi_lock()
3805  *		and release by calling mdi_pi_unlock() at the end of walk to
3806  *		get a consistent value.
3807  */
3808 
3809 nvpair_t *
3810 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
3811 {
3812 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3813 		return (NULL);
3814 	}
3815 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3816 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
3817 }
3818 
3819 /*
3820  * mdi_prop_remove():
3821  * 		Remove the named property from the named list.
3822  */
3823 
3824 int
3825 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
3826 {
3827 	if (pip == NULL) {
3828 		return (DDI_PROP_NOT_FOUND);
3829 	}
3830 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3831 	MDI_PI_LOCK(pip);
3832 	if (MDI_PI(pip)->pi_prop == NULL) {
3833 		MDI_PI_UNLOCK(pip);
3834 		return (DDI_PROP_NOT_FOUND);
3835 	}
3836 	if (name) {
3837 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
3838 	} else {
3839 		char		nvp_name[MAXNAMELEN];
3840 		nvpair_t	*nvp;
3841 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
3842 		while (nvp) {
3843 			nvpair_t	*next;
3844 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
3845 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
3846 			    nvpair_name(nvp));
3847 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
3848 			    nvp_name);
3849 			nvp = next;
3850 		}
3851 	}
3852 	MDI_PI_UNLOCK(pip);
3853 	return (DDI_PROP_SUCCESS);
3854 }
3855 
3856 /*
3857  * mdi_prop_size():
3858  * 		Get buffer size needed to pack the property data.
3859  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
3860  *		buffer size.
3861  */
3862 
3863 int
3864 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
3865 {
3866 	int	rv;
3867 	size_t	bufsize;
3868 
3869 	*buflenp = 0;
3870 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3871 		return (DDI_PROP_NOT_FOUND);
3872 	}
3873 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3874 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
3875 	    &bufsize, NV_ENCODE_NATIVE);
3876 	*buflenp = bufsize;
3877 	return (i_map_nvlist_error_to_mdi(rv));
3878 }
3879 
3880 /*
3881  * mdi_prop_pack():
3882  * 		pack the property list.  The caller should hold the
3883  *		mdi_pathinfo_t node to get a consistent data
3884  */
3885 
3886 int
3887 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
3888 {
3889 	int	rv;
3890 	size_t	bufsize;
3891 
3892 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
3893 		return (DDI_PROP_NOT_FOUND);
3894 	}
3895 
3896 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3897 
3898 	bufsize = buflen;
3899 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
3900 	    NV_ENCODE_NATIVE, KM_SLEEP);
3901 
3902 	return (i_map_nvlist_error_to_mdi(rv));
3903 }
3904 
3905 /*
3906  * mdi_prop_update_byte():
3907  *		Create/Update a byte property
3908  */
3909 int
3910 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
3911 {
3912 	int rv;
3913 
3914 	if (pip == NULL) {
3915 		return (DDI_PROP_INVAL_ARG);
3916 	}
3917 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3918 	MDI_PI_LOCK(pip);
3919 	if (MDI_PI(pip)->pi_prop == NULL) {
3920 		MDI_PI_UNLOCK(pip);
3921 		return (DDI_PROP_NOT_FOUND);
3922 	}
3923 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
3924 	MDI_PI_UNLOCK(pip);
3925 	return (i_map_nvlist_error_to_mdi(rv));
3926 }
3927 
3928 /*
3929  * mdi_prop_update_byte_array():
3930  *		Create/Update a byte array property
3931  */
3932 int
3933 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
3934     uint_t nelements)
3935 {
3936 	int rv;
3937 
3938 	if (pip == NULL) {
3939 		return (DDI_PROP_INVAL_ARG);
3940 	}
3941 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3942 	MDI_PI_LOCK(pip);
3943 	if (MDI_PI(pip)->pi_prop == NULL) {
3944 		MDI_PI_UNLOCK(pip);
3945 		return (DDI_PROP_NOT_FOUND);
3946 	}
3947 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
3948 	MDI_PI_UNLOCK(pip);
3949 	return (i_map_nvlist_error_to_mdi(rv));
3950 }
3951 
3952 /*
3953  * mdi_prop_update_int():
3954  *		Create/Update a 32 bit integer property
3955  */
3956 int
3957 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
3958 {
3959 	int rv;
3960 
3961 	if (pip == NULL) {
3962 		return (DDI_PROP_INVAL_ARG);
3963 	}
3964 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3965 	MDI_PI_LOCK(pip);
3966 	if (MDI_PI(pip)->pi_prop == NULL) {
3967 		MDI_PI_UNLOCK(pip);
3968 		return (DDI_PROP_NOT_FOUND);
3969 	}
3970 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
3971 	MDI_PI_UNLOCK(pip);
3972 	return (i_map_nvlist_error_to_mdi(rv));
3973 }
3974 
3975 /*
3976  * mdi_prop_update_int64():
3977  *		Create/Update a 64 bit integer property
3978  */
3979 int
3980 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
3981 {
3982 	int rv;
3983 
3984 	if (pip == NULL) {
3985 		return (DDI_PROP_INVAL_ARG);
3986 	}
3987 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3988 	MDI_PI_LOCK(pip);
3989 	if (MDI_PI(pip)->pi_prop == NULL) {
3990 		MDI_PI_UNLOCK(pip);
3991 		return (DDI_PROP_NOT_FOUND);
3992 	}
3993 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
3994 	MDI_PI_UNLOCK(pip);
3995 	return (i_map_nvlist_error_to_mdi(rv));
3996 }
3997 
3998 /*
3999  * mdi_prop_update_int_array():
4000  *		Create/Update a int array property
4001  */
4002 int
4003 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4004 	    uint_t nelements)
4005 {
4006 	int rv;
4007 
4008 	if (pip == NULL) {
4009 		return (DDI_PROP_INVAL_ARG);
4010 	}
4011 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4012 	MDI_PI_LOCK(pip);
4013 	if (MDI_PI(pip)->pi_prop == NULL) {
4014 		MDI_PI_UNLOCK(pip);
4015 		return (DDI_PROP_NOT_FOUND);
4016 	}
4017 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4018 	    nelements);
4019 	MDI_PI_UNLOCK(pip);
4020 	return (i_map_nvlist_error_to_mdi(rv));
4021 }
4022 
4023 /*
4024  * mdi_prop_update_string():
4025  *		Create/Update a string property
4026  */
4027 int
4028 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4029 {
4030 	int rv;
4031 
4032 	if (pip == NULL) {
4033 		return (DDI_PROP_INVAL_ARG);
4034 	}
4035 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4036 	MDI_PI_LOCK(pip);
4037 	if (MDI_PI(pip)->pi_prop == NULL) {
4038 		MDI_PI_UNLOCK(pip);
4039 		return (DDI_PROP_NOT_FOUND);
4040 	}
4041 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4042 	MDI_PI_UNLOCK(pip);
4043 	return (i_map_nvlist_error_to_mdi(rv));
4044 }
4045 
4046 /*
4047  * mdi_prop_update_string_array():
4048  *		Create/Update a string array property
4049  */
4050 int
4051 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4052     uint_t nelements)
4053 {
4054 	int rv;
4055 
4056 	if (pip == NULL) {
4057 		return (DDI_PROP_INVAL_ARG);
4058 	}
4059 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4060 	MDI_PI_LOCK(pip);
4061 	if (MDI_PI(pip)->pi_prop == NULL) {
4062 		MDI_PI_UNLOCK(pip);
4063 		return (DDI_PROP_NOT_FOUND);
4064 	}
4065 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4066 	    nelements);
4067 	MDI_PI_UNLOCK(pip);
4068 	return (i_map_nvlist_error_to_mdi(rv));
4069 }
4070 
4071 /*
4072  * mdi_prop_lookup_byte():
4073  * 		Look for byte property identified by name.  The data returned
4074  *		is the actual property and valid as long as mdi_pathinfo_t node
4075  *		is alive.
4076  */
4077 int
4078 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4079 {
4080 	int rv;
4081 
4082 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4083 		return (DDI_PROP_NOT_FOUND);
4084 	}
4085 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4086 	return (i_map_nvlist_error_to_mdi(rv));
4087 }
4088 
4089 
4090 /*
4091  * mdi_prop_lookup_byte_array():
4092  * 		Look for byte array property identified by name.  The data
4093  *		returned is the actual property and valid as long as
4094  *		mdi_pathinfo_t node is alive.
4095  */
4096 int
4097 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4098     uint_t *nelements)
4099 {
4100 	int rv;
4101 
4102 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4103 		return (DDI_PROP_NOT_FOUND);
4104 	}
4105 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4106 	    nelements);
4107 	return (i_map_nvlist_error_to_mdi(rv));
4108 }
4109 
4110 /*
4111  * mdi_prop_lookup_int():
4112  * 		Look for int property identified by name.  The data returned
4113  *		is the actual property and valid as long as mdi_pathinfo_t
4114  *		node is alive.
4115  */
4116 int
4117 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4118 {
4119 	int rv;
4120 
4121 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4122 		return (DDI_PROP_NOT_FOUND);
4123 	}
4124 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4125 	return (i_map_nvlist_error_to_mdi(rv));
4126 }
4127 
4128 /*
4129  * mdi_prop_lookup_int64():
4130  * 		Look for int64 property identified by name.  The data returned
4131  *		is the actual property and valid as long as mdi_pathinfo_t node
4132  *		is alive.
4133  */
4134 int
4135 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4136 {
4137 	int rv;
4138 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4139 		return (DDI_PROP_NOT_FOUND);
4140 	}
4141 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4142 	return (i_map_nvlist_error_to_mdi(rv));
4143 }
4144 
4145 /*
4146  * mdi_prop_lookup_int_array():
4147  * 		Look for int array property identified by name.  The data
4148  *		returned is the actual property and valid as long as
4149  *		mdi_pathinfo_t node is alive.
4150  */
4151 int
4152 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4153     uint_t *nelements)
4154 {
4155 	int rv;
4156 
4157 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4158 		return (DDI_PROP_NOT_FOUND);
4159 	}
4160 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4161 	    (int32_t **)data, nelements);
4162 	return (i_map_nvlist_error_to_mdi(rv));
4163 }
4164 
4165 /*
4166  * mdi_prop_lookup_string():
4167  * 		Look for string property identified by name.  The data
4168  *		returned is the actual property and valid as long as
4169  *		mdi_pathinfo_t node is alive.
4170  */
4171 int
4172 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4173 {
4174 	int rv;
4175 
4176 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4177 		return (DDI_PROP_NOT_FOUND);
4178 	}
4179 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4180 	return (i_map_nvlist_error_to_mdi(rv));
4181 }
4182 
4183 /*
4184  * mdi_prop_lookup_string_array():
4185  * 		Look for string array property identified by name.  The data
4186  *		returned is the actual property and valid as long as
4187  *		mdi_pathinfo_t node is alive.
4188  */
4189 
4190 int
4191 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4192     uint_t *nelements)
4193 {
4194 	int rv;
4195 
4196 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4197 		return (DDI_PROP_NOT_FOUND);
4198 	}
4199 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4200 	    nelements);
4201 	return (i_map_nvlist_error_to_mdi(rv));
4202 }
4203 
4204 /*
4205  * mdi_prop_free():
4206  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4207  *		functions return the pointer to actual property data and not a
4208  *		copy of it.  So the data returned is valid as long as
4209  *		mdi_pathinfo_t node is valid.
4210  */
4211 
4212 /*ARGSUSED*/
4213 int
4214 mdi_prop_free(void *data)
4215 {
4216 	return (DDI_PROP_SUCCESS);
4217 }
4218 
4219 /*ARGSUSED*/
4220 static void
4221 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4222 {
4223 	char		*phci_path, *ct_path;
4224 	char		*ct_status;
4225 	char		*status;
4226 	dev_info_t	*dip = ct->ct_dip;
4227 	char		lb_buf[64];
4228 
4229 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4230 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4231 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4232 		return;
4233 	}
4234 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4235 		ct_status = "optimal";
4236 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4237 		ct_status = "degraded";
4238 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4239 		ct_status = "failed";
4240 	} else {
4241 		ct_status = "unknown";
4242 	}
4243 
4244 	if (MDI_PI_IS_OFFLINE(pip)) {
4245 		status = "offline";
4246 	} else if (MDI_PI_IS_ONLINE(pip)) {
4247 		status = "online";
4248 	} else if (MDI_PI_IS_STANDBY(pip)) {
4249 		status = "standby";
4250 	} else if (MDI_PI_IS_FAULT(pip)) {
4251 		status = "faulted";
4252 	} else {
4253 		status = "unknown";
4254 	}
4255 
4256 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4257 		(void) snprintf(lb_buf, sizeof (lb_buf),
4258 		    "%s, region-size: %d", mdi_load_balance_lba,
4259 			ct->ct_lb_args->region_size);
4260 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4261 		(void) snprintf(lb_buf, sizeof (lb_buf),
4262 		    "%s", mdi_load_balance_none);
4263 	} else {
4264 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4265 		    mdi_load_balance_rr);
4266 	}
4267 
4268 	if (dip) {
4269 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4270 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4271 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4272 		    "path %s (%s%d) to target address: %s is %s"
4273 		    " Load balancing: %s\n",
4274 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4275 		    ddi_get_instance(dip), ct_status,
4276 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4277 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4278 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4279 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4280 		kmem_free(phci_path, MAXPATHLEN);
4281 		kmem_free(ct_path, MAXPATHLEN);
4282 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4283 	}
4284 }
4285 
4286 #ifdef	DEBUG
4287 /*
4288  * i_mdi_log():
4289  *		Utility function for error message management
4290  *
4291  */
4292 
4293 /*VARARGS3*/
4294 static void
4295 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
4296 {
4297 	char		buf[MAXNAMELEN];
4298 	char		name[MAXNAMELEN];
4299 	va_list		ap;
4300 	int		log_only = 0;
4301 	int		boot_only = 0;
4302 	int		console_only = 0;
4303 
4304 	if (dip) {
4305 		if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) {
4306 			(void) snprintf(name, MAXNAMELEN, "%s%d:\n",
4307 			    ddi_node_name(dip), ddi_get_instance(dip));
4308 		} else {
4309 			(void) snprintf(name, MAXNAMELEN, "%s%d:",
4310 			    ddi_node_name(dip), ddi_get_instance(dip));
4311 		}
4312 	} else {
4313 		name[0] = '\0';
4314 	}
4315 
4316 	va_start(ap, fmt);
4317 	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
4318 	va_end(ap);
4319 
4320 	switch (buf[0]) {
4321 	case '!':
4322 		log_only = 1;
4323 		break;
4324 	case '?':
4325 		boot_only = 1;
4326 		break;
4327 	case '^':
4328 		console_only = 1;
4329 		break;
4330 	}
4331 
4332 	switch (level) {
4333 	case CE_NOTE:
4334 		level = CE_CONT;
4335 		/* FALLTHROUGH */
4336 	case CE_CONT:
4337 	case CE_WARN:
4338 	case CE_PANIC:
4339 		if (boot_only) {
4340 			cmn_err(level, "?%s\t%s", name, &buf[1]);
4341 		} else if (console_only) {
4342 			cmn_err(level, "^%s\t%s", name, &buf[1]);
4343 		} else if (log_only) {
4344 			cmn_err(level, "!%s\t%s", name, &buf[1]);
4345 		} else {
4346 			cmn_err(level, "%s\t%s", name, buf);
4347 		}
4348 		break;
4349 	default:
4350 		cmn_err(level, "%s\t%s", name, buf);
4351 		break;
4352 	}
4353 }
4354 #endif	/* DEBUG */
4355 
4356 void
4357 i_mdi_client_online(dev_info_t *ct_dip)
4358 {
4359 	mdi_client_t	*ct;
4360 
4361 	/*
4362 	 * Client online notification. Mark client state as online
4363 	 * restore our binding with dev_info node
4364 	 */
4365 	ct = i_devi_get_client(ct_dip);
4366 	ASSERT(ct != NULL);
4367 	MDI_CLIENT_LOCK(ct);
4368 	MDI_CLIENT_SET_ONLINE(ct);
4369 	/* catch for any memory leaks */
4370 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4371 	ct->ct_dip = ct_dip;
4372 
4373 	if (ct->ct_power_cnt == 0)
4374 		(void) i_mdi_power_all_phci(ct);
4375 
4376 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4377 	    "i_mdi_pm_hold_client\n"));
4378 	i_mdi_pm_hold_client(ct, 1);
4379 
4380 	MDI_CLIENT_UNLOCK(ct);
4381 }
4382 
4383 void
4384 i_mdi_phci_online(dev_info_t *ph_dip)
4385 {
4386 	mdi_phci_t	*ph;
4387 
4388 	/* pHCI online notification. Mark state accordingly */
4389 	ph = i_devi_get_phci(ph_dip);
4390 	ASSERT(ph != NULL);
4391 	MDI_PHCI_LOCK(ph);
4392 	MDI_PHCI_SET_ONLINE(ph);
4393 	MDI_PHCI_UNLOCK(ph);
4394 }
4395 
4396 /*
4397  * mdi_devi_online():
4398  * 		Online notification from NDI framework on pHCI/client
4399  *		device online.
4400  * Return Values:
4401  *		NDI_SUCCESS
4402  *		MDI_FAILURE
4403  */
4404 
4405 /*ARGSUSED*/
4406 int
4407 mdi_devi_online(dev_info_t *dip, uint_t flags)
4408 {
4409 	if (MDI_PHCI(dip)) {
4410 		i_mdi_phci_online(dip);
4411 	}
4412 
4413 	if (MDI_CLIENT(dip)) {
4414 		i_mdi_client_online(dip);
4415 	}
4416 	return (NDI_SUCCESS);
4417 }
4418 
4419 /*
4420  * mdi_devi_offline():
4421  * 		Offline notification from NDI framework on pHCI/Client device
4422  *		offline.
4423  *
4424  * Return Values:
4425  *		NDI_SUCCESS
4426  *		NDI_FAILURE
4427  */
4428 
4429 /*ARGSUSED*/
4430 int
4431 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4432 {
4433 	int		rv = NDI_SUCCESS;
4434 
4435 	if (MDI_CLIENT(dip)) {
4436 		rv = i_mdi_client_offline(dip, flags);
4437 		if (rv != NDI_SUCCESS)
4438 			return (rv);
4439 	}
4440 
4441 	if (MDI_PHCI(dip)) {
4442 		rv = i_mdi_phci_offline(dip, flags);
4443 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4444 			/* set client back online */
4445 			i_mdi_client_online(dip);
4446 		}
4447 	}
4448 
4449 	return (rv);
4450 }
4451 
4452 /*ARGSUSED*/
4453 static int
4454 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4455 {
4456 	int		rv = NDI_SUCCESS;
4457 	mdi_phci_t	*ph;
4458 	mdi_client_t	*ct;
4459 	mdi_pathinfo_t	*pip;
4460 	mdi_pathinfo_t	*next;
4461 	mdi_pathinfo_t	*failed_pip = NULL;
4462 	dev_info_t	*cdip;
4463 
4464 	/*
4465 	 * pHCI component offline notification
4466 	 * Make sure that this pHCI instance is free to be offlined.
4467 	 * If it is OK to proceed, Offline and remove all the child
4468 	 * mdi_pathinfo nodes.  This process automatically offlines
4469 	 * corresponding client devices, for which this pHCI provides
4470 	 * critical services.
4471 	 */
4472 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
4473 	    dip));
4474 
4475 	ph = i_devi_get_phci(dip);
4476 	if (ph == NULL) {
4477 		return (rv);
4478 	}
4479 
4480 	MDI_PHCI_LOCK(ph);
4481 
4482 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4483 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
4484 		MDI_PHCI_UNLOCK(ph);
4485 		return (NDI_SUCCESS);
4486 	}
4487 
4488 	/*
4489 	 * Check to see if the pHCI can be offlined
4490 	 */
4491 	if (ph->ph_unstable) {
4492 		MDI_DEBUG(1, (CE_WARN, dip,
4493 		    "!One or more target devices are in transient "
4494 		    "state. This device can not be removed at "
4495 		    "this moment. Please try again later."));
4496 		MDI_PHCI_UNLOCK(ph);
4497 		return (NDI_BUSY);
4498 	}
4499 
4500 	pip = ph->ph_path_head;
4501 	while (pip != NULL) {
4502 		MDI_PI_LOCK(pip);
4503 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4504 		/*
4505 		 * The mdi_pathinfo state is OK. Check the client state.
4506 		 * If failover in progress fail the pHCI from offlining
4507 		 */
4508 		ct = MDI_PI(pip)->pi_client;
4509 		i_mdi_client_lock(ct, pip);
4510 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4511 		    (ct->ct_unstable)) {
4512 			/*
4513 			 * Failover is in progress, Fail the DR
4514 			 */
4515 			MDI_DEBUG(1, (CE_WARN, dip,
4516 			    "!pHCI device (%s%d) is Busy. %s",
4517 			    ddi_driver_name(dip), ddi_get_instance(dip),
4518 			    "This device can not be removed at "
4519 			    "this moment. Please try again later."));
4520 			MDI_PI_UNLOCK(pip);
4521 			MDI_CLIENT_UNLOCK(ct);
4522 			MDI_PHCI_UNLOCK(ph);
4523 			return (NDI_BUSY);
4524 		}
4525 		MDI_PI_UNLOCK(pip);
4526 
4527 		/*
4528 		 * Check to see of we are removing the last path of this
4529 		 * client device...
4530 		 */
4531 		cdip = ct->ct_dip;
4532 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4533 		    (i_mdi_client_compute_state(ct, ph) ==
4534 		    MDI_CLIENT_STATE_FAILED)) {
4535 			i_mdi_client_unlock(ct);
4536 			MDI_PHCI_UNLOCK(ph);
4537 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4538 				/*
4539 				 * ndi_devi_offline() failed.
4540 				 * This pHCI provides the critical path
4541 				 * to one or more client devices.
4542 				 * Return busy.
4543 				 */
4544 				MDI_PHCI_LOCK(ph);
4545 				MDI_DEBUG(1, (CE_WARN, dip,
4546 				    "!pHCI device (%s%d) is Busy. %s",
4547 				    ddi_driver_name(dip), ddi_get_instance(dip),
4548 				    "This device can not be removed at "
4549 				    "this moment. Please try again later."));
4550 				failed_pip = pip;
4551 				break;
4552 			} else {
4553 				MDI_PHCI_LOCK(ph);
4554 				pip = next;
4555 			}
4556 		} else {
4557 			i_mdi_client_unlock(ct);
4558 			pip = next;
4559 		}
4560 	}
4561 
4562 	if (failed_pip) {
4563 		pip = ph->ph_path_head;
4564 		while (pip != failed_pip) {
4565 			MDI_PI_LOCK(pip);
4566 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4567 			ct = MDI_PI(pip)->pi_client;
4568 			i_mdi_client_lock(ct, pip);
4569 			cdip = ct->ct_dip;
4570 			switch (MDI_CLIENT_STATE(ct)) {
4571 			case MDI_CLIENT_STATE_OPTIMAL:
4572 			case MDI_CLIENT_STATE_DEGRADED:
4573 				if (cdip) {
4574 					MDI_PI_UNLOCK(pip);
4575 					i_mdi_client_unlock(ct);
4576 					MDI_PHCI_UNLOCK(ph);
4577 					(void) ndi_devi_online(cdip, 0);
4578 					MDI_PHCI_LOCK(ph);
4579 					pip = next;
4580 					continue;
4581 				}
4582 				break;
4583 
4584 			case MDI_CLIENT_STATE_FAILED:
4585 				if (cdip) {
4586 					MDI_PI_UNLOCK(pip);
4587 					i_mdi_client_unlock(ct);
4588 					MDI_PHCI_UNLOCK(ph);
4589 					(void) ndi_devi_offline(cdip, 0);
4590 					MDI_PHCI_LOCK(ph);
4591 					pip = next;
4592 					continue;
4593 				}
4594 				break;
4595 			}
4596 			MDI_PI_UNLOCK(pip);
4597 			i_mdi_client_unlock(ct);
4598 			pip = next;
4599 		}
4600 		MDI_PHCI_UNLOCK(ph);
4601 		return (NDI_BUSY);
4602 	}
4603 
4604 	/*
4605 	 * Mark the pHCI as offline
4606 	 */
4607 	MDI_PHCI_SET_OFFLINE(ph);
4608 
4609 	/*
4610 	 * Mark the child mdi_pathinfo nodes as transient
4611 	 */
4612 	pip = ph->ph_path_head;
4613 	while (pip != NULL) {
4614 		MDI_PI_LOCK(pip);
4615 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4616 		MDI_PI_SET_OFFLINING(pip);
4617 		MDI_PI_UNLOCK(pip);
4618 		pip = next;
4619 	}
4620 	MDI_PHCI_UNLOCK(ph);
4621 	/*
4622 	 * Give a chance for any pending commands to execute
4623 	 */
4624 	delay(1);
4625 	MDI_PHCI_LOCK(ph);
4626 	pip = ph->ph_path_head;
4627 	while (pip != NULL) {
4628 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4629 		(void) i_mdi_pi_offline(pip, flags);
4630 		MDI_PI_LOCK(pip);
4631 		ct = MDI_PI(pip)->pi_client;
4632 		if (!MDI_PI_IS_OFFLINE(pip)) {
4633 			MDI_DEBUG(1, (CE_WARN, dip,
4634 			    "!pHCI device (%s%d) is Busy. %s",
4635 			    ddi_driver_name(dip), ddi_get_instance(dip),
4636 			    "This device can not be removed at "
4637 			    "this moment. Please try again later."));
4638 			MDI_PI_UNLOCK(pip);
4639 			MDI_PHCI_SET_ONLINE(ph);
4640 			MDI_PHCI_UNLOCK(ph);
4641 			return (NDI_BUSY);
4642 		}
4643 		MDI_PI_UNLOCK(pip);
4644 		pip = next;
4645 	}
4646 	MDI_PHCI_UNLOCK(ph);
4647 
4648 	return (rv);
4649 }
4650 
4651 /*ARGSUSED*/
4652 static int
4653 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
4654 {
4655 	int		rv = NDI_SUCCESS;
4656 	mdi_client_t	*ct;
4657 
4658 	/*
4659 	 * Client component to go offline.  Make sure that we are
4660 	 * not in failing over state and update client state
4661 	 * accordingly
4662 	 */
4663 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
4664 	    dip));
4665 	ct = i_devi_get_client(dip);
4666 	if (ct != NULL) {
4667 		MDI_CLIENT_LOCK(ct);
4668 		if (ct->ct_unstable) {
4669 			/*
4670 			 * One or more paths are in transient state,
4671 			 * Dont allow offline of a client device
4672 			 */
4673 			MDI_DEBUG(1, (CE_WARN, dip,
4674 			    "!One or more paths to this device is "
4675 			    "in transient state. This device can not "
4676 			    "be removed at this moment. "
4677 			    "Please try again later."));
4678 			MDI_CLIENT_UNLOCK(ct);
4679 			return (NDI_BUSY);
4680 		}
4681 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
4682 			/*
4683 			 * Failover is in progress, Dont allow DR of
4684 			 * a client device
4685 			 */
4686 			MDI_DEBUG(1, (CE_WARN, dip,
4687 			    "!Client device (%s%d) is Busy. %s",
4688 			    ddi_driver_name(dip), ddi_get_instance(dip),
4689 			    "This device can not be removed at "
4690 			    "this moment. Please try again later."));
4691 			MDI_CLIENT_UNLOCK(ct);
4692 			return (NDI_BUSY);
4693 		}
4694 		MDI_CLIENT_SET_OFFLINE(ct);
4695 
4696 		/*
4697 		 * Unbind our relationship with the dev_info node
4698 		 */
4699 		if (flags & NDI_DEVI_REMOVE) {
4700 			ct->ct_dip = NULL;
4701 		}
4702 		MDI_CLIENT_UNLOCK(ct);
4703 	}
4704 	return (rv);
4705 }
4706 
4707 /*
4708  * mdi_pre_attach():
4709  *		Pre attach() notification handler
4710  */
4711 
4712 /*ARGSUSED*/
4713 int
4714 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4715 {
4716 	/* don't support old DDI_PM_RESUME */
4717 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
4718 	    (cmd == DDI_PM_RESUME))
4719 		return (DDI_FAILURE);
4720 
4721 	return (DDI_SUCCESS);
4722 }
4723 
4724 /*
4725  * mdi_post_attach():
4726  *		Post attach() notification handler
4727  */
4728 
4729 /*ARGSUSED*/
4730 void
4731 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
4732 {
4733 	mdi_phci_t	*ph;
4734 	mdi_client_t	*ct;
4735 	mdi_pathinfo_t	*pip;
4736 
4737 	if (MDI_PHCI(dip)) {
4738 		ph = i_devi_get_phci(dip);
4739 		ASSERT(ph != NULL);
4740 
4741 		MDI_PHCI_LOCK(ph);
4742 		switch (cmd) {
4743 		case DDI_ATTACH:
4744 			MDI_DEBUG(2, (CE_NOTE, dip,
4745 			    "!pHCI post_attach: called %p\n", ph));
4746 			if (error == DDI_SUCCESS) {
4747 				MDI_PHCI_SET_ATTACH(ph);
4748 			} else {
4749 				MDI_DEBUG(1, (CE_NOTE, dip,
4750 				    "!pHCI post_attach: failed error=%d\n",
4751 				    error));
4752 				MDI_PHCI_SET_DETACH(ph);
4753 			}
4754 			break;
4755 
4756 		case DDI_RESUME:
4757 			MDI_DEBUG(2, (CE_NOTE, dip,
4758 			    "!pHCI post_resume: called %p\n", ph));
4759 			if (error == DDI_SUCCESS) {
4760 				MDI_PHCI_SET_RESUME(ph);
4761 			} else {
4762 				MDI_DEBUG(1, (CE_NOTE, dip,
4763 				    "!pHCI post_resume: failed error=%d\n",
4764 				    error));
4765 				MDI_PHCI_SET_SUSPEND(ph);
4766 			}
4767 			break;
4768 		}
4769 		MDI_PHCI_UNLOCK(ph);
4770 	}
4771 
4772 	if (MDI_CLIENT(dip)) {
4773 		ct = i_devi_get_client(dip);
4774 		ASSERT(ct != NULL);
4775 
4776 		MDI_CLIENT_LOCK(ct);
4777 		switch (cmd) {
4778 		case DDI_ATTACH:
4779 			MDI_DEBUG(2, (CE_NOTE, dip,
4780 			    "!Client post_attach: called %p\n", ct));
4781 			if (error != DDI_SUCCESS) {
4782 				MDI_DEBUG(1, (CE_NOTE, dip,
4783 				    "!Client post_attach: failed error=%d\n",
4784 				    error));
4785 				MDI_CLIENT_SET_DETACH(ct);
4786 				MDI_DEBUG(4, (CE_WARN, dip,
4787 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
4788 				i_mdi_pm_reset_client(ct);
4789 				break;
4790 			}
4791 
4792 			/*
4793 			 * Client device has successfully attached.
4794 			 * Create kstats for any pathinfo structures
4795 			 * initially associated with this client.
4796 			 */
4797 			for (pip = ct->ct_path_head; pip != NULL;
4798 			    pip = (mdi_pathinfo_t *)
4799 			    MDI_PI(pip)->pi_client_link) {
4800 				(void) i_mdi_pi_kstat_create(pip);
4801 				i_mdi_report_path_state(ct, pip);
4802 			}
4803 			MDI_CLIENT_SET_ATTACH(ct);
4804 			break;
4805 
4806 		case DDI_RESUME:
4807 			MDI_DEBUG(2, (CE_NOTE, dip,
4808 			    "!Client post_attach: called %p\n", ct));
4809 			if (error == DDI_SUCCESS) {
4810 				MDI_CLIENT_SET_RESUME(ct);
4811 			} else {
4812 				MDI_DEBUG(1, (CE_NOTE, dip,
4813 				    "!Client post_resume: failed error=%d\n",
4814 				    error));
4815 				MDI_CLIENT_SET_SUSPEND(ct);
4816 			}
4817 			break;
4818 		}
4819 		MDI_CLIENT_UNLOCK(ct);
4820 	}
4821 }
4822 
4823 /*
4824  * mdi_pre_detach():
4825  *		Pre detach notification handler
4826  */
4827 
4828 /*ARGSUSED*/
4829 int
4830 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4831 {
4832 	int rv = DDI_SUCCESS;
4833 
4834 	if (MDI_CLIENT(dip)) {
4835 		(void) i_mdi_client_pre_detach(dip, cmd);
4836 	}
4837 
4838 	if (MDI_PHCI(dip)) {
4839 		rv = i_mdi_phci_pre_detach(dip, cmd);
4840 	}
4841 
4842 	return (rv);
4843 }
4844 
4845 /*ARGSUSED*/
4846 static int
4847 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4848 {
4849 	int		rv = DDI_SUCCESS;
4850 	mdi_phci_t	*ph;
4851 	mdi_client_t	*ct;
4852 	mdi_pathinfo_t	*pip;
4853 	mdi_pathinfo_t	*failed_pip = NULL;
4854 	mdi_pathinfo_t	*next;
4855 
4856 	ph = i_devi_get_phci(dip);
4857 	if (ph == NULL) {
4858 		return (rv);
4859 	}
4860 
4861 	MDI_PHCI_LOCK(ph);
4862 	switch (cmd) {
4863 	case DDI_DETACH:
4864 		MDI_DEBUG(2, (CE_NOTE, dip,
4865 		    "!pHCI pre_detach: called %p\n", ph));
4866 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
4867 			/*
4868 			 * mdi_pathinfo nodes are still attached to
4869 			 * this pHCI. Fail the detach for this pHCI.
4870 			 */
4871 			MDI_DEBUG(2, (CE_WARN, dip,
4872 			    "!pHCI pre_detach: "
4873 			    "mdi_pathinfo nodes are still attached "
4874 			    "%p\n", ph));
4875 			rv = DDI_FAILURE;
4876 			break;
4877 		}
4878 		MDI_PHCI_SET_DETACH(ph);
4879 		break;
4880 
4881 	case DDI_SUSPEND:
4882 		/*
4883 		 * pHCI is getting suspended.  Since mpxio client
4884 		 * devices may not be suspended at this point, to avoid
4885 		 * a potential stack overflow, it is important to suspend
4886 		 * client devices before pHCI can be suspended.
4887 		 */
4888 
4889 		MDI_DEBUG(2, (CE_NOTE, dip,
4890 		    "!pHCI pre_suspend: called %p\n", ph));
4891 		/*
4892 		 * Suspend all the client devices accessible through this pHCI
4893 		 */
4894 		pip = ph->ph_path_head;
4895 		while (pip != NULL && rv == DDI_SUCCESS) {
4896 			dev_info_t *cdip;
4897 			MDI_PI_LOCK(pip);
4898 			next =
4899 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4900 			ct = MDI_PI(pip)->pi_client;
4901 			i_mdi_client_lock(ct, pip);
4902 			cdip = ct->ct_dip;
4903 			MDI_PI_UNLOCK(pip);
4904 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
4905 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
4906 				i_mdi_client_unlock(ct);
4907 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
4908 				    DDI_SUCCESS) {
4909 					/*
4910 					 * Suspend of one of the client
4911 					 * device has failed.
4912 					 */
4913 					MDI_DEBUG(1, (CE_WARN, dip,
4914 					    "!Suspend of device (%s%d) failed.",
4915 					    ddi_driver_name(cdip),
4916 					    ddi_get_instance(cdip)));
4917 					failed_pip = pip;
4918 					break;
4919 				}
4920 			} else {
4921 				i_mdi_client_unlock(ct);
4922 			}
4923 			pip = next;
4924 		}
4925 
4926 		if (rv == DDI_SUCCESS) {
4927 			/*
4928 			 * Suspend of client devices is complete. Proceed
4929 			 * with pHCI suspend.
4930 			 */
4931 			MDI_PHCI_SET_SUSPEND(ph);
4932 		} else {
4933 			/*
4934 			 * Revert back all the suspended client device states
4935 			 * to converse.
4936 			 */
4937 			pip = ph->ph_path_head;
4938 			while (pip != failed_pip) {
4939 				dev_info_t *cdip;
4940 				MDI_PI_LOCK(pip);
4941 				next =
4942 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4943 				ct = MDI_PI(pip)->pi_client;
4944 				i_mdi_client_lock(ct, pip);
4945 				cdip = ct->ct_dip;
4946 				MDI_PI_UNLOCK(pip);
4947 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
4948 					i_mdi_client_unlock(ct);
4949 					(void) devi_attach(cdip, DDI_RESUME);
4950 				} else {
4951 					i_mdi_client_unlock(ct);
4952 				}
4953 				pip = next;
4954 			}
4955 		}
4956 		break;
4957 
4958 	default:
4959 		rv = DDI_FAILURE;
4960 		break;
4961 	}
4962 	MDI_PHCI_UNLOCK(ph);
4963 	return (rv);
4964 }
4965 
4966 /*ARGSUSED*/
4967 static int
4968 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4969 {
4970 	int		rv = DDI_SUCCESS;
4971 	mdi_client_t	*ct;
4972 
4973 	ct = i_devi_get_client(dip);
4974 	if (ct == NULL) {
4975 		return (rv);
4976 	}
4977 
4978 	MDI_CLIENT_LOCK(ct);
4979 	switch (cmd) {
4980 	case DDI_DETACH:
4981 		MDI_DEBUG(2, (CE_NOTE, dip,
4982 		    "!Client pre_detach: called %p\n", ct));
4983 		MDI_CLIENT_SET_DETACH(ct);
4984 		break;
4985 
4986 	case DDI_SUSPEND:
4987 		MDI_DEBUG(2, (CE_NOTE, dip,
4988 		    "!Client pre_suspend: called %p\n", ct));
4989 		MDI_CLIENT_SET_SUSPEND(ct);
4990 		break;
4991 
4992 	default:
4993 		rv = DDI_FAILURE;
4994 		break;
4995 	}
4996 	MDI_CLIENT_UNLOCK(ct);
4997 	return (rv);
4998 }
4999 
5000 /*
5001  * mdi_post_detach():
5002  *		Post detach notification handler
5003  */
5004 
5005 /*ARGSUSED*/
5006 void
5007 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5008 {
5009 	/*
5010 	 * Detach/Suspend of mpxio component failed. Update our state
5011 	 * too
5012 	 */
5013 	if (MDI_PHCI(dip))
5014 		i_mdi_phci_post_detach(dip, cmd, error);
5015 
5016 	if (MDI_CLIENT(dip))
5017 		i_mdi_client_post_detach(dip, cmd, error);
5018 }
5019 
5020 /*ARGSUSED*/
5021 static void
5022 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5023 {
5024 	mdi_phci_t	*ph;
5025 
5026 	/*
5027 	 * Detach/Suspend of phci component failed. Update our state
5028 	 * too
5029 	 */
5030 	ph = i_devi_get_phci(dip);
5031 	if (ph == NULL) {
5032 		return;
5033 	}
5034 
5035 	MDI_PHCI_LOCK(ph);
5036 	/*
5037 	 * Detach of pHCI failed. Restore back converse
5038 	 * state
5039 	 */
5040 	switch (cmd) {
5041 	case DDI_DETACH:
5042 		MDI_DEBUG(2, (CE_NOTE, dip,
5043 		    "!pHCI post_detach: called %p\n", ph));
5044 		if (error != DDI_SUCCESS)
5045 			MDI_PHCI_SET_ATTACH(ph);
5046 		break;
5047 
5048 	case DDI_SUSPEND:
5049 		MDI_DEBUG(2, (CE_NOTE, dip,
5050 		    "!pHCI post_suspend: called %p\n", ph));
5051 		if (error != DDI_SUCCESS)
5052 			MDI_PHCI_SET_RESUME(ph);
5053 		break;
5054 	}
5055 	MDI_PHCI_UNLOCK(ph);
5056 }
5057 
5058 /*ARGSUSED*/
5059 static void
5060 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5061 {
5062 	mdi_client_t	*ct;
5063 
5064 	ct = i_devi_get_client(dip);
5065 	if (ct == NULL) {
5066 		return;
5067 	}
5068 	MDI_CLIENT_LOCK(ct);
5069 	/*
5070 	 * Detach of Client failed. Restore back converse
5071 	 * state
5072 	 */
5073 	switch (cmd) {
5074 	case DDI_DETACH:
5075 		MDI_DEBUG(2, (CE_NOTE, dip,
5076 		    "!Client post_detach: called %p\n", ct));
5077 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5078 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5079 			    "i_mdi_pm_rele_client\n"));
5080 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5081 		} else {
5082 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5083 			    "i_mdi_pm_reset_client\n"));
5084 			i_mdi_pm_reset_client(ct);
5085 		}
5086 		if (error != DDI_SUCCESS)
5087 			MDI_CLIENT_SET_ATTACH(ct);
5088 		break;
5089 
5090 	case DDI_SUSPEND:
5091 		MDI_DEBUG(2, (CE_NOTE, dip,
5092 		    "!Client post_suspend: called %p\n", ct));
5093 		if (error != DDI_SUCCESS)
5094 			MDI_CLIENT_SET_RESUME(ct);
5095 		break;
5096 	}
5097 	MDI_CLIENT_UNLOCK(ct);
5098 }
5099 
5100 /*
5101  * create and install per-path (client - pHCI) statistics
5102  * I/O stats supported: nread, nwritten, reads, and writes
5103  * Error stats - hard errors, soft errors, & transport errors
5104  */
5105 static int
5106 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
5107 {
5108 
5109 	dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip;
5110 	dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip;
5111 	char ksname[KSTAT_STRLEN];
5112 	mdi_pathinfo_t *cpip;
5113 	const char *err_postfix = ",err";
5114 	kstat_t	*kiosp, *kerrsp;
5115 	struct pi_errs	*nsp;
5116 	struct mdi_pi_kstats *mdi_statp;
5117 
5118 	ASSERT(client != NULL && ppath != NULL);
5119 
5120 	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));
5121 
5122 	if (MDI_PI(pip)->pi_kstats != NULL)
5123 		return (MDI_SUCCESS);
5124 
5125 	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
5126 	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
5127 		if (cpip == pip)
5128 			continue;
5129 		/*
5130 		 * We have found a different path with same parent
5131 		 * kstats for a given client-pHCI are common
5132 		 */
5133 		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
5134 		    (MDI_PI(cpip)->pi_kstats != NULL)) {
5135 			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
5136 			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
5137 			return (MDI_SUCCESS);
5138 		}
5139 	}
5140 
5141 	/*
5142 	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
5143 	 * clamp length of name against max length of error kstat name
5144 	 */
5145 	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
5146 	    ddi_driver_name(client), ddi_get_instance(client),
5147 	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
5148 	    (KSTAT_STRLEN - strlen(err_postfix))) {
5149 		return (MDI_FAILURE);
5150 	}
5151 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5152 	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
5153 		return (MDI_FAILURE);
5154 	}
5155 
5156 	(void) strcat(ksname, err_postfix);
5157 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5158 	    KSTAT_TYPE_NAMED,
5159 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5160 
5161 	if (kerrsp == NULL) {
5162 		kstat_delete(kiosp);
5163 		return (MDI_FAILURE);
5164 	}
5165 
5166 	nsp = (struct pi_errs *)kerrsp->ks_data;
5167 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5168 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5169 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5170 	    KSTAT_DATA_UINT32);
5171 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5172 	    KSTAT_DATA_UINT32);
5173 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5174 	    KSTAT_DATA_UINT32);
5175 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5176 	    KSTAT_DATA_UINT32);
5177 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5178 	    KSTAT_DATA_UINT32);
5179 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5180 	    KSTAT_DATA_UINT32);
5181 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5182 	    KSTAT_DATA_UINT32);
5183 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5184 
5185 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5186 	mdi_statp->pi_kstat_ref = 1;
5187 	mdi_statp->pi_kstat_iostats = kiosp;
5188 	mdi_statp->pi_kstat_errstats = kerrsp;
5189 	kstat_install(kiosp);
5190 	kstat_install(kerrsp);
5191 	MDI_PI(pip)->pi_kstats = mdi_statp;
5192 	return (MDI_SUCCESS);
5193 }
5194 
5195 /*
5196  * destroy per-path properties
5197  */
5198 static void
5199 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5200 {
5201 
5202 	struct mdi_pi_kstats *mdi_statp;
5203 
5204 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5205 		return;
5206 
5207 	MDI_PI(pip)->pi_kstats = NULL;
5208 
5209 	/*
5210 	 * the kstat may be shared between multiple pathinfo nodes
5211 	 * decrement this pathinfo's usage, removing the kstats
5212 	 * themselves when the last pathinfo reference is removed.
5213 	 */
5214 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5215 	if (--mdi_statp->pi_kstat_ref != 0)
5216 		return;
5217 
5218 	kstat_delete(mdi_statp->pi_kstat_iostats);
5219 	kstat_delete(mdi_statp->pi_kstat_errstats);
5220 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5221 }
5222 
5223 /*
5224  * update I/O paths KSTATS
5225  */
5226 void
5227 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5228 {
5229 	kstat_t *iostatp;
5230 	size_t xfer_cnt;
5231 
5232 	ASSERT(pip != NULL);
5233 
5234 	/*
5235 	 * I/O can be driven across a path prior to having path
5236 	 * statistics available, i.e. probe(9e).
5237 	 */
5238 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5239 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5240 		xfer_cnt = bp->b_bcount - bp->b_resid;
5241 		if (bp->b_flags & B_READ) {
5242 			KSTAT_IO_PTR(iostatp)->reads++;
5243 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5244 		} else {
5245 			KSTAT_IO_PTR(iostatp)->writes++;
5246 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5247 		}
5248 	}
5249 }
5250 
5251 /*
5252  * Enable the path(specific client/target/initiator)
5253  * Enabling a path means that MPxIO may select the enabled path for routing
5254  * future I/O requests, subject to other path state constraints.
5255  */
5256 int
5257 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
5258 {
5259 	mdi_phci_t	*ph;
5260 
5261 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5262 	if (ph == NULL) {
5263 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5264 			" failed. pip: %p ph = NULL\n", pip));
5265 		return (MDI_FAILURE);
5266 	}
5267 
5268 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
5269 		MDI_ENABLE_OP);
5270 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5271 		" Returning success pip = %p. ph = %p\n", pip, ph));
5272 	return (MDI_SUCCESS);
5273 
5274 }
5275 
5276 /*
5277  * Disable the path (specific client/target/initiator)
5278  * Disabling a path means that MPxIO will not select the disabled path for
5279  * routing any new I/O requests.
5280  */
5281 int
5282 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
5283 {
5284 	mdi_phci_t	*ph;
5285 
5286 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5287 	if (ph == NULL) {
5288 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5289 			" failed. pip: %p ph = NULL\n", pip));
5290 		return (MDI_FAILURE);
5291 	}
5292 
5293 	(void) i_mdi_enable_disable_path(pip,
5294 			ph->ph_vhci, flags, MDI_DISABLE_OP);
5295 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5296 		"Returning success pip = %p. ph = %p", pip, ph));
5297 	return (MDI_SUCCESS);
5298 }
5299 
5300 /*
5301  * disable the path to a particular pHCI (pHCI specified in the phci_path
5302  * argument) for a particular client (specified in the client_path argument).
5303  * Disabling a path means that MPxIO will not select the disabled path for
5304  * routing any new I/O requests.
5305  * NOTE: this will be removed once the NWS files are changed to use the new
5306  * mdi_{enable,disable}_path interfaces
5307  */
5308 int
5309 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5310 {
5311 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5312 }
5313 
5314 /*
5315  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5316  * argument) for a particular client (specified in the client_path argument).
5317  * Enabling a path means that MPxIO may select the enabled path for routing
5318  * future I/O requests, subject to other path state constraints.
5319  * NOTE: this will be removed once the NWS files are changed to use the new
5320  * mdi_{enable,disable}_path interfaces
5321  */
5322 
5323 int
5324 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5325 {
5326 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5327 }
5328 
5329 /*
5330  * Common routine for doing enable/disable.
5331  */
5332 static mdi_pathinfo_t *
5333 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
5334 		int op)
5335 {
5336 	int		sync_flag = 0;
5337 	int		rv;
5338 	mdi_pathinfo_t 	*next;
5339 	int		(*f)() = NULL;
5340 
5341 	f = vh->vh_ops->vo_pi_state_change;
5342 
5343 	sync_flag = (flags << 8) & 0xf00;
5344 
5345 	/*
5346 	 * Do a callback into the mdi consumer to let it
5347 	 * know that path is about to get enabled/disabled.
5348 	 */
5349 	if (f != NULL) {
5350 		rv = (*f)(vh->vh_dip, pip, 0,
5351 			MDI_PI_EXT_STATE(pip),
5352 			MDI_EXT_STATE_CHANGE | sync_flag |
5353 			op | MDI_BEFORE_STATE_CHANGE);
5354 		if (rv != MDI_SUCCESS) {
5355 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5356 			"!vo_pi_state_change: failed rv = %x", rv));
5357 		}
5358 	}
5359 	MDI_PI_LOCK(pip);
5360 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5361 
5362 	switch (flags) {
5363 		case USER_DISABLE:
5364 			if (op == MDI_DISABLE_OP)
5365 				MDI_PI_SET_USER_DISABLE(pip);
5366 			else
5367 				MDI_PI_SET_USER_ENABLE(pip);
5368 			break;
5369 		case DRIVER_DISABLE:
5370 			if (op == MDI_DISABLE_OP)
5371 				MDI_PI_SET_DRV_DISABLE(pip);
5372 			else
5373 				MDI_PI_SET_DRV_ENABLE(pip);
5374 			break;
5375 		case DRIVER_DISABLE_TRANSIENT:
5376 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5377 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5378 			else
5379 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5380 			break;
5381 	}
5382 	MDI_PI_UNLOCK(pip);
5383 	/*
5384 	 * Do a callback into the mdi consumer to let it
5385 	 * know that path is now enabled/disabled.
5386 	 */
5387 	if (f != NULL) {
5388 		rv = (*f)(vh->vh_dip, pip, 0,
5389 			MDI_PI_EXT_STATE(pip),
5390 			MDI_EXT_STATE_CHANGE | sync_flag |
5391 			op | MDI_AFTER_STATE_CHANGE);
5392 		if (rv != MDI_SUCCESS) {
5393 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5394 			"!vo_pi_state_change: failed rv = %x", rv));
5395 		}
5396 	}
5397 	return (next);
5398 }
5399 
5400 /*
5401  * Common routine for doing enable/disable.
5402  * NOTE: this will be removed once the NWS files are changed to use the new
5403  * mdi_{enable,disable}_path has been putback
5404  */
5405 int
5406 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5407 {
5408 
5409 	mdi_phci_t	*ph;
5410 	mdi_vhci_t	*vh = NULL;
5411 	mdi_client_t	*ct;
5412 	mdi_pathinfo_t	*next, *pip;
5413 	int		found_it;
5414 
5415 	ph = i_devi_get_phci(pdip);
5416 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5417 		" Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip));
5418 	if (ph == NULL) {
5419 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5420 			" failed. ph = NULL operation = %d\n", op));
5421 		return (MDI_FAILURE);
5422 	}
5423 
5424 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5425 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5426 			" Invalid operation = %d\n", op));
5427 		return (MDI_FAILURE);
5428 	}
5429 
5430 	vh = ph->ph_vhci;
5431 
5432 	if (cdip == NULL) {
5433 		/*
5434 		 * Need to mark the Phci as enabled/disabled.
5435 		 */
5436 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5437 		"Operation %d for the phci\n", op));
5438 		MDI_PHCI_LOCK(ph);
5439 		switch (flags) {
5440 			case USER_DISABLE:
5441 				if (op == MDI_DISABLE_OP)
5442 					MDI_PHCI_SET_USER_DISABLE(ph);
5443 				else
5444 					MDI_PHCI_SET_USER_ENABLE(ph);
5445 				break;
5446 			case DRIVER_DISABLE:
5447 				if (op == MDI_DISABLE_OP)
5448 					MDI_PHCI_SET_DRV_DISABLE(ph);
5449 				else
5450 					MDI_PHCI_SET_DRV_ENABLE(ph);
5451 				break;
5452 			case DRIVER_DISABLE_TRANSIENT:
5453 				if (op == MDI_DISABLE_OP)
5454 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5455 				else
5456 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
5457 				break;
5458 			default:
5459 				MDI_PHCI_UNLOCK(ph);
5460 				MDI_DEBUG(1, (CE_NOTE, NULL,
5461 				"!i_mdi_pi_enable_disable:"
5462 				" Invalid flag argument= %d\n", flags));
5463 		}
5464 
5465 		/*
5466 		 * Phci has been disabled. Now try to enable/disable
5467 		 * path info's to each client.
5468 		 */
5469 		pip = ph->ph_path_head;
5470 		while (pip != NULL) {
5471 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
5472 		}
5473 		MDI_PHCI_UNLOCK(ph);
5474 	} else {
5475 
5476 		/*
5477 		 * Disable a specific client.
5478 		 */
5479 		ct = i_devi_get_client(cdip);
5480 		if (ct == NULL) {
5481 			MDI_DEBUG(1, (CE_NOTE, NULL,
5482 			"!i_mdi_pi_enable_disable:"
5483 			" failed. ct = NULL operation = %d\n", op));
5484 			return (MDI_FAILURE);
5485 		}
5486 
5487 		MDI_CLIENT_LOCK(ct);
5488 		pip = ct->ct_path_head;
5489 		found_it = 0;
5490 		while (pip != NULL) {
5491 			MDI_PI_LOCK(pip);
5492 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5493 			if (MDI_PI(pip)->pi_phci == ph) {
5494 				MDI_PI_UNLOCK(pip);
5495 				found_it = 1;
5496 				break;
5497 			}
5498 			MDI_PI_UNLOCK(pip);
5499 			pip = next;
5500 		}
5501 
5502 
5503 		MDI_CLIENT_UNLOCK(ct);
5504 		if (found_it == 0) {
5505 			MDI_DEBUG(1, (CE_NOTE, NULL,
5506 			"!i_mdi_pi_enable_disable:"
5507 			" failed. Could not find corresponding pip\n"));
5508 			return (MDI_FAILURE);
5509 		}
5510 
5511 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
5512 	}
5513 
5514 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5515 		" Returning success op: %x pdip = %p cdip = %p\n", op,
5516 			pdip, cdip));
5517 	return (MDI_SUCCESS);
5518 }
5519 
5520 /*ARGSUSED3*/
5521 int
5522 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
5523     int flags, clock_t timeout)
5524 {
5525 	mdi_pathinfo_t *pip;
5526 	dev_info_t *dip;
5527 	clock_t interval = drv_usectohz(100000);	/* 0.1 sec */
5528 	char *paddr;
5529 
5530 	MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm));
5531 
5532 	if (!MDI_PHCI(pdip))
5533 		return (MDI_FAILURE);
5534 
5535 	paddr = strchr(devnm, '@');
5536 	if (paddr == NULL)
5537 		return (MDI_FAILURE);
5538 
5539 	paddr++;	/* skip '@' */
5540 	pip = mdi_pi_find(pdip, NULL, paddr);
5541 	while (pip == NULL && timeout > 0) {
5542 		if (interval > timeout)
5543 			interval = timeout;
5544 		if (flags & NDI_DEVI_DEBUG) {
5545 			cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n",
5546 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
5547 			    paddr, interval, timeout);
5548 		}
5549 		delay(interval);
5550 		timeout -= interval;
5551 		interval += interval;
5552 		pip = mdi_pi_find(pdip, NULL, paddr);
5553 	}
5554 
5555 	if (pip == NULL)
5556 		return (MDI_FAILURE);
5557 	dip = mdi_pi_get_client(pip);
5558 	if (ndi_devi_online(dip, flags) != NDI_SUCCESS)
5559 		return (MDI_FAILURE);
5560 	*cdipp = dip;
5561 
5562 	/* TODO: holding should happen inside search functions */
5563 	ndi_hold_devi(dip);
5564 	return (MDI_SUCCESS);
5565 }
5566 
5567 /*
5568  * Ensure phci powered up
5569  */
5570 static void
5571 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
5572 {
5573 	dev_info_t	*ph_dip;
5574 
5575 	ASSERT(pip != NULL);
5576 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5577 
5578 	if (MDI_PI(pip)->pi_pm_held) {
5579 		return;
5580 	}
5581 
5582 	ph_dip = mdi_pi_get_phci(pip);
5583 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n",
5584 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5585 	if (ph_dip == NULL) {
5586 		return;
5587 	}
5588 
5589 	MDI_PI_UNLOCK(pip);
5590 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5591 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5592 	pm_hold_power(ph_dip);
5593 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5594 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5595 	MDI_PI_LOCK(pip);
5596 
5597 	MDI_PI(pip)->pi_pm_held = 1;
5598 }
5599 
5600 /*
5601  * Allow phci powered down
5602  */
5603 static void
5604 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
5605 {
5606 	dev_info_t	*ph_dip = NULL;
5607 
5608 	ASSERT(pip != NULL);
5609 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5610 
5611 	if (MDI_PI(pip)->pi_pm_held == 0) {
5612 		return;
5613 	}
5614 
5615 	ph_dip = mdi_pi_get_phci(pip);
5616 	ASSERT(ph_dip != NULL);
5617 
5618 	MDI_PI_UNLOCK(pip);
5619 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n",
5620 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5621 
5622 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5623 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5624 	pm_rele_power(ph_dip);
5625 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5626 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5627 
5628 	MDI_PI_LOCK(pip);
5629 	MDI_PI(pip)->pi_pm_held = 0;
5630 }
5631 
5632 static void
5633 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
5634 {
5635 	ASSERT(ct);
5636 
5637 	ct->ct_power_cnt += incr;
5638 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client "
5639 	    "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr));
5640 	ASSERT(ct->ct_power_cnt >= 0);
5641 }
5642 
5643 static void
5644 i_mdi_rele_all_phci(mdi_client_t *ct)
5645 {
5646 	mdi_pathinfo_t  *pip;
5647 
5648 	ASSERT(mutex_owned(&ct->ct_mutex));
5649 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5650 	while (pip != NULL) {
5651 		mdi_hold_path(pip);
5652 		MDI_PI_LOCK(pip);
5653 		i_mdi_pm_rele_pip(pip);
5654 		MDI_PI_UNLOCK(pip);
5655 		mdi_rele_path(pip);
5656 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5657 	}
5658 }
5659 
5660 static void
5661 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
5662 {
5663 	ASSERT(ct);
5664 
5665 	if (i_ddi_devi_attached(ct->ct_dip)) {
5666 		ct->ct_power_cnt -= decr;
5667 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
5668 		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
5669 	}
5670 
5671 	ASSERT(ct->ct_power_cnt >= 0);
5672 	if (ct->ct_power_cnt == 0) {
5673 		i_mdi_rele_all_phci(ct);
5674 		return;
5675 	}
5676 }
5677 
5678 static void
5679 i_mdi_pm_reset_client(mdi_client_t *ct)
5680 {
5681 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
5682 	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
5683 	ct->ct_power_cnt = 0;
5684 	i_mdi_rele_all_phci(ct);
5685 	ct->ct_powercnt_config = 0;
5686 	ct->ct_powercnt_unconfig = 0;
5687 	ct->ct_powercnt_reset = 1;
5688 }
5689 
5690 static void
5691 i_mdi_pm_hold_all_phci(mdi_client_t *ct)
5692 {
5693 	mdi_pathinfo_t  *pip;
5694 	ASSERT(mutex_owned(&ct->ct_mutex));
5695 
5696 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5697 	while (pip != NULL) {
5698 		mdi_hold_path(pip);
5699 		MDI_PI_LOCK(pip);
5700 		i_mdi_pm_hold_pip(pip);
5701 		MDI_PI_UNLOCK(pip);
5702 		mdi_rele_path(pip);
5703 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5704 	}
5705 }
5706 
5707 static int
5708 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
5709 {
5710 	int		ret;
5711 	dev_info_t	*ph_dip;
5712 
5713 	MDI_PI_LOCK(pip);
5714 	i_mdi_pm_hold_pip(pip);
5715 
5716 	ph_dip = mdi_pi_get_phci(pip);
5717 	MDI_PI_UNLOCK(pip);
5718 
5719 	/* bring all components of phci to full power */
5720 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5721 	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
5722 	    ddi_get_instance(ph_dip)));
5723 
5724 	ret = pm_powerup(ph_dip);
5725 
5726 	if (ret == DDI_FAILURE) {
5727 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5728 		    "pm_powerup FAILED for %s%d\n",
5729 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5730 
5731 		MDI_PI_LOCK(pip);
5732 		i_mdi_pm_rele_pip(pip);
5733 		MDI_PI_UNLOCK(pip);
5734 		return (MDI_FAILURE);
5735 	}
5736 
5737 	return (MDI_SUCCESS);
5738 }
5739 
5740 static int
5741 i_mdi_power_all_phci(mdi_client_t *ct)
5742 {
5743 	mdi_pathinfo_t  *pip;
5744 	int		succeeded = 0;
5745 
5746 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5747 	while (pip != NULL) {
5748 		mdi_hold_path(pip);
5749 		MDI_CLIENT_UNLOCK(ct);
5750 		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
5751 			succeeded = 1;
5752 
5753 		ASSERT(ct == MDI_PI(pip)->pi_client);
5754 		MDI_CLIENT_LOCK(ct);
5755 		mdi_rele_path(pip);
5756 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5757 	}
5758 
5759 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
5760 }
5761 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are handled here.
 *		BUS_POWER_NOINVOL is rejected and every other op is passed
 *		to pm_busop_bus_power().
 *
 *		For the notification ops, arg is a pm_bp_child_pwrchg_t *;
 *		for BUS_POWER_HAS_CHANGED it is a pm_bp_has_changed_t *.
 *		result is read as an int * for BUS_POWER_POST_NOTIFICATION.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		(or whatever pm_busop_bus_power() returns for ops that are
 *		passed through)
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/* cleared again in the BUS_POWER_POST_NOTIFICATION case */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* no holds yet: power up the phci(s) first */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new client power state only if the change worked */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* successful power down */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* end of the transition: wake up threads waiting on it */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level went up: power up (if needed) and hold the phci(s) */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* level dropped to 0 from a level other than -1: release */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
5919 
5920 static int
5921 i_mdi_pm_pre_config_one(dev_info_t *child)
5922 {
5923 	int		ret = MDI_SUCCESS;
5924 	mdi_client_t	*ct;
5925 
5926 	ct = i_devi_get_client(child);
5927 	if (ct == NULL)
5928 		return (MDI_FAILURE);
5929 
5930 	MDI_CLIENT_LOCK(ct);
5931 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5932 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5933 
5934 	if (!MDI_CLIENT_IS_FAILED(ct)) {
5935 		MDI_CLIENT_UNLOCK(ct);
5936 		MDI_DEBUG(4, (CE_NOTE, child,
5937 		    "i_mdi_pm_pre_config_one already configured\n"));
5938 		return (MDI_SUCCESS);
5939 	}
5940 
5941 	if (ct->ct_powercnt_config) {
5942 		MDI_CLIENT_UNLOCK(ct);
5943 		MDI_DEBUG(4, (CE_NOTE, child,
5944 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
5945 		return (MDI_SUCCESS);
5946 	}
5947 
5948 	if (ct->ct_power_cnt == 0) {
5949 		ret = i_mdi_power_all_phci(ct);
5950 	}
5951 	MDI_DEBUG(4, (CE_NOTE, child,
5952 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
5953 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5954 	ct->ct_powercnt_config = 1;
5955 	ct->ct_powercnt_reset = 0;
5956 	MDI_CLIENT_UNLOCK(ct);
5957 	return (ret);
5958 }
5959 
5960 static int
5961 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
5962 {
5963 	int			ret = MDI_SUCCESS;
5964 	dev_info_t		*cdip;
5965 	int			circ;
5966 
5967 	ASSERT(MDI_VHCI(parent));
5968 
5969 	/* ndi_devi_config_one */
5970 	if (child) {
5971 		return (i_mdi_pm_pre_config_one(child));
5972 	}
5973 
5974 	/* devi_config_common */
5975 	ndi_devi_enter(parent, &circ);
5976 	cdip = ddi_get_child(parent);
5977 	while (cdip) {
5978 		dev_info_t *next = ddi_get_next_sibling(cdip);
5979 
5980 		ret = i_mdi_pm_pre_config_one(cdip);
5981 		if (ret != MDI_SUCCESS)
5982 			break;
5983 		cdip = next;
5984 	}
5985 	ndi_devi_exit(parent, circ);
5986 	return (ret);
5987 }
5988 
5989 static int
5990 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
5991 {
5992 	int		ret = MDI_SUCCESS;
5993 	mdi_client_t	*ct;
5994 
5995 	ct = i_devi_get_client(child);
5996 	if (ct == NULL)
5997 		return (MDI_FAILURE);
5998 
5999 	MDI_CLIENT_LOCK(ct);
6000 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6001 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6002 
6003 	if (!i_ddi_devi_attached(ct->ct_dip)) {
6004 		MDI_DEBUG(4, (CE_NOTE, child,
6005 		    "i_mdi_pm_pre_unconfig node detached already\n"));
6006 		MDI_CLIENT_UNLOCK(ct);
6007 		return (MDI_SUCCESS);
6008 	}
6009 
6010 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6011 	    (flags & NDI_AUTODETACH)) {
6012 		MDI_DEBUG(4, (CE_NOTE, child,
6013 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
6014 		MDI_CLIENT_UNLOCK(ct);
6015 		return (MDI_FAILURE);
6016 	}
6017 
6018 	if (ct->ct_powercnt_unconfig) {
6019 		MDI_DEBUG(4, (CE_NOTE, child,
6020 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
6021 		MDI_CLIENT_UNLOCK(ct);
6022 		*held = 1;
6023 		return (MDI_SUCCESS);
6024 	}
6025 
6026 	if (ct->ct_power_cnt == 0) {
6027 		ret = i_mdi_power_all_phci(ct);
6028 	}
6029 	MDI_DEBUG(4, (CE_NOTE, child,
6030 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
6031 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6032 	ct->ct_powercnt_unconfig = 1;
6033 	ct->ct_powercnt_reset = 0;
6034 	MDI_CLIENT_UNLOCK(ct);
6035 	if (ret == MDI_SUCCESS)
6036 		*held = 1;
6037 	return (ret);
6038 }
6039 
6040 static int
6041 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
6042     int flags)
6043 {
6044 	int			ret = MDI_SUCCESS;
6045 	dev_info_t		*cdip;
6046 	int			circ;
6047 
6048 	ASSERT(MDI_VHCI(parent));
6049 	*held = 0;
6050 
6051 	/* ndi_devi_unconfig_one */
6052 	if (child) {
6053 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6054 	}
6055 
6056 	/* devi_unconfig_common */
6057 	ndi_devi_enter(parent, &circ);
6058 	cdip = ddi_get_child(parent);
6059 	while (cdip) {
6060 		dev_info_t *next = ddi_get_next_sibling(cdip);
6061 
6062 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6063 		cdip = next;
6064 	}
6065 	ndi_devi_exit(parent, circ);
6066 
6067 	if (*held)
6068 		ret = MDI_SUCCESS;
6069 
6070 	return (ret);
6071 }
6072 
6073 static void
6074 i_mdi_pm_post_config_one(dev_info_t *child)
6075 {
6076 	mdi_client_t	*ct;
6077 
6078 	ct = i_devi_get_client(child);
6079 	if (ct == NULL)
6080 		return;
6081 
6082 	MDI_CLIENT_LOCK(ct);
6083 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6084 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6085 
6086 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6087 		MDI_DEBUG(4, (CE_NOTE, child,
6088 		    "i_mdi_pm_post_config_one NOT configured\n"));
6089 		MDI_CLIENT_UNLOCK(ct);
6090 		return;
6091 	}
6092 
6093 	/* client has not been updated */
6094 	if (MDI_CLIENT_IS_FAILED(ct)) {
6095 		MDI_DEBUG(4, (CE_NOTE, child,
6096 		    "i_mdi_pm_post_config_one NOT configured\n"));
6097 		MDI_CLIENT_UNLOCK(ct);
6098 		return;
6099 	}
6100 
6101 	/* another thread might have powered it down or detached it */
6102 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6103 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6104 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6105 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6106 		MDI_DEBUG(4, (CE_NOTE, child,
6107 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6108 		i_mdi_pm_reset_client(ct);
6109 	} else {
6110 		mdi_pathinfo_t  *pip, *next;
6111 		int	valid_path_count = 0;
6112 
6113 		MDI_DEBUG(4, (CE_NOTE, child,
6114 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6115 		pip = ct->ct_path_head;
6116 		while (pip != NULL) {
6117 			MDI_PI_LOCK(pip);
6118 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6119 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6120 				valid_path_count ++;
6121 			MDI_PI_UNLOCK(pip);
6122 			pip = next;
6123 		}
6124 		i_mdi_pm_rele_client(ct, valid_path_count);
6125 	}
6126 	ct->ct_powercnt_config = 0;
6127 	MDI_CLIENT_UNLOCK(ct);
6128 }
6129 
6130 static void
6131 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
6132 {
6133 	int		circ;
6134 	dev_info_t	*cdip;
6135 	ASSERT(MDI_VHCI(parent));
6136 
6137 	/* ndi_devi_config_one */
6138 	if (child) {
6139 		i_mdi_pm_post_config_one(child);
6140 		return;
6141 	}
6142 
6143 	/* devi_config_common */
6144 	ndi_devi_enter(parent, &circ);
6145 	cdip = ddi_get_child(parent);
6146 	while (cdip) {
6147 		dev_info_t *next = ddi_get_next_sibling(cdip);
6148 
6149 		i_mdi_pm_post_config_one(cdip);
6150 		cdip = next;
6151 	}
6152 	ndi_devi_exit(parent, circ);
6153 }
6154 
6155 static void
6156 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6157 {
6158 	mdi_client_t	*ct;
6159 
6160 	ct = i_devi_get_client(child);
6161 	if (ct == NULL)
6162 		return;
6163 
6164 	MDI_CLIENT_LOCK(ct);
6165 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6166 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6167 
6168 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6169 		MDI_DEBUG(4, (CE_NOTE, child,
6170 		    "i_mdi_pm_post_unconfig NOT held\n"));
6171 		MDI_CLIENT_UNLOCK(ct);
6172 		return;
6173 	}
6174 
6175 	/* failure detaching or another thread just attached it */
6176 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6177 	    i_ddi_devi_attached(ct->ct_dip)) ||
6178 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6179 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6180 		MDI_DEBUG(4, (CE_NOTE, child,
6181 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6182 		i_mdi_pm_reset_client(ct);
6183 	} else {
6184 		mdi_pathinfo_t  *pip, *next;
6185 		int	valid_path_count = 0;
6186 
6187 		MDI_DEBUG(4, (CE_NOTE, child,
6188 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6189 		pip = ct->ct_path_head;
6190 		while (pip != NULL) {
6191 			MDI_PI_LOCK(pip);
6192 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6193 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6194 				valid_path_count ++;
6195 			MDI_PI_UNLOCK(pip);
6196 			pip = next;
6197 		}
6198 		i_mdi_pm_rele_client(ct, valid_path_count);
6199 		ct->ct_powercnt_unconfig = 0;
6200 	}
6201 
6202 	MDI_CLIENT_UNLOCK(ct);
6203 }
6204 
6205 static void
6206 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
6207 {
6208 	int			circ;
6209 	dev_info_t		*cdip;
6210 
6211 	ASSERT(MDI_VHCI(parent));
6212 
6213 	if (!held) {
6214 		MDI_DEBUG(4, (CE_NOTE, parent,
6215 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6216 		return;
6217 	}
6218 
6219 	if (child) {
6220 		i_mdi_pm_post_unconfig_one(child);
6221 		return;
6222 	}
6223 
6224 	ndi_devi_enter(parent, &circ);
6225 	cdip = ddi_get_child(parent);
6226 	while (cdip) {
6227 		dev_info_t *next = ddi_get_next_sibling(cdip);
6228 
6229 		i_mdi_pm_post_unconfig_one(cdip);
6230 		cdip = next;
6231 	}
6232 	ndi_devi_exit(parent, circ);
6233 }
6234 
6235 int
6236 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6237 {
6238 	int			circ, ret = MDI_SUCCESS;
6239 	dev_info_t		*client_dip = NULL;
6240 	mdi_client_t		*ct;
6241 
6242 	/*
6243 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6244 	 * Power up pHCI for the named client device.
6245 	 * Note: Before the client is enumerated under vhci by phci,
6246 	 * client_dip can be NULL. Then proceed to power up all the
6247 	 * pHCIs.
6248 	 */
6249 	if (devnm != NULL) {
6250 		ndi_devi_enter(vdip, &circ);
6251 		client_dip = ndi_devi_findchild(vdip, devnm);
6252 		ndi_devi_exit(vdip, circ);
6253 	}
6254 
6255 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));
6256 
6257 	switch (op) {
6258 	case MDI_PM_PRE_CONFIG:
6259 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6260 
6261 		break;
6262 	case MDI_PM_PRE_UNCONFIG:
6263 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6264 		    flags);
6265 
6266 		break;
6267 	case MDI_PM_POST_CONFIG:
6268 		i_mdi_pm_post_config(vdip, client_dip);
6269 
6270 		break;
6271 	case MDI_PM_POST_UNCONFIG:
6272 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6273 
6274 		break;
6275 	case MDI_PM_HOLD_POWER:
6276 	case MDI_PM_RELE_POWER:
6277 		ASSERT(args);
6278 
6279 		client_dip = (dev_info_t *)args;
6280 		ASSERT(MDI_CLIENT(client_dip));
6281 
6282 		ct = i_devi_get_client(client_dip);
6283 		MDI_CLIENT_LOCK(ct);
6284 
6285 		if (op == MDI_PM_HOLD_POWER) {
6286 			if (ct->ct_power_cnt == 0) {
6287 				(void) i_mdi_power_all_phci(ct);
6288 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6289 				    "mdi_power i_mdi_pm_hold_client\n"));
6290 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6291 			}
6292 		} else {
6293 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6294 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6295 				    "mdi_power i_mdi_pm_rele_client\n"));
6296 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6297 			} else {
6298 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6299 				    "mdi_power i_mdi_pm_reset_client\n"));
6300 				i_mdi_pm_reset_client(ct);
6301 			}
6302 		}
6303 
6304 		MDI_CLIENT_UNLOCK(ct);
6305 		break;
6306 	default:
6307 		break;
6308 	}
6309 
6310 	return (ret);
6311 }
6312 
6313 int
6314 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6315 {
6316 	mdi_vhci_t *vhci;
6317 
6318 	if (!MDI_VHCI(dip))
6319 		return (MDI_FAILURE);
6320 
6321 	if (mdi_class) {
6322 		vhci = DEVI(dip)->devi_mdi_xhci;
6323 		ASSERT(vhci);
6324 		*mdi_class = vhci->vh_class;
6325 	}
6326 
6327 	return (MDI_SUCCESS);
6328 }
6329 
6330 int
6331 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6332 {
6333 	mdi_phci_t *phci;
6334 
6335 	if (!MDI_PHCI(dip))
6336 		return (MDI_FAILURE);
6337 
6338 	if (mdi_class) {
6339 		phci = DEVI(dip)->devi_mdi_xhci;
6340 		ASSERT(phci);
6341 		*mdi_class = phci->ph_vhci->vh_class;
6342 	}
6343 
6344 	return (MDI_SUCCESS);
6345 }
6346 
6347 int
6348 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6349 {
6350 	mdi_client_t *client;
6351 
6352 	if (!MDI_CLIENT(dip))
6353 		return (MDI_FAILURE);
6354 
6355 	if (mdi_class) {
6356 		client = DEVI(dip)->devi_mdi_client;
6357 		ASSERT(client);
6358 		*mdi_class = client->ct_vhci->vh_class;
6359 	}
6360 
6361 	return (MDI_SUCCESS);
6362 }
6363 
6364 void *
6365 mdi_client_get_vhci_private(dev_info_t *dip)
6366 {
6367 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6368 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6369 		mdi_client_t	*ct;
6370 		ct = i_devi_get_client(dip);
6371 		return (ct->ct_vprivate);
6372 	}
6373 	return (NULL);
6374 }
6375 
6376 void
6377 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6378 {
6379 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6380 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6381 		mdi_client_t	*ct;
6382 		ct = i_devi_get_client(dip);
6383 		ct->ct_vprivate = data;
6384 	}
6385 }
6386 /*
6387  * mdi_pi_get_vhci_private():
6388  *		Get the vhci private information associated with the
6389  *		mdi_pathinfo node
6390  */
6391 void *
6392 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6393 {
6394 	caddr_t	vprivate = NULL;
6395 	if (pip) {
6396 		vprivate = MDI_PI(pip)->pi_vprivate;
6397 	}
6398 	return (vprivate);
6399 }
6400 
6401 /*
6402  * mdi_pi_set_vhci_private():
6403  *		Set the vhci private information in the mdi_pathinfo node
6404  */
6405 void
6406 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6407 {
6408 	if (pip) {
6409 		MDI_PI(pip)->pi_vprivate = priv;
6410 	}
6411 }
6412 
6413 /*
6414  * mdi_phci_get_vhci_private():
6415  *		Get the vhci private information associated with the
6416  *		mdi_phci node
6417  */
6418 void *
6419 mdi_phci_get_vhci_private(dev_info_t *dip)
6420 {
6421 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6422 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6423 		mdi_phci_t	*ph;
6424 		ph = i_devi_get_phci(dip);
6425 		return (ph->ph_vprivate);
6426 	}
6427 	return (NULL);
6428 }
6429 
6430 /*
6431  * mdi_phci_set_vhci_private():
6432  *		Set the vhci private information in the mdi_phci node
6433  */
6434 void
6435 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6436 {
6437 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6438 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6439 		mdi_phci_t	*ph;
6440 		ph = i_devi_get_phci(dip);
6441 		ph->ph_vprivate = priv;
6442 	}
6443 }
6444 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
/* number of entries in vhci_class_list[] */
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * The array is indexed in parallel with vhci_class_list[];
 * setup_vhci_cache() takes over a class's nvlist and resets its slot to
 * NULL.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
/* NOTE(review): presumably the value stored under "version" - confirm */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
6465 
6466 /*
6467  * Given vhci class name, return its on-disk vhci cache filename.
6468  * Memory for the returned filename which includes the full path is allocated
6469  * by this function.
6470  */
6471 static char *
6472 vhclass2vhcache_filename(char *vhclass)
6473 {
6474 	char *filename;
6475 	int len;
6476 	static char *fmt = "/etc/devices/mdi_%s_cache";
6477 
6478 	/*
6479 	 * fmt contains the on-disk vhci cache file name format;
6480 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
6481 	 */
6482 
6483 	/* the -1 below is to account for "%s" in the format string */
6484 	len = strlen(fmt) + strlen(vhclass) - 1;
6485 	filename = kmem_alloc(len, KM_SLEEP);
6486 	(void) snprintf(filename, len, fmt, vhclass);
6487 	ASSERT(len == (strlen(filename) + 1));
6488 	return (filename);
6489 }
6490 
6491 /*
6492  * initialize the vhci cache related data structures and read the on-disk
6493  * vhci cached data into memory.
6494  */
6495 static void
6496 setup_vhci_cache(mdi_vhci_t *vh)
6497 {
6498 	mdi_vhci_config_t *vhc;
6499 	mdi_vhci_cache_t *vhcache;
6500 	int i;
6501 	nvlist_t *nvl = NULL;
6502 
6503 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
6504 	vh->vh_config = vhc;
6505 	vhcache = &vhc->vhc_vhcache;
6506 
6507 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
6508 
6509 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
6510 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
6511 
6512 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
6513 
6514 	/*
6515 	 * Create string hash; same as mod_hash_create_strhash() except that
6516 	 * we use NULL key destructor.
6517 	 */
6518 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
6519 	    mdi_bus_config_cache_hash_size,
6520 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
6521 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
6522 
6523 	/*
6524 	 * The on-disk vhci cache is read during booting prior to the
6525 	 * lights-out period by mdi_read_devices_files().
6526 	 */
6527 	for (i = 0; i < N_VHCI_CLASSES; i++) {
6528 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
6529 			nvl = vhcache_nvl[i];
6530 			vhcache_nvl[i] = NULL;
6531 			break;
6532 		}
6533 	}
6534 
6535 	/*
6536 	 * this is to cover the case of some one manually causing unloading
6537 	 * (or detaching) and reloading (or attaching) of a vhci driver.
6538 	 */
6539 	if (nvl == NULL && modrootloaded)
6540 		nvl = read_on_disk_vhci_cache(vh->vh_class);
6541 
6542 	if (nvl != NULL) {
6543 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
6544 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
6545 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
6546 		else  {
6547 			cmn_err(CE_WARN,
6548 			    "%s: data file corrupted, will recreate\n",
6549 			    vhc->vhc_vhcache_filename);
6550 		}
6551 		rw_exit(&vhcache->vhcache_lock);
6552 		nvlist_free(nvl);
6553 	}
6554 
6555 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
6556 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
6557 
6558 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
6559 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
6560 }
6561 
6562 /*
6563  * free all vhci cache related resources
6564  */
6565 static int
6566 destroy_vhci_cache(mdi_vhci_t *vh)
6567 {
6568 	mdi_vhci_config_t *vhc = vh->vh_config;
6569 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
6570 	mdi_vhcache_phci_t *cphci, *cphci_next;
6571 	mdi_vhcache_client_t *cct, *cct_next;
6572 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
6573 
6574 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
6575 		return (MDI_FAILURE);
6576 
6577 	kmem_free(vhc->vhc_vhcache_filename,
6578 	    strlen(vhc->vhc_vhcache_filename) + 1);
6579 
6580 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
6581 
6582 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
6583 	    cphci = cphci_next) {
6584 		cphci_next = cphci->cphci_next;
6585 		free_vhcache_phci(cphci);
6586 	}
6587 
6588 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
6589 		cct_next = cct->cct_next;
6590 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
6591 			cpi_next = cpi->cpi_next;
6592 			free_vhcache_pathinfo(cpi);
6593 		}
6594 		free_vhcache_client(cct);
6595 	}
6596 
6597 	rw_destroy(&vhcache->vhcache_lock);
6598 
6599 	mutex_destroy(&vhc->vhc_lock);
6600 	cv_destroy(&vhc->vhc_cv);
6601 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
6602 	return (MDI_SUCCESS);
6603 }
6604 
6605 /*
6606  * Stop all vhci cache related async threads and free their resources.
6607  */
6608 static int
6609 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
6610 {
6611 	mdi_async_client_config_t *acc, *acc_next;
6612 
6613 	mutex_enter(&vhc->vhc_lock);
6614 	vhc->vhc_flags |= MDI_VHC_EXIT;
6615 	ASSERT(vhc->vhc_acc_thrcount >= 0);
6616 	cv_broadcast(&vhc->vhc_cv);
6617 
6618 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
6619 	    vhc->vhc_acc_thrcount != 0) {
6620 		mutex_exit(&vhc->vhc_lock);
6621 		delay(1);
6622 		mutex_enter(&vhc->vhc_lock);
6623 	}
6624 
6625 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
6626 
6627 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
6628 		acc_next = acc->acc_next;
6629 		free_async_client_config(acc);
6630 	}
6631 	vhc->vhc_acc_list_head = NULL;
6632 	vhc->vhc_acc_list_tail = NULL;
6633 	vhc->vhc_acc_count = 0;
6634 
6635 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6636 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6637 		mutex_exit(&vhc->vhc_lock);
6638 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
6639 			vhcache_dirty(vhc);
6640 			return (MDI_FAILURE);
6641 		}
6642 	} else
6643 		mutex_exit(&vhc->vhc_lock);
6644 
6645 	if (callb_delete(vhc->vhc_cbid) != 0)
6646 		return (MDI_FAILURE);
6647 
6648 	return (MDI_SUCCESS);
6649 }
6650 
6651 /*
6652  * Stop vhci cache flush thread
6653  */
6654 /* ARGSUSED */
6655 static boolean_t
6656 stop_vhcache_flush_thread(void *arg, int code)
6657 {
6658 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
6659 
6660 	mutex_enter(&vhc->vhc_lock);
6661 	vhc->vhc_flags |= MDI_VHC_EXIT;
6662 	cv_broadcast(&vhc->vhc_cv);
6663 
6664 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
6665 		mutex_exit(&vhc->vhc_lock);
6666 		delay(1);
6667 		mutex_enter(&vhc->vhc_lock);
6668 	}
6669 
6670 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6671 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6672 		mutex_exit(&vhc->vhc_lock);
6673 		(void) flush_vhcache(vhc, 1);
6674 	} else
6675 		mutex_exit(&vhc->vhc_lock);
6676 
6677 	return (B_TRUE);
6678 }
6679 
6680 /*
6681  * Enqueue the vhcache phci (cphci) at the tail of the list
6682  */
6683 static void
6684 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
6685 {
6686 	cphci->cphci_next = NULL;
6687 	if (vhcache->vhcache_phci_head == NULL)
6688 		vhcache->vhcache_phci_head = cphci;
6689 	else
6690 		vhcache->vhcache_phci_tail->cphci_next = cphci;
6691 	vhcache->vhcache_phci_tail = cphci;
6692 }
6693 
6694 /*
6695  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
6696  */
6697 static void
6698 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6699     mdi_vhcache_pathinfo_t *cpi)
6700 {
6701 	cpi->cpi_next = NULL;
6702 	if (cct->cct_cpi_head == NULL)
6703 		cct->cct_cpi_head = cpi;
6704 	else
6705 		cct->cct_cpi_tail->cpi_next = cpi;
6706 	cct->cct_cpi_tail = cpi;
6707 }
6708 
6709 /*
6710  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
6711  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
6712  * flag set come at the beginning of the list. All cpis which have this
6713  * flag set come at the end of the list.
6714  */
6715 static void
6716 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6717     mdi_vhcache_pathinfo_t *newcpi)
6718 {
6719 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
6720 
6721 	if (cct->cct_cpi_head == NULL ||
6722 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
6723 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
6724 	else {
6725 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
6726 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
6727 		    prev_cpi = cpi, cpi = cpi->cpi_next)
6728 			;
6729 
6730 		if (prev_cpi == NULL)
6731 			cct->cct_cpi_head = newcpi;
6732 		else
6733 			prev_cpi->cpi_next = newcpi;
6734 
6735 		newcpi->cpi_next = cpi;
6736 
6737 		if (cpi == NULL)
6738 			cct->cct_cpi_tail = newcpi;
6739 	}
6740 }
6741 
6742 /*
6743  * Enqueue the vhcache client (cct) at the tail of the list
6744  */
6745 static void
6746 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
6747     mdi_vhcache_client_t *cct)
6748 {
6749 	cct->cct_next = NULL;
6750 	if (vhcache->vhcache_client_head == NULL)
6751 		vhcache->vhcache_client_head = cct;
6752 	else
6753 		vhcache->vhcache_client_tail->cct_next = cct;
6754 	vhcache->vhcache_client_tail = cct;
6755 }
6756 
/*
 * Free an array of nelem strings along with the array itself.
 * Each non-NULL element is freed using its string length plus one.
 * A NULL array pointer is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = nelem - 1; idx >= 0; idx--) {
		if (str[idx] != NULL)
			kmem_free(str[idx], strlen(str[idx]) + 1);
	}

	kmem_free(str, sizeof (char *) * nelem);
}
6770 
6771 static void
6772 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
6773 {
6774 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
6775 	kmem_free(cphci, sizeof (*cphci));
6776 }
6777 
6778 static void
6779 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
6780 {
6781 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
6782 	kmem_free(cpi, sizeof (*cpi));
6783 }
6784 
6785 static void
6786 free_vhcache_client(mdi_vhcache_client_t *cct)
6787 {
6788 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
6789 	kmem_free(cct, sizeof (*cct));
6790 }
6791 
6792 static char *
6793 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
6794 {
6795 	char *name_addr;
6796 	int len;
6797 
6798 	len = strlen(ct_name) + strlen(ct_addr) + 2;
6799 	name_addr = kmem_alloc(len, KM_SLEEP);
6800 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
6801 
6802 	if (ret_len)
6803 		*ret_len = len;
6804 	return (name_addr);
6805 }
6806 
6807 /*
6808  * Copy the contents of paddrnvl to vhci cache.
6809  * paddrnvl nvlist contains path information for a vhci client.
6810  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
6811  */
6812 static void
6813 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
6814     mdi_vhcache_client_t *cct)
6815 {
6816 	nvpair_t *nvp = NULL;
6817 	mdi_vhcache_pathinfo_t *cpi;
6818 	uint_t nelem;
6819 	uint32_t *val;
6820 
6821 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6822 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
6823 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
6824 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6825 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
6826 		ASSERT(nelem == 2);
6827 		cpi->cpi_cphci = cphci_list[val[0]];
6828 		cpi->cpi_flags = val[1];
6829 		enqueue_tail_vhcache_pathinfo(cct, cpi);
6830 	}
6831 }
6832 
6833 /*
6834  * Copy the contents of caddrmapnvl to vhci cache.
6835  * caddrmapnvl nvlist contains vhci client address to phci client address
6836  * mappings. See the comment in mainnvl_to_vhcache() for the format of
6837  * this nvlist.
6838  */
6839 static void
6840 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
6841     mdi_vhcache_phci_t *cphci_list[])
6842 {
6843 	nvpair_t *nvp = NULL;
6844 	nvlist_t *paddrnvl;
6845 	mdi_vhcache_client_t *cct;
6846 
6847 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6848 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
6849 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
6850 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6851 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
6852 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
6853 		/* the client must contain at least one path */
6854 		ASSERT(cct->cct_cpi_head != NULL);
6855 
6856 		enqueue_vhcache_client(vhcache, cct);
6857 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
6858 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
6859 	}
6860 }
6861 
6862 /*
6863  * Copy the contents of the main nvlist to vhci cache.
6864  *
6865  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
6866  * The nvlist contains the mappings between the vhci client addresses and
6867  * their corresponding phci client addresses.
6868  *
6869  * The structure of the nvlist is as follows:
6870  *
6871  * Main nvlist:
6872  *	NAME		TYPE		DATA
6873  *	version		int32		version number
6874  *	phcis		string array	array of phci paths
6875  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
6876  *
6877  * structure of c2paddrs_nvl:
6878  *	NAME		TYPE		DATA
6879  *	caddr1		nvlist_t	paddrs_nvl1
6880  *	caddr2		nvlist_t	paddrs_nvl2
6881  *	...
6882  * where caddr1, caddr2, ... are vhci client name and addresses in the
6883  * form of "<clientname>@<clientaddress>".
6884  * (for example: "ssd@2000002037cd9f72");
6885  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
6886  *
6887  * structure of paddrs_nvl:
6888  *	NAME		TYPE		DATA
6889  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
6890  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
6891  *	...
6892  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
6893  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
6894  * phci-ids are integers that identify PHCIs to which the
6895  * the bus specific address belongs to. These integers are used as an index
6896  * into to the phcis string array in the main nvlist to get the PHCI path.
6897  */
6898 static int
6899 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
6900 {
6901 	char **phcis, **phci_namep;
6902 	uint_t nphcis;
6903 	mdi_vhcache_phci_t *cphci, **cphci_list;
6904 	nvlist_t *caddrmapnvl;
6905 	int32_t ver;
6906 	int i;
6907 	size_t cphci_list_size;
6908 
6909 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
6910 
6911 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
6912 	    ver != MDI_VHCI_CACHE_VERSION)
6913 		return (MDI_FAILURE);
6914 
6915 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
6916 	    &nphcis) != 0)
6917 		return (MDI_SUCCESS);
6918 
6919 	ASSERT(nphcis > 0);
6920 
6921 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
6922 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
6923 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
6924 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
6925 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
6926 		enqueue_vhcache_phci(vhcache, cphci);
6927 		cphci_list[i] = cphci;
6928 	}
6929 
6930 	ASSERT(vhcache->vhcache_phci_head != NULL);
6931 
6932 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
6933 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
6934 
6935 	kmem_free(cphci_list, cphci_list_size);
6936 	return (MDI_SUCCESS);
6937 }
6938 
6939 /*
6940  * Build paddrnvl for the specified client using the information in the
6941  * vhci cache and add it to the caddrmapnnvl.
6942  * Returns 0 on success, errno on failure.
6943  */
6944 static int
6945 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
6946     nvlist_t *caddrmapnvl)
6947 {
6948 	mdi_vhcache_pathinfo_t *cpi;
6949 	nvlist_t *nvl;
6950 	int err;
6951 	uint32_t val[2];
6952 
6953 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
6954 
6955 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
6956 		return (err);
6957 
6958 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
6959 		val[0] = cpi->cpi_cphci->cphci_id;
6960 		val[1] = cpi->cpi_flags;
6961 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
6962 		    != 0)
6963 			goto out;
6964 	}
6965 
6966 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
6967 out:
6968 	nvlist_free(nvl);
6969 	return (err);
6970 }
6971 
6972 /*
6973  * Build caddrmapnvl using the information in the vhci cache
6974  * and add it to the mainnvl.
6975  * Returns 0 on success, errno on failure.
6976  */
6977 static int
6978 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
6979 {
6980 	mdi_vhcache_client_t *cct;
6981 	nvlist_t *nvl;
6982 	int err;
6983 
6984 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
6985 
6986 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
6987 		return (err);
6988 
6989 	for (cct = vhcache->vhcache_client_head; cct != NULL;
6990 	    cct = cct->cct_next) {
6991 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
6992 			goto out;
6993 	}
6994 
6995 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
6996 out:
6997 	nvlist_free(nvl);
6998 	return (err);
6999 }
7000 
7001 /*
7002  * Build nvlist using the information in the vhci cache.
7003  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7004  * Returns nvl on success, NULL on failure.
7005  */
7006 static nvlist_t *
7007 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7008 {
7009 	mdi_vhcache_phci_t *cphci;
7010 	uint_t phci_count;
7011 	char **phcis;
7012 	nvlist_t *nvl;
7013 	int err, i;
7014 
7015 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7016 		nvl = NULL;
7017 		goto out;
7018 	}
7019 
7020 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7021 	    MDI_VHCI_CACHE_VERSION)) != 0)
7022 		goto out;
7023 
7024 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7025 	if (vhcache->vhcache_phci_head == NULL) {
7026 		rw_exit(&vhcache->vhcache_lock);
7027 		return (nvl);
7028 	}
7029 
7030 	phci_count = 0;
7031 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7032 	    cphci = cphci->cphci_next)
7033 		cphci->cphci_id = phci_count++;
7034 
7035 	/* build phci pathname list */
7036 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7037 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7038 	    cphci = cphci->cphci_next, i++)
7039 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7040 
7041 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7042 	    phci_count);
7043 	free_string_array(phcis, phci_count);
7044 
7045 	if (err == 0 &&
7046 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7047 		rw_exit(&vhcache->vhcache_lock);
7048 		return (nvl);
7049 	}
7050 
7051 	rw_exit(&vhcache->vhcache_lock);
7052 out:
7053 	if (nvl)
7054 		nvlist_free(nvl);
7055 	return (NULL);
7056 }
7057 
7058 /*
7059  * Lookup vhcache phci structure for the specified phci path.
7060  */
7061 static mdi_vhcache_phci_t *
7062 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7063 {
7064 	mdi_vhcache_phci_t *cphci;
7065 
7066 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7067 
7068 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7069 	    cphci = cphci->cphci_next) {
7070 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7071 			return (cphci);
7072 	}
7073 
7074 	return (NULL);
7075 }
7076 
7077 /*
7078  * Lookup vhcache phci structure for the specified phci.
7079  */
7080 static mdi_vhcache_phci_t *
7081 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7082 {
7083 	mdi_vhcache_phci_t *cphci;
7084 
7085 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7086 
7087 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7088 	    cphci = cphci->cphci_next) {
7089 		if (cphci->cphci_phci == ph)
7090 			return (cphci);
7091 	}
7092 
7093 	return (NULL);
7094 }
7095 
7096 /*
7097  * Add the specified phci to the vhci cache if not already present.
7098  */
7099 static void
7100 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7101 {
7102 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7103 	mdi_vhcache_phci_t *cphci;
7104 	char *pathname;
7105 	int cache_updated;
7106 
7107 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7108 
7109 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7110 	(void) ddi_pathname(ph->ph_dip, pathname);
7111 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7112 	    != NULL) {
7113 		cphci->cphci_phci = ph;
7114 		cache_updated = 0;
7115 	} else {
7116 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7117 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7118 		cphci->cphci_phci = ph;
7119 		enqueue_vhcache_phci(vhcache, cphci);
7120 		cache_updated = 1;
7121 	}
7122 
7123 	rw_exit(&vhcache->vhcache_lock);
7124 
7125 	/*
7126 	 * Since a new phci has been added, reset
7127 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7128 	 * during next vhcache_discover_paths().
7129 	 */
7130 	mutex_enter(&vhc->vhc_lock);
7131 	vhc->vhc_path_discovery_cutoff_time = 0;
7132 	mutex_exit(&vhc->vhc_lock);
7133 
7134 	kmem_free(pathname, MAXPATHLEN);
7135 	if (cache_updated)
7136 		vhcache_dirty(vhc);
7137 }
7138 
7139 /*
7140  * Remove the reference to the specified phci from the vhci cache.
7141  */
7142 static void
7143 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7144 {
7145 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7146 	mdi_vhcache_phci_t *cphci;
7147 
7148 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7149 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7150 		/* do not remove the actual mdi_vhcache_phci structure */
7151 		cphci->cphci_phci = NULL;
7152 	}
7153 	rw_exit(&vhcache->vhcache_lock);
7154 }
7155 
7156 static void
7157 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7158     mdi_vhcache_lookup_token_t *src)
7159 {
7160 	if (src == NULL) {
7161 		dst->lt_cct = NULL;
7162 		dst->lt_cct_lookup_time = 0;
7163 	} else {
7164 		dst->lt_cct = src->lt_cct;
7165 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7166 	}
7167 }
7168 
7169 /*
7170  * Look up vhcache client for the specified client.
7171  */
7172 static mdi_vhcache_client_t *
7173 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7174     mdi_vhcache_lookup_token_t *token)
7175 {
7176 	mod_hash_val_t hv;
7177 	char *name_addr;
7178 	int len;
7179 
7180 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7181 
7182 	/*
7183 	 * If no vhcache clean occurred since the last lookup, we can
7184 	 * simply return the cct from the last lookup operation.
7185 	 * It works because ccts are never freed except during the vhcache
7186 	 * cleanup operation.
7187 	 */
7188 	if (token != NULL &&
7189 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7190 		return (token->lt_cct);
7191 
7192 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7193 	if (mod_hash_find(vhcache->vhcache_client_hash,
7194 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7195 		if (token) {
7196 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7197 			token->lt_cct_lookup_time = lbolt64;
7198 		}
7199 	} else {
7200 		if (token) {
7201 			token->lt_cct = NULL;
7202 			token->lt_cct_lookup_time = 0;
7203 		}
7204 		hv = NULL;
7205 	}
7206 	kmem_free(name_addr, len);
7207 	return ((mdi_vhcache_client_t *)hv);
7208 }
7209 
7210 /*
7211  * Add the specified path to the vhci cache if not already present.
7212  * Also add the vhcache client for the client corresponding to this path
7213  * if it doesn't already exist.
7214  */
7215 static void
7216 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7217 {
7218 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7219 	mdi_vhcache_client_t *cct;
7220 	mdi_vhcache_pathinfo_t *cpi;
7221 	mdi_phci_t *ph = pip->pi_phci;
7222 	mdi_client_t *ct = pip->pi_client;
7223 	int cache_updated = 0;
7224 
7225 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7226 
7227 	/* if vhcache client for this pip doesn't already exist, add it */
7228 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7229 	    NULL)) == NULL) {
7230 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7231 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7232 		    ct->ct_guid, NULL);
7233 		enqueue_vhcache_client(vhcache, cct);
7234 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7235 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7236 		cache_updated = 1;
7237 	}
7238 
7239 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7240 		if (cpi->cpi_cphci->cphci_phci == ph &&
7241 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7242 			cpi->cpi_pip = pip;
7243 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7244 				cpi->cpi_flags &=
7245 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7246 				sort_vhcache_paths(cct);
7247 				cache_updated = 1;
7248 			}
7249 			break;
7250 		}
7251 	}
7252 
7253 	if (cpi == NULL) {
7254 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7255 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7256 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7257 		ASSERT(cpi->cpi_cphci != NULL);
7258 		cpi->cpi_pip = pip;
7259 		enqueue_vhcache_pathinfo(cct, cpi);
7260 		cache_updated = 1;
7261 	}
7262 
7263 	rw_exit(&vhcache->vhcache_lock);
7264 
7265 	if (cache_updated)
7266 		vhcache_dirty(vhc);
7267 }
7268 
7269 /*
7270  * Remove the reference to the specified path from the vhci cache.
7271  */
7272 static void
7273 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7274 {
7275 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7276 	mdi_client_t *ct = pip->pi_client;
7277 	mdi_vhcache_client_t *cct;
7278 	mdi_vhcache_pathinfo_t *cpi;
7279 
7280 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7281 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7282 	    NULL)) != NULL) {
7283 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7284 		    cpi = cpi->cpi_next) {
7285 			if (cpi->cpi_pip == pip) {
7286 				cpi->cpi_pip = NULL;
7287 				break;
7288 			}
7289 		}
7290 	}
7291 	rw_exit(&vhcache->vhcache_lock);
7292 }
7293 
7294 /*
7295  * Flush the vhci cache to disk.
7296  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7297  */
7298 static int
7299 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7300 {
7301 	nvlist_t *nvl;
7302 	int err;
7303 	int rv;
7304 
7305 	/*
7306 	 * It is possible that the system may shutdown before
7307 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7308 	 * flushing the cache in this case do not check for
7309 	 * i_ddi_io_initialized when force flag is set.
7310 	 */
7311 	if (force_flag == 0 && !i_ddi_io_initialized())
7312 		return (MDI_FAILURE);
7313 
7314 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7315 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7316 		nvlist_free(nvl);
7317 	} else
7318 		err = EFAULT;
7319 
7320 	rv = MDI_SUCCESS;
7321 	mutex_enter(&vhc->vhc_lock);
7322 	if (err != 0) {
7323 		if (err == EROFS) {
7324 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7325 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7326 			    MDI_VHC_VHCACHE_DIRTY);
7327 		} else {
7328 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7329 				cmn_err(CE_CONT, "%s: update failed\n",
7330 				    vhc->vhc_vhcache_filename);
7331 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7332 			}
7333 			rv = MDI_FAILURE;
7334 		}
7335 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7336 		cmn_err(CE_CONT,
7337 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7338 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7339 	}
7340 	mutex_exit(&vhc->vhc_lock);
7341 
7342 	return (rv);
7343 }
7344 
7345 /*
7346  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7347  * Exits itself if left idle for the idle timeout period.
7348  */
7349 static void
7350 vhcache_flush_thread(void *arg)
7351 {
7352 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7353 	clock_t idle_time, quit_at_ticks;
7354 	callb_cpr_t cprinfo;
7355 
7356 	/* number of seconds to sleep idle before exiting */
7357 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7358 
7359 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7360 	    "mdi_vhcache_flush");
7361 	mutex_enter(&vhc->vhc_lock);
7362 	for (; ; ) {
7363 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7364 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7365 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7366 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7367 				(void) cv_timedwait(&vhc->vhc_cv,
7368 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7369 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7370 			} else {
7371 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7372 				mutex_exit(&vhc->vhc_lock);
7373 
7374 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7375 					vhcache_dirty(vhc);
7376 
7377 				mutex_enter(&vhc->vhc_lock);
7378 			}
7379 		}
7380 
7381 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7382 
7383 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7384 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7385 		    ddi_get_lbolt() < quit_at_ticks) {
7386 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7387 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7388 			    quit_at_ticks);
7389 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7390 		}
7391 
7392 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7393 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7394 			goto out;
7395 	}
7396 
7397 out:
7398 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7399 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7400 	CALLB_CPR_EXIT(&cprinfo);
7401 }
7402 
7403 /*
7404  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7405  */
7406 static void
7407 vhcache_dirty(mdi_vhci_config_t *vhc)
7408 {
7409 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7410 	int create_thread;
7411 
7412 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7413 	/* do not flush cache until the cache is fully built */
7414 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7415 		rw_exit(&vhcache->vhcache_lock);
7416 		return;
7417 	}
7418 	rw_exit(&vhcache->vhcache_lock);
7419 
7420 	mutex_enter(&vhc->vhc_lock);
7421 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7422 		mutex_exit(&vhc->vhc_lock);
7423 		return;
7424 	}
7425 
7426 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7427 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
7428 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
7429 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7430 		cv_broadcast(&vhc->vhc_cv);
7431 		create_thread = 0;
7432 	} else {
7433 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
7434 		create_thread = 1;
7435 	}
7436 	mutex_exit(&vhc->vhc_lock);
7437 
7438 	if (create_thread)
7439 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
7440 		    0, &p0, TS_RUN, minclsyspri);
7441 }
7442 
7443 /*
7444  * phci bus config structure - one for each phci bus config operation that
7445  * we initiate on behalf of a vhci.
7446  */
7447 typedef struct mdi_phci_bus_config_s {
7448 	char *phbc_phci_path;	/* phci path copy; freed by bus_config_phci() */
7449 	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
7450 	struct mdi_phci_bus_config_s *phbc_next;	/* next in the list */
7451 } mdi_phci_bus_config_t;
7452 
7452 
7453 /* vhci bus config structure - one for each vhci bus config operation */
7454 typedef struct mdi_vhci_bus_config_s {
7455 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
7456 	major_t vhbc_op_major;		/* bus config op major */
7457 	uint_t vhbc_op_flags;		/* bus config op flags */
7458 	kmutex_t vhbc_lock;		/* held for thr_count decrement/wait */
7459 	kcondvar_t vhbc_cv;		/* broadcast when thr_count hits 0 */
7460 	int vhbc_thr_count;		/* bus_config_phci() calls pending */
7461 } mdi_vhci_bus_config_t;
7462 
7463 /*
7464  * bus config the specified phci
7465  */
7466 static void
7467 bus_config_phci(void *arg)
7468 {
7469 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
7470 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
7471 	dev_info_t *ph_dip;
7472 
7473 	/*
7474 	 * first configure all path components upto phci and then configure
7475 	 * the phci children.
7476 	 */
7477 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
7478 	    != NULL) {
7479 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
7480 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
7481 			(void) ndi_devi_config_driver(ph_dip,
7482 			    vhbc->vhbc_op_flags,
7483 			    vhbc->vhbc_op_major);
7484 		} else
7485 			(void) ndi_devi_config(ph_dip,
7486 			    vhbc->vhbc_op_flags);
7487 
7488 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7489 		ndi_rele_devi(ph_dip);
7490 	}
7491 
7492 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
7493 	kmem_free(phbc, sizeof (*phbc));
7494 
7495 	mutex_enter(&vhbc->vhbc_lock);
7496 	vhbc->vhbc_thr_count--;
7497 	if (vhbc->vhbc_thr_count == 0)
7498 		cv_broadcast(&vhbc->vhbc_cv);
7499 	mutex_exit(&vhbc->vhbc_lock);
7500 }
7501 
7502 /*
7503  * Bus config all phcis associated with the vhci in parallel.
7504  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
7505  */
7506 static void
7507 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
7508     ddi_bus_config_op_t op, major_t maj)
7509 {
7510 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
7511 	mdi_vhci_bus_config_t *vhbc;
7512 	mdi_vhcache_phci_t *cphci;
7513 
7514 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7515 	if (vhcache->vhcache_phci_head == NULL) {
7516 		rw_exit(&vhcache->vhcache_lock);
7517 		return;
7518 	}
7519 
7520 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
7521 
7522 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7523 	    cphci = cphci->cphci_next) {
7524 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
7525 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
7526 		    KM_SLEEP);
7527 		phbc->phbc_vhbusconfig = vhbc;
7528 		phbc->phbc_next = phbc_head;
7529 		phbc_head = phbc;
7530 		vhbc->vhbc_thr_count++;
7531 	}
7532 	rw_exit(&vhcache->vhcache_lock);
7533 
7534 	vhbc->vhbc_op = op;
7535 	vhbc->vhbc_op_major = maj;
7536 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
7537 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
7538 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
7539 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
7540 
7541 	/* now create threads to initiate bus config on all phcis in parallel */
7542 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
7543 		phbc_next = phbc->phbc_next;
7544 		if (mdi_mtc_off)
7545 			bus_config_phci((void *)phbc);
7546 		else
7547 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
7548 			    0, &p0, TS_RUN, minclsyspri);
7549 	}
7550 
7551 	mutex_enter(&vhbc->vhbc_lock);
7552 	/* wait until all threads exit */
7553 	while (vhbc->vhbc_thr_count > 0)
7554 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
7555 	mutex_exit(&vhbc->vhbc_lock);
7556 
7557 	mutex_destroy(&vhbc->vhbc_lock);
7558 	cv_destroy(&vhbc->vhbc_cv);
7559 	kmem_free(vhbc, sizeof (*vhbc));
7560 }
7561 
7562 /*
7563  * Single threaded version of bus_config_all_phcis()
7564  */
7565 static void
7566 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
7567     ddi_bus_config_op_t op, major_t maj)
7568 {
7569 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7570 
7571 	single_threaded_vhconfig_enter(vhc);
7572 	bus_config_all_phcis(vhcache, flags, op, maj);
7573 	single_threaded_vhconfig_exit(vhc);
7574 }
7575 
7576 /*
7577  * Perform BUS_CONFIG_ONE on the specified child of the phci.
7578  * The path includes the child component in addition to the phci path.
7579  */
7580 static int
7581 bus_config_one_phci_child(char *path)
7582 {
7583 	dev_info_t *ph_dip, *child;
7584 	char *devnm;
7585 	int rv = MDI_FAILURE;
7586 
7587 	/* extract the child component of the phci */
7588 	devnm = strrchr(path, '/');
7589 	*devnm++ = '\0';
7590 
7591 	/*
7592 	 * first configure all path components upto phci and then
7593 	 * configure the phci child.
7594 	 */
7595 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
7596 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
7597 		    NDI_SUCCESS) {
7598 			/*
7599 			 * release the hold that ndi_devi_config_one() placed
7600 			 */
7601 			ndi_rele_devi(child);
7602 			rv = MDI_SUCCESS;
7603 		}
7604 
7605 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7606 		ndi_rele_devi(ph_dip);
7607 	}
7608 
7609 	devnm--;
7610 	*devnm = '/';
7611 	return (rv);
7612 }
7613 
7614 /*
7615  * Build a list of phci client paths for the specified vhci client.
7616  * The list includes only those phci client paths which aren't configured yet.
7617  */
7618 static mdi_phys_path_t *
7619 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
7620 {
7621 	mdi_vhcache_pathinfo_t *cpi;
7622 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
7623 	int config_path, len;
7624 
7625 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7626 		/*
7627 		 * include only those paths that aren't configured.
7628 		 */
7629 		config_path = 0;
7630 		if (cpi->cpi_pip == NULL)
7631 			config_path = 1;
7632 		else {
7633 			MDI_PI_LOCK(cpi->cpi_pip);
7634 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
7635 				config_path = 1;
7636 			MDI_PI_UNLOCK(cpi->cpi_pip);
7637 		}
7638 
7639 		if (config_path) {
7640 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
7641 			len = strlen(cpi->cpi_cphci->cphci_path) +
7642 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
7643 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
7644 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
7645 			    cpi->cpi_cphci->cphci_path, ct_name,
7646 			    cpi->cpi_addr);
7647 			pp->phys_path_next = NULL;
7648 
7649 			if (pp_head == NULL)
7650 				pp_head = pp;
7651 			else
7652 				pp_tail->phys_path_next = pp;
7653 			pp_tail = pp;
7654 		}
7655 	}
7656 
7657 	return (pp_head);
7658 }
7659 
7660 /*
7661  * Free the memory allocated for phci client path list.
7662  */
7663 static void
7664 free_phclient_path_list(mdi_phys_path_t *pp_head)
7665 {
7666 	mdi_phys_path_t *pp, *pp_next;
7667 
7668 	for (pp = pp_head; pp != NULL; pp = pp_next) {
7669 		pp_next = pp->phys_path_next;
7670 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
7671 		kmem_free(pp, sizeof (*pp));
7672 	}
7673 }
7674 
7675 /*
7676  * Allocated async client structure and initialize with the specified values.
7677  */
7678 static mdi_async_client_config_t *
7679 alloc_async_client_config(char *ct_name, char *ct_addr,
7680     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7681 {
7682 	mdi_async_client_config_t *acc;
7683 
7684 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
7685 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
7686 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
7687 	acc->acc_phclient_path_list_head = pp_head;
7688 	init_vhcache_lookup_token(&acc->acc_token, tok);
7689 	acc->acc_next = NULL;
7690 	return (acc);
7691 }
7692 
7693 /*
7694  * Free the memory allocated for the async client structure and their members.
7695  */
7696 static void
7697 free_async_client_config(mdi_async_client_config_t *acc)
7698 {
7699 	if (acc->acc_phclient_path_list_head)
7700 		free_phclient_path_list(acc->acc_phclient_path_list_head);
7701 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
7702 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
7703 	kmem_free(acc, sizeof (*acc));
7704 }
7705 
7706 /*
7707  * Sort vhcache pathinfos (cpis) of the specified client.
7708  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7709  * flag set come at the beginning of the list. All cpis which have this
7710  * flag set come at the end of the list.
7711  */
7712 static void
7713 sort_vhcache_paths(mdi_vhcache_client_t *cct)
7714 {
7715 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
7716 
7717 	cpi_head = cct->cct_cpi_head;
7718 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
7719 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
7720 		cpi_next = cpi->cpi_next;
7721 		enqueue_vhcache_pathinfo(cct, cpi);
7722 	}
7723 }
7724 
7725 /*
7726  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
7727  * every vhcache pathinfo of the specified client. If not adjust the flag
7728  * setting appropriately.
7729  *
7730  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
7731  * on-disk vhci cache. So every time this flag is updated the cache must be
7732  * flushed.
7733  */
7734 static void
7735 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7736     mdi_vhcache_lookup_token_t *tok)
7737 {
7738 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7739 	mdi_vhcache_client_t *cct;
7740 	mdi_vhcache_pathinfo_t *cpi;
7741 
7742 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7743 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
7744 	    == NULL) {
7745 		rw_exit(&vhcache->vhcache_lock);
7746 		return;
7747 	}
7748 
7749 	/*
7750 	 * to avoid unnecessary on-disk cache updates, first check if an
7751 	 * update is really needed. If no update is needed simply return.
7752 	 */
7753 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7754 		if ((cpi->cpi_pip != NULL &&
7755 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
7756 		    (cpi->cpi_pip == NULL &&
7757 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
7758 			break;
7759 		}
7760 	}
7761 	if (cpi == NULL) {
7762 		rw_exit(&vhcache->vhcache_lock);
7763 		return;
7764 	}
7765 
7766 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
7767 		rw_exit(&vhcache->vhcache_lock);
7768 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7769 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
7770 		    tok)) == NULL) {
7771 			rw_exit(&vhcache->vhcache_lock);
7772 			return;
7773 		}
7774 	}
7775 
7776 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7777 		if (cpi->cpi_pip != NULL)
7778 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7779 		else
7780 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7781 	}
7782 	sort_vhcache_paths(cct);
7783 
7784 	rw_exit(&vhcache->vhcache_lock);
7785 	vhcache_dirty(vhc);
7786 }
7787 
7788 /*
7789  * Configure all specified paths of the client.
7790  */
7791 static void
7792 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7793     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7794 {
7795 	mdi_phys_path_t *pp;
7796 
7797 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
7798 		(void) bus_config_one_phci_child(pp->phys_path);
7799 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
7800 }
7801 
/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 *
 * Thread entry point; arg is the mdi_vhci_config_t whose async client
 * config list (vhc_acc_list_head) is serviced. The thread leaves its loop
 * when MDI_VHC_EXIT is set in vhc_flags (even if elements are still
 * queued), or when the list is still empty after waiting until
 * mdi_async_config_idle_time seconds past the start of the iteration.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		/* the idle deadline is restarted for every loop iteration */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/*
		 * Wait for work: stop waiting on an exit request, on a
		 * non-empty list, or once the idle deadline has passed.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* leave with vhc_lock still held; see the out: label */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* unlink the first element; keep the tail consistent */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* the bus config work is done without holding vhc_lock */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		/* frees the element and the path list attached to it */
		free_async_client_config(acc);
	}

out:
	/* reached only via goto, so vhc_lock is held for this update */
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
7854 
7855 /*
7856  * Arrange for all the phci client paths (pp_head) for the specified client
7857  * to be bus configured asynchronously by a thread.
7858  */
7859 static void
7860 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7861     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7862 {
7863 	mdi_async_client_config_t *acc, *newacc;
7864 	int create_thread;
7865 
7866 	if (pp_head == NULL)
7867 		return;
7868 
7869 	if (mdi_mtc_off) {
7870 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
7871 		free_phclient_path_list(pp_head);
7872 		return;
7873 	}
7874 
7875 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
7876 	ASSERT(newacc);
7877 
7878 	mutex_enter(&vhc->vhc_lock);
7879 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
7880 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
7881 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
7882 			free_async_client_config(newacc);
7883 			mutex_exit(&vhc->vhc_lock);
7884 			return;
7885 		}
7886 	}
7887 
7888 	if (vhc->vhc_acc_list_head == NULL)
7889 		vhc->vhc_acc_list_head = newacc;
7890 	else
7891 		vhc->vhc_acc_list_tail->acc_next = newacc;
7892 	vhc->vhc_acc_list_tail = newacc;
7893 	vhc->vhc_acc_count++;
7894 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
7895 		cv_broadcast(&vhc->vhc_cv);
7896 		create_thread = 0;
7897 	} else {
7898 		vhc->vhc_acc_thrcount++;
7899 		create_thread = 1;
7900 	}
7901 	mutex_exit(&vhc->vhc_lock);
7902 
7903 	if (create_thread)
7904 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
7905 		    0, &p0, TS_RUN, minclsyspri);
7906 }
7907 
7908 /*
7909  * Return number of online paths for the specified client.
7910  */
7911 static int
7912 nonline_paths(mdi_vhcache_client_t *cct)
7913 {
7914 	mdi_vhcache_pathinfo_t *cpi;
7915 	int online_count = 0;
7916 
7917 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7918 		if (cpi->cpi_pip != NULL) {
7919 			MDI_PI_LOCK(cpi->cpi_pip);
7920 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
7921 				online_count++;
7922 			MDI_PI_UNLOCK(cpi->cpi_pip);
7923 		}
7924 	}
7925 
7926 	return (online_count);
7927 }
7928 
7929 /*
7930  * Bus configure all paths for the specified vhci client.
7931  * If at least one path for the client is already online, the remaining paths
7932  * will be configured asynchronously. Otherwise, it synchronously configures
7933  * the paths until at least one path is online and then rest of the paths
7934  * will be configured asynchronously.
7935  */
7936 static void
7937 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
7938 {
7939 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7940 	mdi_phys_path_t *pp_head, *pp;
7941 	mdi_vhcache_client_t *cct;
7942 	mdi_vhcache_lookup_token_t tok;
7943 
7944 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7945 
7946 	init_vhcache_lookup_token(&tok, NULL);
7947 
7948 	if (ct_name == NULL || ct_addr == NULL ||
7949 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
7950 	    == NULL ||
7951 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
7952 		rw_exit(&vhcache->vhcache_lock);
7953 		return;
7954 	}
7955 
7956 	/* if at least one path is online, configure the rest asynchronously */
7957 	if (nonline_paths(cct) > 0) {
7958 		rw_exit(&vhcache->vhcache_lock);
7959 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
7960 		return;
7961 	}
7962 
7963 	rw_exit(&vhcache->vhcache_lock);
7964 
7965 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
7966 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
7967 			rw_enter(&vhcache->vhcache_lock, RW_READER);
7968 
7969 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
7970 			    ct_addr, &tok)) == NULL) {
7971 				rw_exit(&vhcache->vhcache_lock);
7972 				goto out;
7973 			}
7974 
7975 			if (nonline_paths(cct) > 0 &&
7976 			    pp->phys_path_next != NULL) {
7977 				rw_exit(&vhcache->vhcache_lock);
7978 				config_client_paths_async(vhc, ct_name, ct_addr,
7979 				    pp->phys_path_next, &tok);
7980 				pp->phys_path_next = NULL;
7981 				goto out;
7982 			}
7983 
7984 			rw_exit(&vhcache->vhcache_lock);
7985 		}
7986 	}
7987 
7988 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
7989 out:
7990 	free_phclient_path_list(pp_head);
7991 }
7992 
7993 static void
7994 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
7995 {
7996 	mutex_enter(&vhc->vhc_lock);
7997 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
7998 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
7999 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8000 	mutex_exit(&vhc->vhc_lock);
8001 }
8002 
8003 static void
8004 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8005 {
8006 	mutex_enter(&vhc->vhc_lock);
8007 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8008 	cv_broadcast(&vhc->vhc_cv);
8009 	mutex_exit(&vhc->vhc_lock);
8010 }
8011 
/*
 * Entry of a built-in phci driver table: the driver name and whether the
 * driver supports the root device.
 */
typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
};

/* one fully braced entry per driver, same layout as the scsi table */
static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor", 1 }
};
8034 
8035 static void *
8036 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
8037 {
8038 	void *new_ptr;
8039 
8040 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
8041 	if (old_ptr) {
8042 		bcopy(old_ptr, new_ptr, old_size);
8043 		kmem_free(old_ptr, old_size);
8044 	}
8045 	return (new_ptr);
8046 }
8047 
8048 static void
8049 add_to_phci_list(char ***driver_list, int **root_support_list,
8050     int *cur_elements, int *max_elements, char *driver_name, int root_support)
8051 {
8052 	ASSERT(*cur_elements <= *max_elements);
8053 	if (*cur_elements == *max_elements) {
8054 		*max_elements += 10;
8055 		*driver_list = mdi_realloc(*driver_list,
8056 		    sizeof (char *) * (*cur_elements),
8057 		    sizeof (char *) * (*max_elements));
8058 		*root_support_list = mdi_realloc(*root_support_list,
8059 		    sizeof (int) * (*cur_elements),
8060 		    sizeof (int) * (*max_elements));
8061 	}
8062 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
8063 	(*root_support_list)[*cur_elements] = root_support;
8064 	(*cur_elements)++;
8065 }
8066 
8067 static void
8068 get_phci_driver_list(char *vhci_class, char ***driver_list,
8069     int **root_support_list, int *cur_elements, int *max_elements)
8070 {
8071 	mdi_phci_driver_info_t	*st_driver_list, *p;
8072 	int		st_ndrivers, root_support, i, j, driver_conf_count;
8073 	major_t		m;
8074 	struct devnames	*dnp;
8075 	ddi_prop_t	*propp;
8076 
8077 	*driver_list = NULL;
8078 	*root_support_list = NULL;
8079 	*cur_elements = 0;
8080 	*max_elements = 0;
8081 
8082 	/* add the phci drivers derived from the phci driver.conf files */
8083 	for (m = 0; m < devcnt; m++) {
8084 		dnp = &devnamesp[m];
8085 
8086 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
8087 			LOCK_DEV_OPS(&dnp->dn_lock);
8088 			if (dnp->dn_global_prop_ptr != NULL &&
8089 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
8090 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
8091 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
8092 			    strcmp(propp->prop_val, vhci_class) == 0) {
8093 
8094 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
8095 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
8096 				    &dnp->dn_global_prop_ptr->prop_list)
8097 				    == NULL) ? 1 : 0;
8098 
8099 				add_to_phci_list(driver_list, root_support_list,
8100 				    cur_elements, max_elements, dnp->dn_name,
8101 				    root_support);
8102 
8103 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8104 			} else
8105 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8106 		}
8107 	}
8108 
8109 	driver_conf_count = *cur_elements;
8110 
8111 	/* add the phci drivers specified in the built-in tables */
8112 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
8113 		st_driver_list = scsi_phci_driver_list;
8114 		st_ndrivers = sizeof (scsi_phci_driver_list) /
8115 		    sizeof (mdi_phci_driver_info_t);
8116 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
8117 		st_driver_list = ib_phci_driver_list;
8118 		st_ndrivers = sizeof (ib_phci_driver_list) /
8119 		    sizeof (mdi_phci_driver_info_t);
8120 	} else {
8121 		st_driver_list = NULL;
8122 		st_ndrivers = 0;
8123 	}
8124 
8125 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
8126 		/* add this phci driver if not already added before */
8127 		for (j = 0; j < driver_conf_count; j++) {
8128 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
8129 				break;
8130 		}
8131 		if (j == driver_conf_count) {
8132 			add_to_phci_list(driver_list, root_support_list,
8133 			    cur_elements, max_elements, p->phdriver_name,
8134 			    p->phdriver_root_support);
8135 		}
8136 	}
8137 }
8138 
8139 /*
8140  * Attach the phci driver instances associated with the specified vhci class.
8141  * If root is mounted attach all phci driver instances.
8142  * If root is not mounted, attach the instances of only those phci
8143  * drivers that have the root support.
8144  */
8145 static void
8146 attach_phci_drivers(char *vhci_class)
8147 {
8148 	char	**driver_list, **p;
8149 	int	*root_support_list;
8150 	int	cur_elements, max_elements, i;
8151 	major_t	m;
8152 
8153 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
8154 	    &cur_elements, &max_elements);
8155 
8156 	for (i = 0; i < cur_elements; i++) {
8157 		if (modrootloaded || root_support_list[i]) {
8158 			m = ddi_name_to_major(driver_list[i]);
8159 			if (m != (major_t)-1 && ddi_hold_installed_driver(m))
8160 				ddi_rele_driver(m);
8161 		}
8162 	}
8163 
8164 	if (driver_list) {
8165 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
8166 			kmem_free(*p, strlen(*p) + 1);
8167 		kmem_free(driver_list, sizeof (char *) * max_elements);
8168 		kmem_free(root_support_list, sizeof (int) * max_elements);
8169 	}
8170 }
8171 
8172 /*
8173  * Build vhci cache:
8174  *
8175  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8176  * the phci driver instances. During this process the cache gets built.
8177  *
8178  * Cache is built fully if the root is mounted.
8179  * If the root is not mounted, phci drivers that do not have root support
8180  * are not attached. As a result the cache is built partially. The entries
8181  * in the cache reflect only those phci drivers that have root support.
8182  */
8183 static int
8184 build_vhci_cache(mdi_vhci_t *vh)
8185 {
8186 	mdi_vhci_config_t *vhc = vh->vh_config;
8187 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8188 
8189 	single_threaded_vhconfig_enter(vhc);
8190 
8191 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8192 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8193 		rw_exit(&vhcache->vhcache_lock);
8194 		single_threaded_vhconfig_exit(vhc);
8195 		return (0);
8196 	}
8197 	rw_exit(&vhcache->vhcache_lock);
8198 
8199 	attach_phci_drivers(vh->vh_class);
8200 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8201 	    BUS_CONFIG_ALL, (major_t)-1);
8202 
8203 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8204 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8205 	rw_exit(&vhcache->vhcache_lock);
8206 
8207 	single_threaded_vhconfig_exit(vhc);
8208 	vhcache_dirty(vhc);
8209 	return (1);
8210 }
8211 
8212 /*
8213  * Determine if discovery of paths is needed.
8214  */
8215 static int
8216 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8217 {
8218 	int rv = 1;
8219 
8220 	mutex_enter(&vhc->vhc_lock);
8221 	if (i_ddi_io_initialized() == 0) {
8222 		if (vhc->vhc_path_discovery_boot > 0) {
8223 			vhc->vhc_path_discovery_boot--;
8224 			goto out;
8225 		}
8226 	} else {
8227 		if (vhc->vhc_path_discovery_postboot > 0) {
8228 			vhc->vhc_path_discovery_postboot--;
8229 			goto out;
8230 		}
8231 	}
8232 
8233 	/*
8234 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8235 	 * This is to avoid a series of full path discoveries when opening
8236 	 * stale /dev/[r]dsk links.
8237 	 */
8238 	if (mdi_path_discovery_interval != -1 &&
8239 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8240 		goto out;
8241 
8242 	rv = 0;
8243 out:
8244 	mutex_exit(&vhc->vhc_lock);
8245 	return (rv);
8246 }
8247 
8248 /*
8249  * Discover all paths:
8250  *
8251  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8252  * driver instances. During this process all paths will be discovered.
8253  */
8254 static int
8255 vhcache_discover_paths(mdi_vhci_t *vh)
8256 {
8257 	mdi_vhci_config_t *vhc = vh->vh_config;
8258 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8259 	int rv = 0;
8260 
8261 	single_threaded_vhconfig_enter(vhc);
8262 
8263 	if (vhcache_do_discovery(vhc)) {
8264 		attach_phci_drivers(vh->vh_class);
8265 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8266 		    NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1);
8267 
8268 		mutex_enter(&vhc->vhc_lock);
8269 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8270 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8271 		mutex_exit(&vhc->vhc_lock);
8272 		rv = 1;
8273 	}
8274 
8275 	single_threaded_vhconfig_exit(vhc);
8276 	return (rv);
8277 }
8278 
8279 /*
8280  * Generic vhci bus config implementation:
8281  *
8282  * Parameters
8283  *	vdip	vhci dip
8284  *	flags	bus config flags
8285  *	op	bus config operation
8286  *	The remaining parameters are bus config operation specific
8287  *
8288  * for BUS_CONFIG_ONE
8289  *	arg	pointer to name@addr
8290  *	child	upon successful return from this function, *child will be
8291  *		set to the configured and held devinfo child node of vdip.
8292  *	ct_addr	pointer to client address (i.e. GUID)
8293  *
8294  * for BUS_CONFIG_DRIVER
8295  *	arg	major number of the driver
8296  *	child and ct_addr parameters are ignored
8297  *
8298  * for BUS_CONFIG_ALL
8299  *	arg, child, and ct_addr parameters are ignored
8300  *
8301  * Note that for the rest of the bus config operations, this function simply
8302  * calls the framework provided default bus config routine.
8303  */
8304 int
8305 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8306     void *arg, dev_info_t **child, char *ct_addr)
8307 {
8308 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8309 	mdi_vhci_config_t *vhc = vh->vh_config;
8310 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8311 	int rv = 0;
8312 	int params_valid = 0;
8313 	char *cp;
8314 
8315 	/*
8316 	 * While bus configuring phcis, the phci driver interactions with MDI
8317 	 * cause child nodes to be enumerated under the vhci node for which
8318 	 * they need to ndi_devi_enter the vhci node.
8319 	 *
8320 	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
8321 	 * for the bus config operations on phcis to finish while holding the
8322 	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
8323 	 * phcis and call the default framework provided bus config function
8324 	 * if we are called with ndi_devi_enter lock held.
8325 	 */
8326 	if (DEVI_BUSY_OWNED(vdip)) {
8327 		MDI_DEBUG(2, (CE_NOTE, vdip,
8328 		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
8329 		goto default_bus_config;
8330 	}
8331 
8332 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8333 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8334 		rw_exit(&vhcache->vhcache_lock);
8335 		rv = build_vhci_cache(vh);
8336 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8337 	}
8338 
8339 	switch (op) {
8340 	case BUS_CONFIG_ONE:
8341 		if (arg != NULL && ct_addr != NULL) {
8342 			/* extract node name */
8343 			cp = (char *)arg;
8344 			while (*cp != '\0' && *cp != '@')
8345 				cp++;
8346 			if (*cp == '@') {
8347 				params_valid = 1;
8348 				*cp = '\0';
8349 				config_client_paths(vhc, (char *)arg, ct_addr);
8350 				/* config_client_paths() releases cache_lock */
8351 				*cp = '@';
8352 				break;
8353 			}
8354 		}
8355 
8356 		rw_exit(&vhcache->vhcache_lock);
8357 		break;
8358 
8359 	case BUS_CONFIG_DRIVER:
8360 		rw_exit(&vhcache->vhcache_lock);
8361 		if (rv == 0)
8362 			st_bus_config_all_phcis(vhc, flags, op,
8363 			    (major_t)(uintptr_t)arg);
8364 		break;
8365 
8366 	case BUS_CONFIG_ALL:
8367 		rw_exit(&vhcache->vhcache_lock);
8368 		if (rv == 0)
8369 			st_bus_config_all_phcis(vhc, flags, op, -1);
8370 		break;
8371 
8372 	default:
8373 		rw_exit(&vhcache->vhcache_lock);
8374 		break;
8375 	}
8376 
8377 
8378 default_bus_config:
8379 	/*
8380 	 * All requested child nodes are enumerated under the vhci.
8381 	 * Now configure them.
8382 	 */
8383 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8384 	    NDI_SUCCESS) {
8385 		return (MDI_SUCCESS);
8386 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8387 		/* discover all paths and try configuring again */
8388 		if (vhcache_discover_paths(vh) &&
8389 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8390 		    NDI_SUCCESS)
8391 			return (MDI_SUCCESS);
8392 	}
8393 
8394 	return (MDI_FAILURE);
8395 }
8396 
8397 /*
8398  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8399  */
8400 static nvlist_t *
8401 read_on_disk_vhci_cache(char *vhci_class)
8402 {
8403 	nvlist_t *nvl;
8404 	int err;
8405 	char *filename;
8406 
8407 	filename = vhclass2vhcache_filename(vhci_class);
8408 
8409 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8410 		kmem_free(filename, strlen(filename) + 1);
8411 		return (nvl);
8412 	} else if (err == EIO)
8413 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8414 	else if (err == EINVAL)
8415 		cmn_err(CE_WARN,
8416 		    "%s: data file corrupted, will recreate\n", filename);
8417 
8418 	kmem_free(filename, strlen(filename) + 1);
8419 	return (NULL);
8420 }
8421 
8422 /*
8423  * Read on-disk vhci cache into nvlists for all vhci classes.
8424  * Called during booting by i_ddi_read_devices_files().
8425  */
8426 void
8427 mdi_read_devices_files(void)
8428 {
8429 	int i;
8430 
8431 	for (i = 0; i < N_VHCI_CLASSES; i++)
8432 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
8433 }
8434 
8435 /*
8436  * Remove all stale entries from vhci cache.
8437  */
8438 static void
8439 clean_vhcache(mdi_vhci_config_t *vhc)
8440 {
8441 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8442 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
8443 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
8444 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
8445 
8446 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8447 
8448 	cct_head = vhcache->vhcache_client_head;
8449 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
8450 	for (cct = cct_head; cct != NULL; cct = cct_next) {
8451 		cct_next = cct->cct_next;
8452 
8453 		cpi_head = cct->cct_cpi_head;
8454 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8455 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8456 			cpi_next = cpi->cpi_next;
8457 			if (cpi->cpi_pip != NULL) {
8458 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
8459 				enqueue_tail_vhcache_pathinfo(cct, cpi);
8460 			} else
8461 				free_vhcache_pathinfo(cpi);
8462 		}
8463 
8464 		if (cct->cct_cpi_head != NULL)
8465 			enqueue_vhcache_client(vhcache, cct);
8466 		else {
8467 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
8468 			    (mod_hash_key_t)cct->cct_name_addr);
8469 			free_vhcache_client(cct);
8470 		}
8471 	}
8472 
8473 	cphci_head = vhcache->vhcache_phci_head;
8474 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
8475 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
8476 		cphci_next = cphci->cphci_next;
8477 		if (cphci->cphci_phci != NULL)
8478 			enqueue_vhcache_phci(vhcache, cphci);
8479 		else
8480 			free_vhcache_phci(cphci);
8481 	}
8482 
8483 	vhcache->vhcache_clean_time = lbolt64;
8484 	rw_exit(&vhcache->vhcache_lock);
8485 	vhcache_dirty(vhc);
8486 }
8487 
8488 /*
8489  * Remove all stale entries from vhci cache.
8490  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
8491  */
8492 void
8493 mdi_clean_vhcache(void)
8494 {
8495 	mdi_vhci_t *vh;
8496 
8497 	mutex_enter(&mdi_mutex);
8498 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8499 		vh->vh_refcnt++;
8500 		mutex_exit(&mdi_mutex);
8501 		clean_vhcache(vh->vh_config);
8502 		mutex_enter(&mdi_mutex);
8503 		vh->vh_refcnt--;
8504 	}
8505 	mutex_exit(&mdi_mutex);
8506 }
8507 
8508 /*
8509  * mdi_vhci_walk_clients():
8510  *		Walker routine to traverse client dev_info nodes
8511  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
8512  * below the client, including nexus devices, which we dont want.
8513  * So we just traverse the immediate siblings, starting from 1st client.
8514  */
8515 void
8516 mdi_vhci_walk_clients(dev_info_t *vdip,
8517     int (*f)(dev_info_t *, void *), void *arg)
8518 {
8519 	dev_info_t	*cdip;
8520 	mdi_client_t	*ct;
8521 
8522 	mutex_enter(&mdi_mutex);
8523 
8524 	cdip = ddi_get_child(vdip);
8525 
8526 	while (cdip) {
8527 		ct = i_devi_get_client(cdip);
8528 		MDI_CLIENT_LOCK(ct);
8529 
8530 		switch ((*f)(cdip, arg)) {
8531 		case DDI_WALK_CONTINUE:
8532 			cdip = ddi_get_next_sibling(cdip);
8533 			MDI_CLIENT_UNLOCK(ct);
8534 			break;
8535 
8536 		default:
8537 			MDI_CLIENT_UNLOCK(ct);
8538 			mutex_exit(&mdi_mutex);
8539 			return;
8540 		}
8541 	}
8542 
8543 	mutex_exit(&mdi_mutex);
8544 }
8545 
8546 /*
8547  * mdi_vhci_walk_phcis():
8548  *		Walker routine to traverse phci dev_info nodes
8549  */
8550 void
8551 mdi_vhci_walk_phcis(dev_info_t *vdip,
8552     int (*f)(dev_info_t *, void *), void *arg)
8553 {
8554 	mdi_vhci_t	*vh = NULL;
8555 	mdi_phci_t	*ph = NULL;
8556 
8557 	mutex_enter(&mdi_mutex);
8558 
8559 	vh = i_devi_get_vhci(vdip);
8560 	ph = vh->vh_phci_head;
8561 
8562 	while (ph) {
8563 		MDI_PHCI_LOCK(ph);
8564 
8565 		switch ((*f)(ph->ph_dip, arg)) {
8566 		case DDI_WALK_CONTINUE:
8567 			MDI_PHCI_UNLOCK(ph);
8568 			ph = ph->ph_next;
8569 			break;
8570 
8571 		default:
8572 			MDI_PHCI_UNLOCK(ph);
8573 			mutex_exit(&mdi_mutex);
8574 			return;
8575 		}
8576 	}
8577 
8578 	mutex_exit(&mdi_mutex);
8579 }
8580 
8581 
8582 /*
8583  * mdi_walk_vhcis():
8584  *		Walker routine to traverse vhci dev_info nodes
8585  */
8586 void
8587 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
8588 {
8589 	mdi_vhci_t	*vh = NULL;
8590 
8591 	mutex_enter(&mdi_mutex);
8592 	/*
8593 	 * Scan for already registered vhci
8594 	 */
8595 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8596 		vh->vh_refcnt++;
8597 		mutex_exit(&mdi_mutex);
8598 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
8599 			mutex_enter(&mdi_mutex);
8600 			vh->vh_refcnt--;
8601 			break;
8602 		} else {
8603 			mutex_enter(&mdi_mutex);
8604 			vh->vh_refcnt--;
8605 		}
8606 	}
8607 
8608 	mutex_exit(&mdi_mutex);
8609 }
8610 
8611 /*
8612  * i_mdi_log_sysevent():
8613  *		Logs events for pickup by syseventd
8614  */
8615 static void
8616 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
8617 {
8618 	char		*path_name;
8619 	nvlist_t	*attr_list;
8620 
8621 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
8622 	    KM_SLEEP) != DDI_SUCCESS) {
8623 		goto alloc_failed;
8624 	}
8625 
8626 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
8627 	(void) ddi_pathname(dip, path_name);
8628 
8629 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
8630 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
8631 		goto error;
8632 	}
8633 
8634 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
8635 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
8636 		goto error;
8637 	}
8638 
8639 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
8640 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
8641 		goto error;
8642 	}
8643 
8644 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
8645 	    path_name) != DDI_SUCCESS) {
8646 		goto error;
8647 	}
8648 
8649 	if (nvlist_add_string(attr_list, DDI_CLASS,
8650 	    ph_vh_class) != DDI_SUCCESS) {
8651 		goto error;
8652 	}
8653 
8654 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
8655 	    attr_list, NULL, DDI_SLEEP);
8656 
8657 error:
8658 	kmem_free(path_name, MAXPATHLEN);
8659 	nvlist_free(attr_list);
8660 	return;
8661 
8662 alloc_failed:
8663 	MDI_DEBUG(1, (CE_WARN, dip,
8664 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
8665 }
8666