xref: /illumos-gate/usr/src/uts/common/os/sunmdi.c (revision 4de2612967d06c4fdbf524a62556a1e8118a006f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
30  * detailed discussion of the overall mpxio architecture.
31  *
32  * Default locking order:
33  *
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
35  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
39  */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 #include <sys/modhash.h>
68 #include <sys/disp.h>
69 #include <sys/autoconf.h>
70 
#ifdef	DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
/*
 * MDI_DEBUG(level, (args)):
 *		Log a debug message through i_mdi_log() when mdi_debug is at
 *		least 'level'.  'stmnt' is the parenthesized argument list that
 *		is handed to i_mdi_log().
 *
 *		The expansion is wrapped in do { } while (0) so that it always
 *		forms exactly one statement; a bare "if" here would capture an
 *		"else" that follows the macro in the caller.
 */
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
	/*CONSTCOND*/ \
	} while (0)
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
/* Debug logging compiles away entirely in non-DEBUG builds. */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
80 
/*
 * Kernel globals defined outside this file.  minclsyspri is used below as
 * the priority of the MDI taskq (see MDI_TASKQ_PRI).
 */
extern pri_t	minclsyspri;
extern int	modrootloaded;

/*
 * Global mutex:
 * Protects vHCI list and structure members, pHCI and Client lists.
 * It is initialized by i_mdi_init() when the first vHCI registers.
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 * (singly linked through vh_next; head/tail/count are updated under
 * mdi_mutex by mdi_vhci_register() and mdi_vhci_unregister())
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 * (number of struct client_hash buckets allocated per vHCI)
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 * The min/max allocation values scale with the configured thread count.
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

/* The MDI taskq itself; created once in i_mdi_init(). */
taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* One second (1000000 microseconds) expressed in clock ticks. */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

/* hash size for the bus config cache; consumer is not in this part of file */
static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * MDI component property name/value string definitions
 */
const char 		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/*
 * Message printed by mdi_vhci_register() when a second vHCI driver tries to
 * register for a class that already has one; %s is the class name.
 */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";
164 
/*
 * Static function prototypes
 *
 * Forward declarations so the helpers can be used ahead of their
 * definitions.
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int 		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static void		i_mdi_pm_hold_all_phci(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static int		i_mdi_pi_kstat_create(mdi_pathinfo_t *);
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

/*
 * vHCI, pHCI, client and devinfo helpers.
 * Note: i_devi_get_vhci() is declared static here, so it has internal
 * linkage regardless of how its definition is spelled.
 */
static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_get_client_lock(mdi_phci_t *,
			    mdi_client_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int,
			int);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

/*
 * vHCI cache (vhcache) management.  setup_vhci_cache() is called from
 * mdi_vhci_register() and destroy_vhci_cache() from mdi_vhci_unregister();
 * vhcache_phci_add()/vhcache_phci_remove() are called as pHCIs register
 * and unregister.
 */
static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static void		setup_phci_driver_list(mdi_vhci_t *);
static void		free_phci_driver_list(mdi_vhci_config_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
static boolean_t	stop_vhcache_flush_thread(void *, int);
static void		free_string_array(char **, int);
static void		free_vhcache_phci(mdi_vhcache_phci_t *);
static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void		free_vhcache_client(mdi_vhcache_client_t *);
static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_pi_add(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		vhcache_pi_remove(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		free_phclient_path_list(mdi_phys_path_t *);
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
/* nvlist file I/O routines provided outside this file */
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);
271 
272 /* called once when first vhci registers with mdi */
273 static void
274 i_mdi_init()
275 {
276 	static int initialized = 0;
277 
278 	if (initialized)
279 		return;
280 	initialized = 1;
281 
282 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
283 	/*
284 	 * Create our taskq resources
285 	 */
286 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
287 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
288 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
289 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
290 }
291 
292 /*
293  * mdi_get_component_type():
294  *		Return mpxio component type
295  * Return Values:
296  *		MDI_COMPONENT_NONE
297  *		MDI_COMPONENT_VHCI
298  *		MDI_COMPONENT_PHCI
299  *		MDI_COMPONENT_CLIENT
300  * XXX This doesn't work under multi-level MPxIO and should be
301  *	removed when clients migrate mdi_is_*() interfaces.
302  */
303 int
304 mdi_get_component_type(dev_info_t *dip)
305 {
306 	return (DEVI(dip)->devi_mdi_component);
307 }
308 
309 /*
310  * mdi_vhci_register():
311  *		Register a vHCI module with the mpxio framework
312  *		mdi_vhci_register() is called by vHCI drivers to register the
313  *		'class_driver' vHCI driver and its MDI entrypoints with the
314  *		mpxio framework.  The vHCI driver must call this interface as
315  *		part of its attach(9e) handler.
316  *		Competing threads may try to attach mdi_vhci_register() as
317  *		the vHCI drivers are loaded and attached as a result of pHCI
318  *		driver instance registration (mdi_phci_register()) with the
319  *		framework.
320  * Return Values:
321  *		MDI_SUCCESS
322  *		MDI_FAILURE
323  */
324 
325 /*ARGSUSED*/
326 int
327 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
328     int flags)
329 {
330 	mdi_vhci_t		*vh = NULL;
331 
332 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
333 
334 	i_mdi_init();
335 
336 	mutex_enter(&mdi_mutex);
337 	/*
338 	 * Scan for already registered vhci
339 	 */
340 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
341 		if (strcmp(vh->vh_class, class) == 0) {
342 			/*
343 			 * vHCI has already been created.  Check for valid
344 			 * vHCI ops registration.  We only support one vHCI
345 			 * module per class
346 			 */
347 			if (vh->vh_ops != NULL) {
348 				mutex_exit(&mdi_mutex);
349 				cmn_err(CE_NOTE, vhci_greeting, class);
350 				return (MDI_FAILURE);
351 			}
352 			break;
353 		}
354 	}
355 
356 	/*
357 	 * if not yet created, create the vHCI component
358 	 */
359 	if (vh == NULL) {
360 		struct client_hash	*hash = NULL;
361 		char			*load_balance;
362 
363 		/*
364 		 * Allocate and initialize the mdi extensions
365 		 */
366 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
367 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
368 		    KM_SLEEP);
369 		vh->vh_client_table = hash;
370 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
371 		(void) strcpy(vh->vh_class, class);
372 		vh->vh_lb = LOAD_BALANCE_RR;
373 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
374 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
375 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
376 				vh->vh_lb = LOAD_BALANCE_NONE;
377 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
378 				    == 0) {
379 				vh->vh_lb = LOAD_BALANCE_LBA;
380 			}
381 			ddi_prop_free(load_balance);
382 		}
383 
384 		/*
385 		 * Store the vHCI ops vectors
386 		 */
387 		vh->vh_dip = vdip;
388 		vh->vh_ops = vops;
389 
390 		setup_vhci_cache(vh);
391 
392 		if (mdi_vhci_head == NULL) {
393 			mdi_vhci_head = vh;
394 		}
395 		if (mdi_vhci_tail) {
396 			mdi_vhci_tail->vh_next = vh;
397 		}
398 		mdi_vhci_tail = vh;
399 		mdi_vhci_count++;
400 	}
401 
402 	/*
403 	 * Claim the devfs node as a vhci component
404 	 */
405 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
406 
407 	/*
408 	 * Initialize our back reference from dev_info node
409 	 */
410 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
411 	mutex_exit(&mdi_mutex);
412 	return (MDI_SUCCESS);
413 }
414 
415 /*
416  * mdi_vhci_unregister():
417  *		Unregister a vHCI module from mpxio framework
418  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
419  * 		of a vhci to unregister it from the framework.
420  * Return Values:
421  *		MDI_SUCCESS
422  *		MDI_FAILURE
423  */
424 
425 /*ARGSUSED*/
426 int
427 mdi_vhci_unregister(dev_info_t *vdip, int flags)
428 {
429 	mdi_vhci_t	*found, *vh, *prev = NULL;
430 
431 	/*
432 	 * Check for invalid VHCI
433 	 */
434 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
435 		return (MDI_FAILURE);
436 
437 	mutex_enter(&mdi_mutex);
438 
439 	/*
440 	 * Scan the list of registered vHCIs for a match
441 	 */
442 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
443 		if (found == vh)
444 			break;
445 		prev = found;
446 	}
447 
448 	if (found == NULL) {
449 		mutex_exit(&mdi_mutex);
450 		return (MDI_FAILURE);
451 	}
452 
453 	/*
454 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
455 	 * should have been unregistered, before a vHCI can be
456 	 * unregistered.
457 	 */
458 	if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) {
459 		mutex_exit(&mdi_mutex);
460 		return (MDI_FAILURE);
461 	}
462 
463 	/*
464 	 * Remove the vHCI from the global list
465 	 */
466 	if (vh == mdi_vhci_head) {
467 		mdi_vhci_head = vh->vh_next;
468 	} else {
469 		prev->vh_next = vh->vh_next;
470 	}
471 	if (vh == mdi_vhci_tail) {
472 		mdi_vhci_tail = prev;
473 	}
474 
475 	mdi_vhci_count--;
476 	mutex_exit(&mdi_mutex);
477 
478 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
479 		/* add vhci to the global list */
480 		mutex_enter(&mdi_mutex);
481 		if (mdi_vhci_head == NULL)
482 			mdi_vhci_head = vh;
483 		else
484 			mdi_vhci_tail->vh_next = vh;
485 		mdi_vhci_tail = vh;
486 		mdi_vhci_count++;
487 		mutex_exit(&mdi_mutex);
488 		return (MDI_FAILURE);
489 	}
490 
491 	vh->vh_ops = NULL;
492 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
493 	DEVI(vdip)->devi_mdi_xhci = NULL;
494 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
495 	kmem_free(vh->vh_client_table,
496 	    mdi_client_table_size * sizeof (struct client_hash));
497 	kmem_free(vh, sizeof (mdi_vhci_t));
498 	return (MDI_SUCCESS);
499 }
500 
501 /*
502  * i_mdi_vhci_class2vhci():
503  *		Look for a matching vHCI module given a vHCI class name
504  * Return Values:
505  *		Handle to a vHCI component
506  *		NULL
507  */
508 static mdi_vhci_t *
509 i_mdi_vhci_class2vhci(char *class)
510 {
511 	mdi_vhci_t	*vh = NULL;
512 
513 	ASSERT(!MUTEX_HELD(&mdi_mutex));
514 
515 	mutex_enter(&mdi_mutex);
516 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
517 		if (strcmp(vh->vh_class, class) == 0) {
518 			break;
519 		}
520 	}
521 	mutex_exit(&mdi_mutex);
522 	return (vh);
523 }
524 
525 /*
526  * i_devi_get_vhci():
527  *		Utility function to get the handle to a vHCI component
528  * Return Values:
529  *		Handle to a vHCI component
530  *		NULL
531  */
532 mdi_vhci_t *
533 i_devi_get_vhci(dev_info_t *vdip)
534 {
535 	mdi_vhci_t	*vh = NULL;
536 	if (MDI_VHCI(vdip)) {
537 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
538 	}
539 	return (vh);
540 }
541 
542 /*
543  * mdi_phci_register():
544  *		Register a pHCI module with mpxio framework
545  *		mdi_phci_register() is called by pHCI drivers to register with
546  *		the mpxio framework and a specific 'class_driver' vHCI.  The
547  *		pHCI driver must call this interface as part of its attach(9e)
548  *		handler.
549  * Return Values:
550  *		MDI_SUCCESS
551  *		MDI_FAILURE
552  */
553 
554 /*ARGSUSED*/
555 int
556 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
557 {
558 	mdi_phci_t		*ph;
559 	mdi_vhci_t		*vh;
560 	char			*data;
561 	char			*pathname;
562 
563 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
564 	(void) ddi_pathname(pdip, pathname);
565 
566 	/*
567 	 * Check for mpxio-disable property. Enable mpxio if the property is
568 	 * missing or not set to "yes".
569 	 * If the property is set to "yes" then emit a brief message.
570 	 */
571 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
572 	    &data) == DDI_SUCCESS)) {
573 		if (strcmp(data, "yes") == 0) {
574 			MDI_DEBUG(1, (CE_CONT, pdip,
575 			    "?%s (%s%d) multipath capabilities "
576 			    "disabled via %s.conf.\n", pathname,
577 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
578 			    ddi_driver_name(pdip)));
579 			ddi_prop_free(data);
580 			kmem_free(pathname, MAXPATHLEN);
581 			return (MDI_FAILURE);
582 		}
583 		ddi_prop_free(data);
584 	}
585 
586 	kmem_free(pathname, MAXPATHLEN);
587 
588 	/*
589 	 * Search for a matching vHCI
590 	 */
591 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
592 	if (vh == NULL) {
593 		return (MDI_FAILURE);
594 	}
595 
596 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
597 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
598 	ph->ph_dip = pdip;
599 	ph->ph_vhci = vh;
600 	ph->ph_next = NULL;
601 	ph->ph_unstable = 0;
602 	ph->ph_vprivate = 0;
603 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
604 	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);
605 
606 	MDI_PHCI_SET_POWER_UP(ph);
607 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
608 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
609 
610 	vhcache_phci_add(vh->vh_config, ph);
611 
612 	mutex_enter(&mdi_mutex);
613 	if (vh->vh_phci_head == NULL) {
614 		vh->vh_phci_head = ph;
615 	}
616 	if (vh->vh_phci_tail) {
617 		vh->vh_phci_tail->ph_next = ph;
618 	}
619 	vh->vh_phci_tail = ph;
620 	vh->vh_phci_count++;
621 	mutex_exit(&mdi_mutex);
622 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
623 	return (MDI_SUCCESS);
624 }
625 
626 /*
627  * mdi_phci_unregister():
628  *		Unregister a pHCI module from mpxio framework
629  *		mdi_phci_unregister() is called by the pHCI drivers from their
630  *		detach(9E) handler to unregister their instances from the
631  *		framework.
632  * Return Values:
633  *		MDI_SUCCESS
634  *		MDI_FAILURE
635  */
636 
637 /*ARGSUSED*/
638 int
639 mdi_phci_unregister(dev_info_t *pdip, int flags)
640 {
641 	mdi_vhci_t		*vh;
642 	mdi_phci_t		*ph;
643 	mdi_phci_t		*tmp;
644 	mdi_phci_t		*prev = NULL;
645 
646 	ph = i_devi_get_phci(pdip);
647 	if (ph == NULL) {
648 		MDI_DEBUG(1, (CE_WARN, pdip,
649 		    "!pHCI unregister: Not a valid pHCI"));
650 		return (MDI_FAILURE);
651 	}
652 
653 	vh = ph->ph_vhci;
654 	ASSERT(vh != NULL);
655 	if (vh == NULL) {
656 		MDI_DEBUG(1, (CE_WARN, pdip,
657 		    "!pHCI unregister: Not a valid vHCI"));
658 		return (MDI_FAILURE);
659 	}
660 
661 	mutex_enter(&mdi_mutex);
662 	tmp = vh->vh_phci_head;
663 	while (tmp) {
664 		if (tmp == ph) {
665 			break;
666 		}
667 		prev = tmp;
668 		tmp = tmp->ph_next;
669 	}
670 
671 	if (ph == vh->vh_phci_head) {
672 		vh->vh_phci_head = ph->ph_next;
673 	} else {
674 		prev->ph_next = ph->ph_next;
675 	}
676 
677 	if (ph == vh->vh_phci_tail) {
678 		vh->vh_phci_tail = prev;
679 	}
680 
681 	vh->vh_phci_count--;
682 
683 	mutex_exit(&mdi_mutex);
684 
685 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
686 	    ESC_DDI_INITIATOR_UNREGISTER);
687 	vhcache_phci_remove(vh->vh_config, ph);
688 	cv_destroy(&ph->ph_unstable_cv);
689 	cv_destroy(&ph->ph_powerchange_cv);
690 	mutex_destroy(&ph->ph_mutex);
691 	kmem_free(ph, sizeof (mdi_phci_t));
692 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
693 	DEVI(pdip)->devi_mdi_xhci = NULL;
694 	return (MDI_SUCCESS);
695 }
696 
697 /*
698  * i_devi_get_phci():
699  * 		Utility function to return the phci extensions.
700  */
701 static mdi_phci_t *
702 i_devi_get_phci(dev_info_t *pdip)
703 {
704 	mdi_phci_t	*ph = NULL;
705 	if (MDI_PHCI(pdip)) {
706 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
707 	}
708 	return (ph);
709 }
710 
711 /*
712  * mdi_phci_path2devinfo():
713  * 		Utility function to search for a valid phci device given
714  *		the devfs pathname.
715  */
716 
717 dev_info_t *
718 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
719 {
720 	char		*temp_pathname;
721 	mdi_vhci_t	*vh;
722 	mdi_phci_t	*ph;
723 	dev_info_t 	*pdip = NULL;
724 
725 	vh = i_devi_get_vhci(vdip);
726 	ASSERT(vh != NULL);
727 
728 	if (vh == NULL) {
729 		/*
730 		 * Invalid vHCI component, return failure
731 		 */
732 		return (NULL);
733 	}
734 
735 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
736 	mutex_enter(&mdi_mutex);
737 	ph = vh->vh_phci_head;
738 	while (ph != NULL) {
739 		pdip = ph->ph_dip;
740 		ASSERT(pdip != NULL);
741 		*temp_pathname = '\0';
742 		(void) ddi_pathname(pdip, temp_pathname);
743 		if (strcmp(temp_pathname, pathname) == 0) {
744 			break;
745 		}
746 		ph = ph->ph_next;
747 	}
748 	if (ph == NULL) {
749 		pdip = NULL;
750 	}
751 	mutex_exit(&mdi_mutex);
752 	kmem_free(temp_pathname, MAXPATHLEN);
753 	return (pdip);
754 }
755 
756 /*
757  * mdi_phci_get_path_count():
758  * 		get number of path information nodes associated with a given
759  *		pHCI device.
760  */
761 int
762 mdi_phci_get_path_count(dev_info_t *pdip)
763 {
764 	mdi_phci_t	*ph;
765 	int		count = 0;
766 
767 	ph = i_devi_get_phci(pdip);
768 	if (ph != NULL) {
769 		count = ph->ph_path_count;
770 	}
771 	return (count);
772 }
773 
774 /*
775  * i_mdi_phci_lock():
776  *		Lock a pHCI device
777  * Return Values:
778  *		None
779  * Note:
780  *		The default locking order is:
781  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
782  *		But there are number of situations where locks need to be
783  *		grabbed in reverse order.  This routine implements try and lock
784  *		mechanism depending on the requested parameter option.
785  */
786 static void
787 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
788 {
789 	if (pip) {
790 		/* Reverse locking is requested. */
791 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
792 			/*
793 			 * tryenter failed. Try to grab again
794 			 * after a small delay
795 			 */
796 			MDI_PI_HOLD(pip);
797 			MDI_PI_UNLOCK(pip);
798 			delay(1);
799 			MDI_PI_LOCK(pip);
800 			MDI_PI_RELE(pip);
801 		}
802 	} else {
803 		MDI_PHCI_LOCK(ph);
804 	}
805 }
806 
807 /*
808  * i_mdi_phci_get_client_lock():
809  *		Lock a pHCI device
810  * Return Values:
811  *		None
812  * Note:
813  *		The default locking order is:
814  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
815  *		But there are number of situations where locks need to be
816  *		grabbed in reverse order.  This routine implements try and lock
817  *		mechanism depending on the requested parameter option.
818  */
819 static void
820 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
821 {
822 	if (ct) {
823 		/* Reverse locking is requested. */
824 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
825 			/*
826 			 * tryenter failed. Try to grab again
827 			 * after a small delay
828 			 */
829 			MDI_CLIENT_UNLOCK(ct);
830 			delay(1);
831 			MDI_CLIENT_LOCK(ct);
832 		}
833 	} else {
834 		MDI_PHCI_LOCK(ph);
835 	}
836 }
837 
838 /*
839  * i_mdi_phci_unlock():
840  *		Unlock the pHCI component
841  */
842 static void
843 i_mdi_phci_unlock(mdi_phci_t *ph)
844 {
845 	MDI_PHCI_UNLOCK(ph);
846 }
847 
848 /*
849  * i_mdi_devinfo_create():
850  *		create client device's devinfo node
851  * Return Values:
852  *		dev_info
853  *		NULL
854  * Notes:
855  */
856 static dev_info_t *
857 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
858 	char **compatible, int ncompatible)
859 {
860 	dev_info_t *cdip = NULL;
861 
862 	ASSERT(MUTEX_HELD(&mdi_mutex));
863 
864 	/* Verify for duplicate entry */
865 	cdip = i_mdi_devinfo_find(vh, name, guid);
866 	ASSERT(cdip == NULL);
867 	if (cdip) {
868 		cmn_err(CE_WARN,
869 		    "i_mdi_devinfo_create: client dip %p already exists",
870 			(void *)cdip);
871 	}
872 
873 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
874 	if (cdip == NULL)
875 		goto fail;
876 
877 	/*
878 	 * Create component type and Global unique identifier
879 	 * properties
880 	 */
881 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
882 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
883 		goto fail;
884 	}
885 
886 	/* Decorate the node with compatible property */
887 	if (compatible &&
888 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
889 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
890 		goto fail;
891 	}
892 
893 	return (cdip);
894 
895 fail:
896 	if (cdip) {
897 		(void) ndi_prop_remove_all(cdip);
898 		(void) ndi_devi_free(cdip);
899 	}
900 	return (NULL);
901 }
902 
903 /*
904  * i_mdi_devinfo_find():
905  *		Find a matching devinfo node for given client node name
906  *		and its guid.
907  * Return Values:
908  *		Handle to a dev_info node or NULL
909  */
910 
911 static dev_info_t *
912 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
913 {
914 	char			*data;
915 	dev_info_t 		*cdip = NULL;
916 	dev_info_t 		*ndip = NULL;
917 	int			circular;
918 
919 	ndi_devi_enter(vh->vh_dip, &circular);
920 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
921 	while ((cdip = ndip) != NULL) {
922 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
923 
924 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
925 			continue;
926 		}
927 
928 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
929 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
930 		    &data) != DDI_PROP_SUCCESS) {
931 			continue;
932 		}
933 
934 		if (strcmp(data, guid) != 0) {
935 			ddi_prop_free(data);
936 			continue;
937 		}
938 		ddi_prop_free(data);
939 		break;
940 	}
941 	ndi_devi_exit(vh->vh_dip, circular);
942 	return (cdip);
943 }
944 
945 /*
946  * i_mdi_devinfo_remove():
947  *		Remove a client device node
948  */
949 static int
950 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
951 {
952 	int	rv = MDI_SUCCESS;
953 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
954 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
955 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
956 		if (rv != NDI_SUCCESS) {
957 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
958 			    " failed. cdip = %p\n", cdip));
959 		}
960 		/*
961 		 * Convert to MDI error code
962 		 */
963 		switch (rv) {
964 		case NDI_SUCCESS:
965 			rv = MDI_SUCCESS;
966 			break;
967 		case NDI_BUSY:
968 			rv = MDI_BUSY;
969 			break;
970 		default:
971 			rv = MDI_FAILURE;
972 			break;
973 		}
974 	}
975 	return (rv);
976 }
977 
978 /*
979  * i_devi_get_client()
980  *		Utility function to get mpxio component extensions
981  */
982 static mdi_client_t *
983 i_devi_get_client(dev_info_t *cdip)
984 {
985 	mdi_client_t	*ct = NULL;
986 	if (MDI_CLIENT(cdip)) {
987 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
988 	}
989 	return (ct);
990 }
991 
992 /*
993  * i_mdi_is_child_present():
994  *		Search for the presence of client device dev_info node
995  */
996 
997 static int
998 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
999 {
1000 	int		rv = MDI_FAILURE;
1001 	struct dev_info	*dip;
1002 	int		circular;
1003 
1004 	ndi_devi_enter(vdip, &circular);
1005 	dip = DEVI(vdip)->devi_child;
1006 	while (dip) {
1007 		if (dip == DEVI(cdip)) {
1008 			rv = MDI_SUCCESS;
1009 			break;
1010 		}
1011 		dip = dip->devi_sibling;
1012 	}
1013 	ndi_devi_exit(vdip, circular);
1014 	return (rv);
1015 }
1016 
1017 
1018 /*
1019  * i_mdi_client_lock():
1020  *		Grab client component lock
1021  * Return Values:
1022  *		None
1023  * Note:
1024  *		The default locking order is:
1025  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1026  *		But there are number of situations where locks need to be
1027  *		grabbed in reverse order.  This routine implements try and lock
1028  *		mechanism depending on the requested parameter option.
1029  */
1030 
1031 static void
1032 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1033 {
1034 	if (pip) {
1035 		/*
1036 		 * Reverse locking is requested.
1037 		 */
1038 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1039 			/*
1040 			 * tryenter failed. Try to grab again
1041 			 * after a small delay
1042 			 */
1043 			MDI_PI_HOLD(pip);
1044 			MDI_PI_UNLOCK(pip);
1045 			delay(1);
1046 			MDI_PI_LOCK(pip);
1047 			MDI_PI_RELE(pip);
1048 		}
1049 	} else {
1050 		MDI_CLIENT_LOCK(ct);
1051 	}
1052 }
1053 
1054 /*
1055  * i_mdi_client_unlock():
1056  *		Unlock a client component
1057  */
1058 
1059 static void
1060 i_mdi_client_unlock(mdi_client_t *ct)
1061 {
1062 	MDI_CLIENT_UNLOCK(ct);
1063 }
1064 
1065 /*
1066  * i_mdi_client_alloc():
1067  * 		Allocate and initialize a client structure.  Caller should
1068  *		hold the global mdi_mutex.
1069  * Return Values:
1070  *		Handle to a client component
1071  */
1072 /*ARGSUSED*/
1073 static mdi_client_t *
1074 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1075 {
1076 	mdi_client_t	*ct;
1077 
1078 	ASSERT(MUTEX_HELD(&mdi_mutex));
1079 
1080 	/*
1081 	 * Allocate and initialize a component structure.
1082 	 */
1083 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1084 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1085 	ct->ct_hnext = NULL;
1086 	ct->ct_hprev = NULL;
1087 	ct->ct_dip = NULL;
1088 	ct->ct_vhci = vh;
1089 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1090 	(void) strcpy(ct->ct_drvname, name);
1091 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1092 	(void) strcpy(ct->ct_guid, lguid);
1093 	ct->ct_cprivate = NULL;
1094 	ct->ct_vprivate = NULL;
1095 	ct->ct_flags = 0;
1096 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1097 	MDI_CLIENT_SET_OFFLINE(ct);
1098 	MDI_CLIENT_SET_DETACH(ct);
1099 	MDI_CLIENT_SET_POWER_UP(ct);
1100 	ct->ct_failover_flags = 0;
1101 	ct->ct_failover_status = 0;
1102 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1103 	ct->ct_unstable = 0;
1104 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1105 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1106 	ct->ct_lb = vh->vh_lb;
1107 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1108 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1109 	ct->ct_path_count = 0;
1110 	ct->ct_path_head = NULL;
1111 	ct->ct_path_tail = NULL;
1112 	ct->ct_path_last = NULL;
1113 
1114 	/*
1115 	 * Add this client component to our client hash queue
1116 	 */
1117 	i_mdi_client_enlist_table(vh, ct);
1118 	return (ct);
1119 }
1120 
1121 /*
1122  * i_mdi_client_enlist_table():
1123  *		Attach the client device to the client hash table. Caller
1124  *		should hold the mdi_mutex
1125  */
1126 
1127 static void
1128 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1129 {
1130 	int 			index;
1131 	struct client_hash	*head;
1132 
1133 	ASSERT(MUTEX_HELD(&mdi_mutex));
1134 	index = i_mdi_get_hash_key(ct->ct_guid);
1135 	head = &vh->vh_client_table[index];
1136 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1137 	head->ct_hash_head = ct;
1138 	head->ct_hash_count++;
1139 	vh->vh_client_count++;
1140 }
1141 
1142 /*
1143  * i_mdi_client_delist_table():
1144  *		Attach the client device to the client hash table.
1145  *		Caller should hold the mdi_mutex
1146  */
1147 
1148 static void
1149 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1150 {
1151 	int			index;
1152 	char			*guid;
1153 	struct client_hash 	*head;
1154 	mdi_client_t		*next;
1155 	mdi_client_t		*last;
1156 
1157 	ASSERT(MUTEX_HELD(&mdi_mutex));
1158 	guid = ct->ct_guid;
1159 	index = i_mdi_get_hash_key(guid);
1160 	head = &vh->vh_client_table[index];
1161 
1162 	last = NULL;
1163 	next = (mdi_client_t *)head->ct_hash_head;
1164 	while (next != NULL) {
1165 		if (next == ct) {
1166 			break;
1167 		}
1168 		last = next;
1169 		next = next->ct_hnext;
1170 	}
1171 
1172 	if (next) {
1173 		head->ct_hash_count--;
1174 		if (last == NULL) {
1175 			head->ct_hash_head = ct->ct_hnext;
1176 		} else {
1177 			last->ct_hnext = ct->ct_hnext;
1178 		}
1179 		ct->ct_hnext = NULL;
1180 		vh->vh_client_count--;
1181 	}
1182 }
1183 
1184 
1185 /*
1186  * i_mdi_client_free():
1187  *		Free a client component
1188  */
1189 static int
1190 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1191 {
1192 	int		rv = MDI_SUCCESS;
1193 	int		flags = ct->ct_flags;
1194 	dev_info_t	*cdip;
1195 	dev_info_t	*vdip;
1196 
1197 	ASSERT(MUTEX_HELD(&mdi_mutex));
1198 	vdip = vh->vh_dip;
1199 	cdip = ct->ct_dip;
1200 
1201 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1202 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1203 	DEVI(cdip)->devi_mdi_client = NULL;
1204 
1205 	/*
1206 	 * Clear out back ref. to dev_info_t node
1207 	 */
1208 	ct->ct_dip = NULL;
1209 
1210 	/*
1211 	 * Remove this client from our hash queue
1212 	 */
1213 	i_mdi_client_delist_table(vh, ct);
1214 
1215 	/*
1216 	 * Uninitialize and free the component
1217 	 */
1218 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1219 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1220 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1221 	cv_destroy(&ct->ct_failover_cv);
1222 	cv_destroy(&ct->ct_unstable_cv);
1223 	cv_destroy(&ct->ct_powerchange_cv);
1224 	mutex_destroy(&ct->ct_mutex);
1225 	kmem_free(ct, sizeof (*ct));
1226 
1227 	if (cdip != NULL) {
1228 		mutex_exit(&mdi_mutex);
1229 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1230 		mutex_enter(&mdi_mutex);
1231 	}
1232 	return (rv);
1233 }
1234 
1235 /*
1236  * i_mdi_client_find():
1237  * 		Find the client structure corresponding to a given guid
1238  *		Caller should hold the mdi_mutex
1239  */
1240 static mdi_client_t *
1241 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1242 {
1243 	int			index;
1244 	struct client_hash	*head;
1245 	mdi_client_t		*ct;
1246 
1247 	ASSERT(MUTEX_HELD(&mdi_mutex));
1248 	index = i_mdi_get_hash_key(guid);
1249 	head = &vh->vh_client_table[index];
1250 
1251 	ct = head->ct_hash_head;
1252 	while (ct != NULL) {
1253 		if (strcmp(ct->ct_guid, guid) == 0 &&
1254 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1255 			break;
1256 		}
1257 		ct = ct->ct_hnext;
1258 	}
1259 	return (ct);
1260 }
1261 
1262 
1263 
1264 /*
1265  * i_mdi_client_update_state():
1266  *		Compute and update client device state
1267  * Notes:
1268  *		A client device can be in any of three possible states:
1269  *
1270  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1271  *		one online/standby paths. Can tolerate failures.
1272  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1273  *		no alternate paths available as standby. A failure on the online
1274  *		would result in loss of access to device data.
1275  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1276  *		no paths available to access the device.
1277  */
1278 static void
1279 i_mdi_client_update_state(mdi_client_t *ct)
1280 {
1281 	int state;
1282 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1283 	state = i_mdi_client_compute_state(ct, NULL);
1284 	MDI_CLIENT_SET_STATE(ct, state);
1285 }
1286 
1287 /*
1288  * i_mdi_client_compute_state():
1289  *		Compute client device state
1290  *
1291  *		mdi_phci_t *	Pointer to pHCI structure which should
1292  *				while computing the new value.  Used by
1293  *				i_mdi_phci_offline() to find the new
1294  *				client state after DR of a pHCI.
1295  */
1296 static int
1297 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1298 {
1299 	int		state;
1300 	int		online_count = 0;
1301 	int		standby_count = 0;
1302 	mdi_pathinfo_t	*pip, *next;
1303 
1304 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1305 	pip = ct->ct_path_head;
1306 	while (pip != NULL) {
1307 		MDI_PI_LOCK(pip);
1308 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1309 		if (MDI_PI(pip)->pi_phci == ph) {
1310 			MDI_PI_UNLOCK(pip);
1311 			pip = next;
1312 			continue;
1313 		}
1314 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1315 				== MDI_PATHINFO_STATE_ONLINE)
1316 			online_count++;
1317 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1318 				== MDI_PATHINFO_STATE_STANDBY)
1319 			standby_count++;
1320 		MDI_PI_UNLOCK(pip);
1321 		pip = next;
1322 	}
1323 
1324 	if (online_count == 0) {
1325 		if (standby_count == 0) {
1326 			state = MDI_CLIENT_STATE_FAILED;
1327 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1328 			    " ct = %p\n", ct));
1329 		} else if (standby_count == 1) {
1330 			state = MDI_CLIENT_STATE_DEGRADED;
1331 		} else {
1332 			state = MDI_CLIENT_STATE_OPTIMAL;
1333 		}
1334 	} else if (online_count == 1) {
1335 		if (standby_count == 0) {
1336 			state = MDI_CLIENT_STATE_DEGRADED;
1337 		} else {
1338 			state = MDI_CLIENT_STATE_OPTIMAL;
1339 		}
1340 	} else {
1341 		state = MDI_CLIENT_STATE_OPTIMAL;
1342 	}
1343 	return (state);
1344 }
1345 
1346 /*
1347  * i_mdi_client2devinfo():
1348  *		Utility function
1349  */
1350 dev_info_t *
1351 i_mdi_client2devinfo(mdi_client_t *ct)
1352 {
1353 	return (ct->ct_dip);
1354 }
1355 
1356 /*
1357  * mdi_client_path2_devinfo():
1358  * 		Given the parent devinfo and child devfs pathname, search for
1359  *		a valid devfs node handle.
1360  */
1361 dev_info_t *
1362 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1363 {
1364 	dev_info_t 	*cdip = NULL;
1365 	dev_info_t 	*ndip = NULL;
1366 	char		*temp_pathname;
1367 	int		circular;
1368 
1369 	/*
1370 	 * Allocate temp buffer
1371 	 */
1372 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1373 
1374 	/*
1375 	 * Lock parent against changes
1376 	 */
1377 	ndi_devi_enter(vdip, &circular);
1378 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1379 	while ((cdip = ndip) != NULL) {
1380 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1381 
1382 		*temp_pathname = '\0';
1383 		(void) ddi_pathname(cdip, temp_pathname);
1384 		if (strcmp(temp_pathname, pathname) == 0) {
1385 			break;
1386 		}
1387 	}
1388 	/*
1389 	 * Release devinfo lock
1390 	 */
1391 	ndi_devi_exit(vdip, circular);
1392 
1393 	/*
1394 	 * Free the temp buffer
1395 	 */
1396 	kmem_free(temp_pathname, MAXPATHLEN);
1397 	return (cdip);
1398 }
1399 
1400 
1401 /*
1402  * mdi_client_get_path_count():
1403  * 		Utility function to get number of path information nodes
1404  *		associated with a given client device.
1405  */
1406 int
1407 mdi_client_get_path_count(dev_info_t *cdip)
1408 {
1409 	mdi_client_t	*ct;
1410 	int		count = 0;
1411 
1412 	ct = i_devi_get_client(cdip);
1413 	if (ct != NULL) {
1414 		count = ct->ct_path_count;
1415 	}
1416 	return (count);
1417 }
1418 
1419 
1420 /*
1421  * i_mdi_get_hash_key():
1422  * 		Create a hash using strings as keys
1423  *
1424  */
1425 static int
1426 i_mdi_get_hash_key(char *str)
1427 {
1428 	uint32_t	g, hash = 0;
1429 	char		*p;
1430 
1431 	for (p = str; *p != '\0'; p++) {
1432 		g = *p;
1433 		hash += g;
1434 	}
1435 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1436 }
1437 
1438 /*
1439  * mdi_get_lb_policy():
1440  * 		Get current load balancing policy for a given client device
1441  */
1442 client_lb_t
1443 mdi_get_lb_policy(dev_info_t *cdip)
1444 {
1445 	client_lb_t	lb = LOAD_BALANCE_NONE;
1446 	mdi_client_t	*ct;
1447 
1448 	ct = i_devi_get_client(cdip);
1449 	if (ct != NULL) {
1450 		lb = ct->ct_lb;
1451 	}
1452 	return (lb);
1453 }
1454 
1455 /*
1456  * mdi_set_lb_region_size():
1457  * 		Set current region size for the load-balance
1458  */
1459 int
1460 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1461 {
1462 	mdi_client_t	*ct;
1463 	int		rv = MDI_FAILURE;
1464 
1465 	ct = i_devi_get_client(cdip);
1466 	if (ct != NULL && ct->ct_lb_args != NULL) {
1467 		ct->ct_lb_args->region_size = region_size;
1468 		rv = MDI_SUCCESS;
1469 	}
1470 	return (rv);
1471 }
1472 
1473 /*
1474  * mdi_Set_lb_policy():
1475  * 		Set current load balancing policy for a given client device
1476  */
1477 int
1478 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1479 {
1480 	mdi_client_t	*ct;
1481 	int		rv = MDI_FAILURE;
1482 
1483 	ct = i_devi_get_client(cdip);
1484 	if (ct != NULL) {
1485 		ct->ct_lb = lb;
1486 		rv = MDI_SUCCESS;
1487 	}
1488 	return (rv);
1489 }
1490 
1491 /*
1492  * mdi_failover():
1493  *		failover function called by the vHCI drivers to initiate
1494  *		a failover operation.  This is typically due to non-availability
1495  *		of online paths to route I/O requests.  Failover can be
1496  *		triggered through user application also.
1497  *
1498  *		The vHCI driver calls mdi_failover() to initiate a failover
1499  *		operation. mdi_failover() calls back into the vHCI driver's
1500  *		vo_failover() entry point to perform the actual failover
1501  *		operation.  The reason for requiring the vHCI driver to
1502  *		initiate failover by calling mdi_failover(), instead of directly
1503  *		executing vo_failover() itself, is to ensure that the mdi
1504  *		framework can keep track of the client state properly.
1505  *		Additionally, mdi_failover() provides as a convenience the
1506  *		option of performing the failover operation synchronously or
1507  *		asynchronously
1508  *
1509  *		Upon successful completion of the failover operation, the
1510  *		paths that were previously ONLINE will be in the STANDBY state,
1511  *		and the newly activated paths will be in the ONLINE state.
1512  *
1513  *		The flags modifier determines whether the activation is done
1514  *		synchronously: MDI_FAILOVER_SYNC
1515  * Return Values:
1516  *		MDI_SUCCESS
1517  *		MDI_FAILURE
1518  *		MDI_BUSY
1519  */
1520 /*ARGSUSED*/
1521 int
1522 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1523 {
1524 	int			rv;
1525 	mdi_client_t		*ct;
1526 
1527 	ct = i_devi_get_client(cdip);
1528 	ASSERT(ct != NULL);
1529 	if (ct == NULL) {
1530 		/* cdip is not a valid client device. Nothing more to do. */
1531 		return (MDI_FAILURE);
1532 	}
1533 
1534 	MDI_CLIENT_LOCK(ct);
1535 
1536 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1537 		/* A path to the client is being freed */
1538 		MDI_CLIENT_UNLOCK(ct);
1539 		return (MDI_BUSY);
1540 	}
1541 
1542 
1543 	if (MDI_CLIENT_IS_FAILED(ct)) {
1544 		/*
1545 		 * Client is in failed state. Nothing more to do.
1546 		 */
1547 		MDI_CLIENT_UNLOCK(ct);
1548 		return (MDI_FAILURE);
1549 	}
1550 
1551 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1552 		/*
1553 		 * Failover is already in progress; return BUSY
1554 		 */
1555 		MDI_CLIENT_UNLOCK(ct);
1556 		return (MDI_BUSY);
1557 	}
1558 	/*
1559 	 * Make sure that mdi_pathinfo node state changes are processed.
1560 	 * We do not allow failovers to progress while client path state
1561 	 * changes are in progress
1562 	 */
1563 	if (ct->ct_unstable) {
1564 		if (flags == MDI_FAILOVER_ASYNC) {
1565 			MDI_CLIENT_UNLOCK(ct);
1566 			return (MDI_BUSY);
1567 		} else {
1568 			while (ct->ct_unstable)
1569 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1570 		}
1571 	}
1572 
1573 	/*
1574 	 * Client device is in stable state. Before proceeding, perform sanity
1575 	 * checks again.
1576 	 */
1577 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1578 	    (i_ddi_node_state(ct->ct_dip) < DS_READY)) {
1579 		/*
1580 		 * Client is in failed state. Nothing more to do.
1581 		 */
1582 		MDI_CLIENT_UNLOCK(ct);
1583 		return (MDI_FAILURE);
1584 	}
1585 
1586 	/*
1587 	 * Set the client state as failover in progress.
1588 	 */
1589 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1590 	ct->ct_failover_flags = flags;
1591 	MDI_CLIENT_UNLOCK(ct);
1592 
1593 	if (flags == MDI_FAILOVER_ASYNC) {
1594 		/*
1595 		 * Submit the initiate failover request via CPR safe
1596 		 * taskq threads.
1597 		 */
1598 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1599 		    ct, KM_SLEEP);
1600 		return (MDI_ACCEPT);
1601 	} else {
1602 		/*
1603 		 * Synchronous failover mode.  Typically invoked from the user
1604 		 * land.
1605 		 */
1606 		rv = i_mdi_failover(ct);
1607 	}
1608 	return (rv);
1609 }
1610 
1611 /*
1612  * i_mdi_failover():
1613  *		internal failover function. Invokes vHCI drivers failover
1614  *		callback function and process the failover status
1615  * Return Values:
1616  *		None
1617  *
1618  * Note: A client device in failover state can not be detached or freed.
1619  */
1620 static int
1621 i_mdi_failover(void *arg)
1622 {
1623 	int		rv = MDI_SUCCESS;
1624 	mdi_client_t	*ct = (mdi_client_t *)arg;
1625 	mdi_vhci_t	*vh = ct->ct_vhci;
1626 
1627 	ASSERT(!MUTEX_HELD(&ct->ct_mutex));
1628 
1629 	if (vh->vh_ops->vo_failover != NULL) {
1630 		/*
1631 		 * Call vHCI drivers callback routine
1632 		 */
1633 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1634 		    ct->ct_failover_flags);
1635 	}
1636 
1637 	MDI_CLIENT_LOCK(ct);
1638 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1639 
1640 	/*
1641 	 * Save the failover return status
1642 	 */
1643 	ct->ct_failover_status = rv;
1644 
1645 	/*
1646 	 * As a result of failover, client status would have been changed.
1647 	 * Update the client state and wake up anyone waiting on this client
1648 	 * device.
1649 	 */
1650 	i_mdi_client_update_state(ct);
1651 
1652 	cv_broadcast(&ct->ct_failover_cv);
1653 	MDI_CLIENT_UNLOCK(ct);
1654 	return (rv);
1655 }
1656 
1657 /*
1658  * Load balancing is logical block.
1659  * IOs within the range described by region_size
1660  * would go on the same path. This would improve the
1661  * performance by cache-hit on some of the RAID devices.
1662  * Search only for online paths(At some point we
1663  * may want to balance across target ports).
1664  * If no paths are found then default to round-robin.
1665  */
1666 static int
1667 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1668 {
1669 	int		path_index = -1;
1670 	int		online_path_count = 0;
1671 	int		online_nonpref_path_count = 0;
1672 	int 		region_size = ct->ct_lb_args->region_size;
1673 	mdi_pathinfo_t	*pip;
1674 	mdi_pathinfo_t	*next;
1675 	int		preferred, path_cnt;
1676 
1677 	pip = ct->ct_path_head;
1678 	while (pip) {
1679 		MDI_PI_LOCK(pip);
1680 		if (MDI_PI(pip)->pi_state ==
1681 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1682 			online_path_count++;
1683 		} else if (MDI_PI(pip)->pi_state ==
1684 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1685 			online_nonpref_path_count++;
1686 		}
1687 		next = (mdi_pathinfo_t *)
1688 		    MDI_PI(pip)->pi_client_link;
1689 		MDI_PI_UNLOCK(pip);
1690 		pip = next;
1691 	}
1692 	/* if found any online/preferred then use this type */
1693 	if (online_path_count > 0) {
1694 		path_cnt = online_path_count;
1695 		preferred = 1;
1696 	} else if (online_nonpref_path_count > 0) {
1697 		path_cnt = online_nonpref_path_count;
1698 		preferred = 0;
1699 	} else {
1700 		path_cnt = 0;
1701 	}
1702 	if (path_cnt) {
1703 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1704 		pip = ct->ct_path_head;
1705 		while (pip && path_index != -1) {
1706 			MDI_PI_LOCK(pip);
1707 			if (path_index == 0 &&
1708 			    (MDI_PI(pip)->pi_state ==
1709 			    MDI_PATHINFO_STATE_ONLINE) &&
1710 				MDI_PI(pip)->pi_preferred == preferred) {
1711 				MDI_PI_HOLD(pip);
1712 				MDI_PI_UNLOCK(pip);
1713 				*ret_pip = pip;
1714 				return (MDI_SUCCESS);
1715 			}
1716 			path_index --;
1717 			next = (mdi_pathinfo_t *)
1718 			    MDI_PI(pip)->pi_client_link;
1719 			MDI_PI_UNLOCK(pip);
1720 			pip = next;
1721 		}
1722 		if (pip == NULL) {
1723 			MDI_DEBUG(4, (CE_NOTE, NULL,
1724 			    "!lba %p, no pip !!\n",
1725 				bp->b_blkno));
1726 		} else {
1727 			MDI_DEBUG(4, (CE_NOTE, NULL,
1728 			    "!lba %p, no pip for path_index, "
1729 			    "pip %p\n", pip));
1730 		}
1731 	}
1732 	return (MDI_FAILURE);
1733 }
1734 
1735 /*
1736  * mdi_select_path():
1737  *		select a path to access a client device.
1738  *
1739  *		mdi_select_path() function is called by the vHCI drivers to
1740  *		select a path to route the I/O request to.  The caller passes
1741  *		the block I/O data transfer structure ("buf") as one of the
1742  *		parameters.  The mpxio framework uses the buf structure
1743  *		contents to maintain per path statistics (total I/O size /
1744  *		count pending).  If more than one online paths are available to
1745  *		select, the framework automatically selects a suitable path
1746  *		for routing I/O request. If a failover operation is active for
1747  *		this client device the call shall be failed with MDI_BUSY error
1748  *		code.
1749  *
1750  *		By default this function returns a suitable path in online
1751  *		state based on the current load balancing policy.  Currently
1752  *		we support LOAD_BALANCE_NONE (Previously selected online path
1753  *		will continue to be used till the path is usable) and
1754  *		LOAD_BALANCE_RR (Online paths will be selected in a round
1755  *		robin fashion), LOAD_BALANCE_LB(Online paths will be selected
1756  *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
1757  *		based on the logical block).  The load balancing policy is
1758  *		set through the vHCI driver's configuration file (driver.conf).
1759  *		vHCI drivers may override this default behavior by specifying
1760  *		appropriate flags.  If start_pip is specified (non NULL) is
1761  *		used as start point to walk and find the next appropriate path.
1762  *		The following values are currently defined:
1763  *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
1764  *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
1765  *
1766  *		The non-standard behavior is used by the scsi_vhci driver,
1767  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1768  *		attach of client devices (to avoid an unnecessary failover
1769  *		when the STANDBY path comes up first), during failover
1770  *		(to activate a STANDBY path as ONLINE).
1771  *
1772  *		The selected path in returned in a held state (ref_cnt).
1773  *		The selected path is returned in a held state (ref_cnt).
1774  *
1775  * Return Values:
1776  *		MDI_SUCCESS	- Completed successfully
1777  *		MDI_BUSY 	- Client device is busy failing over
1778  *		MDI_NOPATH	- Client device is online, but no valid path are
1779  *				  available to access this client device
1780  *		MDI_FAILURE	- Invalid client device or state
1781  *		MDI_DEVI_ONLINING
1782  *				- Client device (struct dev_info state) is in
1783  *				  onlining state.
1784  */
1785 
/*
 * Implementation overview: with flags == 0 ("standard behavior") the client
 * state is validated first and the client's load balancing policy is used.
 * With non-zero flags the state checks are skipped and a round robin walk is
 * done starting after start_pip.  Every walk looks at preferred paths first
 * and at non-preferred paths in a second pass.  The client lock is held for
 * the whole selection; each path is examined under its own lock.
 */
1786 /*ARGSUSED*/
1787 int
1788 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1789     mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
1790 {
1791 	mdi_client_t	*ct;
1792 	mdi_pathinfo_t	*pip;
1793 	mdi_pathinfo_t	*next;
1794 	mdi_pathinfo_t	*head;
1795 	mdi_pathinfo_t	*start;
1796 	client_lb_t	lbp;	/* load balancing policy */
1797 	int		sb = 1;	/* standard behavior */
1798 	int		preferred = 1;	/* preferred path */
	/* cond: current path satisfies the selection criteria */
	/* cont: cleared to terminate the path walk */
1799 	int		cond, cont = 1;
	/* retry: set when a transient (busy) path was seen during the walk */
1800 	int		retry = 0;
1801 
1802 	if (flags != 0) {
1803 		/*
1804 		 * disable default behavior
1805 		 */
1806 		sb = 0;
1807 	}
1808 
1809 	*ret_pip = NULL;
1810 	ct = i_devi_get_client(cdip);
1811 	if (ct == NULL) {
1812 		/* mdi extensions are NULL, Nothing more to do */
1813 		return (MDI_FAILURE);
1814 	}
1815 
1816 	MDI_CLIENT_LOCK(ct);
1817 
1818 	if (sb) {
1819 		if (MDI_CLIENT_IS_FAILED(ct)) {
1820 			/*
1821 			 * Client is not ready to accept any I/O requests.
1822 			 * Fail this request.
1823 			 */
1824 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1825 			    "client state offline ct = %p\n", ct));
1826 			MDI_CLIENT_UNLOCK(ct);
1827 			return (MDI_FAILURE);
1828 		}
1829 
1830 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1831 			/*
1832 			 * Check for Failover is in progress. If so tell the
1833 			 * caller that this device is busy.
1834 			 */
1835 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1836 			    "client failover in progress ct = %p\n", ct));
1837 			MDI_CLIENT_UNLOCK(ct);
1838 			return (MDI_BUSY);
1839 		}
1840 
1841 		/*
1842 		 * Check to see whether the client device is attached.
1843 		 * If not so, let the vHCI driver manually select a path
1844 		 * (standby) and let the probe/attach process to continue.
1845 		 */
1846 		if ((MDI_CLIENT_IS_DETACHED(ct)) ||
1847 		    i_ddi_node_state(cdip) < DS_READY) {
1848 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
1849 			MDI_CLIENT_UNLOCK(ct);
1850 			return (MDI_DEVI_ONLINING);
1851 		}
1852 	}
1853 
1854 	/*
1855 	 * Cache in the client list head.  If head of the list is NULL
1856 	 * return MDI_NOPATH
1857 	 */
1858 	head = ct->ct_path_head;
1859 	if (head == NULL) {
1860 		MDI_CLIENT_UNLOCK(ct);
1861 		return (MDI_NOPATH);
1862 	}
1863 
1864 	/*
1865 	 * for non default behavior, bypass current
1866 	 * load balancing policy and always use LOAD_BALANCE_RR
1867 	 * except that the start point will be adjusted based
1868 	 * on the provided start_pip
1869 	 */
1870 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
1871 
	/*
	 * There is no default case: any other policy value selects nothing
	 * and ends up in the "no path" handling after the switch.
	 */
1872 	switch (lbp) {
1873 	case LOAD_BALANCE_NONE:
1874 		/*
1875 		 * Load balancing is None  or Alternate path mode
1876 		 * Start looking for a online mdi_pathinfo node starting from
1877 		 * last known selected path
1878 		 */
1879 		preferred = 1;
1880 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
1881 		if (pip == NULL) {
1882 			pip = head;
1883 		}
1884 		start = pip;
1885 		do {
1886 			MDI_PI_LOCK(pip);
1887 			/*
1888 			 * No need to explicitly check if the path is disabled.
1889 			 * Since we are checking for state == ONLINE and the
1890 			 * same variable is used for DISABLE/ENABLE information.
1891 			 */
1892 			if (MDI_PI(pip)->pi_state  ==
1893 				MDI_PATHINFO_STATE_ONLINE &&
1894 				preferred == MDI_PI(pip)->pi_preferred) {
1895 				/*
1896 				 * Return the path in hold state. Caller should
1897 				 * release the hold by calling mdi_rele_path()
1898 				 */
1899 				MDI_PI_HOLD(pip);
1900 				MDI_PI_UNLOCK(pip);
1901 				ct->ct_path_last = pip;
1902 				*ret_pip = pip;
1903 				MDI_CLIENT_UNLOCK(ct);
1904 				return (MDI_SUCCESS);
1905 			}
1906 
1907 			/*
1908 			 * Path is busy.
1909 			 */
1910 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
1911 			    MDI_PI_IS_TRANSIENT(pip))
1912 				retry = 1;
1913 			/*
1914 			 * Keep looking for a next available online path
1915 			 */
1916 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1917 			if (next == NULL) {
1918 				next = head;
1919 			}
1920 			MDI_PI_UNLOCK(pip);
1921 			pip = next;
			/*
			 * Back at the starting point: the first time switch
			 * to non-preferred paths and go around once more,
			 * the second time end the walk.
			 */
1922 			if (start == pip && preferred) {
1923 				preferred = 0;
1924 			} else if (start == pip && !preferred) {
1925 				cont = 0;
1926 			}
1927 		} while (cont);
1928 		break;
1929 
1930 	case LOAD_BALANCE_LBA:
1931 		/*
1932 		 * Make sure we are looking
1933 		 * for an online path. Otherwise, if it is for a STANDBY
1934 		 * path request, it will go through and fetch an ONLINE
1935 		 * path which is not desirable.
1936 		 */
1937 		if ((ct->ct_lb_args != NULL) &&
1938 			    (ct->ct_lb_args->region_size) && bp &&
1939 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
1940 			if (i_mdi_lba_lb(ct, ret_pip, bp)
1941 				    == MDI_SUCCESS) {
1942 				MDI_CLIENT_UNLOCK(ct);
1943 				return (MDI_SUCCESS);
1944 			}
1945 		}
		/* LBA selection not applicable or failed: use round robin */
1946 		/*  FALLTHROUGH */
1947 	case LOAD_BALANCE_RR:
1948 		/*
1949 		 * Load balancing is Round Robin. Start looking for a online
1950 		 * mdi_pathinfo node starting from last known selected path
1951 		 * as the start point.  If override flags are specified,
1952 		 * process accordingly.
1953 		 * If the search is already in effect(start_pip not null),
1954 		 * then lets just use the same path preference to continue the
1955 		 * traversal.
1956 		 */
1957 
1958 		if (start_pip != NULL) {
1959 			preferred = MDI_PI(start_pip)->pi_preferred;
1960 		} else {
1961 			preferred = 1;
1962 		}
1963 
		/*
		 * The walk begins with the path following 'start': the last
		 * selected path for standard behavior, start_pip otherwise.
		 */
1964 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
1965 		if (start == NULL) {
1966 			pip = head;
1967 		} else {
1968 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
1969 			if (pip == NULL) {
1970 				if (!sb) {
1971 					if (preferred == 0) {
1972 						/*
1973 						 * Looks like we have completed
1974 						 * the traversal as preferred
1975 						 * value is 0. Time to bail out.
1976 						 */
1977 						*ret_pip = NULL;
1978 						MDI_CLIENT_UNLOCK(ct);
1979 						return (MDI_NOPATH);
1980 					} else {
1981 						/*
1982 						 * Looks like we reached the
1983 						 * end of the list. Lets enable
1984 						 * traversal of non preferred
1985 						 * paths.
1986 						 */
1987 						preferred = 0;
1988 					}
1989 				}
1990 				pip = head;
1991 			}
1992 		}
1993 		start = pip;
1994 		do {
1995 			MDI_PI_LOCK(pip);
			/*
			 * cond: does this path match the requested state(s)
			 * and the preference of the current pass?
			 */
1996 			if (sb) {
1997 				cond = ((MDI_PI(pip)->pi_state ==
1998 				    MDI_PATHINFO_STATE_ONLINE &&
1999 					MDI_PI(pip)->pi_preferred ==
2000 						preferred) ? 1 : 0);
2001 			} else {
2002 				if (flags == MDI_SELECT_ONLINE_PATH) {
2003 					cond = ((MDI_PI(pip)->pi_state ==
2004 					    MDI_PATHINFO_STATE_ONLINE &&
2005 						MDI_PI(pip)->pi_preferred ==
2006 						preferred) ? 1 : 0);
2007 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2008 					cond = ((MDI_PI(pip)->pi_state ==
2009 					    MDI_PATHINFO_STATE_STANDBY &&
2010 						MDI_PI(pip)->pi_preferred ==
2011 						preferred) ? 1 : 0);
2012 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2013 				    MDI_SELECT_STANDBY_PATH)) {
2014 					cond = (((MDI_PI(pip)->pi_state ==
2015 					    MDI_PATHINFO_STATE_ONLINE ||
2016 					    (MDI_PI(pip)->pi_state ==
2017 					    MDI_PATHINFO_STATE_STANDBY)) &&
2018 						MDI_PI(pip)->pi_preferred ==
2019 						preferred) ? 1 : 0);
2020 				} else {
					/* any other flags value matches nothing */
2021 					cond = 0;
2022 				}
2023 			}
2024 			/*
2025 			 * No need to explicitly check if the path is disabled.
2026 			 * Since we are checking for state == ONLINE and the
2027 			 * same variable is used for DISABLE/ENABLE information.
2028 			 */
2029 			if (cond) {
2030 				/*
2031 				 * Return the path in hold state. Caller should
2032 				 * release the hold by calling mdi_rele_path()
2033 				 */
2034 				MDI_PI_HOLD(pip);
2035 				MDI_PI_UNLOCK(pip);
2036 				if (sb)
2037 					ct->ct_path_last = pip;
2038 				*ret_pip = pip;
2039 				MDI_CLIENT_UNLOCK(ct);
2040 				return (MDI_SUCCESS);
2041 			}
2042 			/*
2043 			 * Path is busy.
2044 			 */
2045 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2046 			    MDI_PI_IS_TRANSIENT(pip))
2047 				retry = 1;
2048 
2049 			/*
2050 			 * Keep looking for a next available online path
2051 			 */
			/*
			 * do_again is always reached with pip locked: either
			 * by falling through from above or via the goto
			 * below, which locks pip first.
			 */
2052 do_again:
2053 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2054 			if (next == NULL) {
2055 				if (!sb) {
2056 					if (preferred == 1) {
2057 						/*
2058 						 * Looks like we reached the
2059 						 * end of the list. Lets enable
2060 						 * traversal of non preferred
2061 						 * paths.
2062 						 */
2063 						preferred = 0;
2064 						next = head;
2065 					} else {
2066 						/*
2067 						 * We have done both the passes
2068 						 * Preferred as well as for
2069 						 * Non-preferred. Bail out now.
2070 						 */
2071 						cont = 0;
2072 					}
2073 				} else {
2074 					/*
2075 					 * Standard behavior case.
2076 					 */
2077 					next = head;
2078 				}
2079 			}
2080 			MDI_PI_UNLOCK(pip);
2081 			if (cont == 0) {
2082 				break;
2083 			}
2084 			pip = next;
2085 
2086 			if (!sb) {
2087 				/*
2088 				 * We need to handle the selection of
2089 				 * non-preferred path in the following
2090 				 * case:
2091 				 *
2092 				 * +------+   +------+   +------+   +-----+
2093 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2094 				 * +------+   +------+   +------+   +-----+
2095 				 *
2096 				 * If we start the search with B, we need to
2097 				 * skip beyond B to pick C which is non -
2098 				 * preferred in the second pass. The following
2099 				 * test, if true, will allow us to skip over
2100 				 * the 'start'(B in the example) to select
2101 				 * other non preferred elements.
2102 				 */
2103 				if ((start_pip != NULL) && (start_pip == pip) &&
2104 				    (MDI_PI(start_pip)->pi_preferred
2105 				    != preferred)) {
2106 					/*
2107 					 * try again after going past the start
2108 					 * pip
2109 					 */
2110 					MDI_PI_LOCK(pip);
2111 					goto do_again;
2112 				}
2113 			} else {
2114 				/*
2115 				 * Standard behavior case
2116 				 */
2117 				if (start == pip && preferred) {
2118 					/* look for nonpreferred paths */
2119 					preferred = 0;
2120 				} else if (start == pip && !preferred) {
2121 					/*
2122 					 * Exit condition
2123 					 */
2124 					cont = 0;
2125 				}
2126 			}
2127 		} while (cont);
2128 		break;
2129 	}
2130 
	/*
	 * No path was selected.  Report MDI_BUSY when a transient path was
	 * seen during the walk, MDI_NOPATH otherwise.
	 */
2131 	MDI_CLIENT_UNLOCK(ct);
2132 	if (retry == 1) {
2133 		return (MDI_BUSY);
2134 	} else {
2135 		return (MDI_NOPATH);
2136 	}
2137 }
2138 
2139 /*
2140  * For a client, return the next available path to any phci
2141  *
2142  * Note:
2143  *		Caller should hold the branch's devinfo node to get a consistent
2144  *		snap shot of the mdi_pathinfo nodes.
2145  *
2146  *		Please note that even the list is stable the mdi_pathinfo
2147  *		node state and properties are volatile.  The caller should lock
2148  *		and unlock the nodes by calling mdi_pi_lock() and
2149  *		mdi_pi_unlock() functions to get a stable properties.
2150  *
2151  *		If there is a need to use the nodes beyond the hold of the
2152  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2153  *		need to be held against unexpected removal by calling
2154  *		mdi_hold_path() and should be released by calling
2155  *		mdi_rele_path() on completion.
2156  */
2157 mdi_pathinfo_t *
2158 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2159 {
2160 	mdi_client_t *ct;
2161 
2162 	if (!MDI_CLIENT(ct_dip))
2163 		return (NULL);
2164 
2165 	/*
2166 	 * Walk through client link
2167 	 */
2168 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2169 	ASSERT(ct != NULL);
2170 
2171 	if (pip == NULL)
2172 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2173 
2174 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2175 }
2176 
2177 /*
2178  * For a phci, return the next available path to any client
2179  * Note: ditto mdi_get_next_phci_path()
2180  */
2181 mdi_pathinfo_t *
2182 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2183 {
2184 	mdi_phci_t *ph;
2185 
2186 	if (!MDI_PHCI(ph_dip))
2187 		return (NULL);
2188 
2189 	/*
2190 	 * Walk through pHCI link
2191 	 */
2192 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2193 	ASSERT(ph != NULL);
2194 
2195 	if (pip == NULL)
2196 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2197 
2198 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2199 }
2200 
2201 /*
2202  * mdi_get_nextpath():
2203  *		mdi_pathinfo node walker function.  Get the next node from the
2204  *		client or pHCI device list.
2205  *
2206  * XXX This is wrapper function for compatibility purposes only.
2207  *
2208  *	It doesn't work under Multi-level MPxIO, where a dip
2209  *	is both client and phci (which link should next_path follow?).
2210  *	Once Leadville is modified to call mdi_get_next_phci/client_path,
2211  *	this interface should be removed.
2212  */
2213 void
2214 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip,
2215     mdi_pathinfo_t **ret_pip)
2216 {
2217 	if (MDI_CLIENT(dip)) {
2218 		*ret_pip = mdi_get_next_phci_path(dip, pip);
2219 	} else if (MDI_PHCI(dip)) {
2220 		*ret_pip = mdi_get_next_client_path(dip, pip);
2221 	} else {
2222 		*ret_pip = NULL;
2223 	}
2224 }
2225 
2226 /*
2227  * mdi_hold_path():
2228  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2229  * Return Values:
2230  *		None
2231  */
2232 void
2233 mdi_hold_path(mdi_pathinfo_t *pip)
2234 {
2235 	if (pip) {
2236 		MDI_PI_LOCK(pip);
2237 		MDI_PI_HOLD(pip);
2238 		MDI_PI_UNLOCK(pip);
2239 	}
2240 }
2241 
2242 
2243 /*
2244  * mdi_rele_path():
2245  *		Release the mdi_pathinfo node which was selected
2246  *		through mdi_select_path() mechanism or manually held by
2247  *		calling mdi_hold_path().
2248  * Return Values:
2249  *		None
2250  */
2251 void
2252 mdi_rele_path(mdi_pathinfo_t *pip)
2253 {
2254 	if (pip) {
2255 		MDI_PI_LOCK(pip);
2256 		MDI_PI_RELE(pip);
2257 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2258 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2259 		}
2260 		MDI_PI_UNLOCK(pip);
2261 	}
2262 }
2263 
2264 
2265 /*
2266  * mdi_pi_lock():
2267  * 		Lock the mdi_pathinfo node.
2268  * Note:
2269  *		The caller should release the lock by calling mdi_pi_unlock()
2270  */
2271 void
2272 mdi_pi_lock(mdi_pathinfo_t *pip)
2273 {
2274 	ASSERT(pip != NULL);
2275 	if (pip) {
2276 		MDI_PI_LOCK(pip);
2277 	}
2278 }
2279 
2280 
2281 /*
2282  * mdi_pi_unlock():
2283  * 		Unlock the mdi_pathinfo node.
2284  * Note:
2285  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2286  */
2287 void
2288 mdi_pi_unlock(mdi_pathinfo_t *pip)
2289 {
2290 	ASSERT(pip != NULL);
2291 	if (pip) {
2292 		MDI_PI_UNLOCK(pip);
2293 	}
2294 }
2295 
2296 /*
2297  * mdi_pi_find():
2298  *		Search the list of mdi_pathinfo nodes attached to the
2299  *		pHCI/Client device node whose path address matches "paddr".
2300  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2301  *		found.
2302  * Return Values:
2303  *		mdi_pathinfo node handle
2304  *		NULL
2305  * Notes:
2306  *		Caller need not hold any locks to call this function.
2307  */
2308 mdi_pathinfo_t *
2309 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2310 {
2311 	mdi_phci_t		*ph;
2312 	mdi_vhci_t		*vh;
2313 	mdi_client_t		*ct;
2314 	mdi_pathinfo_t		*pip = NULL;
2315 
2316 	if ((pdip == NULL) || (paddr == NULL)) {
2317 		return (NULL);
2318 	}
2319 	ph = i_devi_get_phci(pdip);
2320 	if (ph == NULL) {
2321 		/*
2322 		 * Invalid pHCI device, Nothing more to do.
2323 		 */
2324 		MDI_DEBUG(2, (CE_WARN, NULL,
2325 		    "!mdi_pi_find: invalid phci"));
2326 		return (NULL);
2327 	}
2328 
2329 	vh = ph->ph_vhci;
2330 	if (vh == NULL) {
2331 		/*
2332 		 * Invalid vHCI device, Nothing more to do.
2333 		 */
2334 		MDI_DEBUG(2, (CE_WARN, NULL,
2335 		    "!mdi_pi_find: invalid phci"));
2336 		return (NULL);
2337 	}
2338 
2339 	/*
2340 	 * Look for client device identified by caddr (guid)
2341 	 */
2342 	if (caddr == NULL) {
2343 		/*
2344 		 * Find a mdi_pathinfo node under pHCI list for a matching
2345 		 * unit address.
2346 		 */
2347 		mutex_enter(&ph->ph_mutex);
2348 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2349 
2350 		while (pip != NULL) {
2351 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2352 				break;
2353 			}
2354 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2355 		}
2356 		mutex_exit(&ph->ph_mutex);
2357 		return (pip);
2358 	}
2359 
2360 	/*
2361 	 * XXX - Is the rest of the code in this function really necessary?
2362 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2363 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2364 	 * whether the search is based on the pathinfo nodes attached to
2365 	 * the pHCI or the client node, the result will be the same.
2366 	 */
2367 
2368 	/*
2369 	 * Find the client device corresponding to 'caddr'
2370 	 */
2371 	mutex_enter(&mdi_mutex);
2372 
2373 	/*
2374 	 * XXX - Passing NULL to the following function works as long as the
2375 	 * the client addresses (caddr) are unique per vhci basis.
2376 	 */
2377 	ct = i_mdi_client_find(vh, NULL, caddr);
2378 	if (ct == NULL) {
2379 		/*
2380 		 * Client not found, Obviously mdi_pathinfo node has not been
2381 		 * created yet.
2382 		 */
2383 		mutex_exit(&mdi_mutex);
2384 		return (pip);
2385 	}
2386 
2387 	/*
2388 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2389 	 * pHCI and paddr
2390 	 */
2391 	MDI_CLIENT_LOCK(ct);
2392 
2393 	/*
2394 	 * Release the global mutex as it is no more needed. Note: We always
2395 	 * respect the locking order while acquiring.
2396 	 */
2397 	mutex_exit(&mdi_mutex);
2398 
2399 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2400 	while (pip != NULL) {
2401 		/*
2402 		 * Compare the unit address
2403 		 */
2404 		if ((MDI_PI(pip)->pi_phci == ph) &&
2405 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2406 			break;
2407 		}
2408 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2409 	}
2410 	MDI_CLIENT_UNLOCK(ct);
2411 	return (pip);
2412 }
2413 
2414 /*
2415  * mdi_pi_alloc():
2416  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2417  *		The mdi_pathinfo node returned by this function identifies a
2418  *		unique device path is capable of having properties attached
2419  *		and passed to mdi_pi_online() to fully attach and online the
2420  *		path and client device node.
2421  *		The mdi_pathinfo node returned by this function must be
2422  *		destroyed using mdi_pi_free() if the path is no longer
2423  *		operational or if the caller fails to attach a client device
2424  *		node when calling mdi_pi_online(). The framework will not free
2425  *		the resources allocated.
2426  *		This function can be called from both interrupt and kernel
2427  *		contexts.  DDI_NOSLEEP flag should be used while calling
2428  *		from interrupt contexts.
2429  * Return Values:
2430  *		MDI_SUCCESS
2431  *		MDI_FAILURE
2432  *		MDI_NOMEM
2433  */
2434 /*ARGSUSED*/
2435 int
2436 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2437     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2438 {
2439 	mdi_vhci_t	*vh;
2440 	mdi_phci_t	*ph;
2441 	mdi_client_t	*ct;
2442 	mdi_pathinfo_t	*pip = NULL;
2443 	dev_info_t	*cdip;
2444 	int		rv = MDI_NOMEM;
2445 	int		path_allocated = 0;
2446 
2447 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2448 	    ret_pip == NULL) {
2449 		/* Nothing more to do */
2450 		return (MDI_FAILURE);
2451 	}
2452 
2453 	*ret_pip = NULL;
2454 	ph = i_devi_get_phci(pdip);
2455 	ASSERT(ph != NULL);
2456 	if (ph == NULL) {
2457 		/* Invalid pHCI device, return failure */
2458 		MDI_DEBUG(1, (CE_WARN, NULL,
2459 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2460 		return (MDI_FAILURE);
2461 	}
2462 
2463 	MDI_PHCI_LOCK(ph);
2464 	vh = ph->ph_vhci;
2465 	if (vh == NULL) {
2466 		/* Invalid vHCI device, return failure */
2467 		MDI_DEBUG(1, (CE_WARN, NULL,
2468 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2469 		MDI_PHCI_UNLOCK(ph);
2470 		return (MDI_FAILURE);
2471 	}
2472 
2473 	if (MDI_PHCI_IS_READY(ph) == 0) {
2474 		/*
2475 		 * Do not allow new node creation when pHCI is in
2476 		 * offline/suspended states
2477 		 */
2478 		MDI_DEBUG(1, (CE_WARN, NULL,
2479 		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
2480 		MDI_PHCI_UNLOCK(ph);
2481 		return (MDI_BUSY);
2482 	}
2483 	MDI_PHCI_UNSTABLE(ph);
2484 	MDI_PHCI_UNLOCK(ph);
2485 
2486 	/* look for a matching client, create one if not found */
2487 	mutex_enter(&mdi_mutex);
2488 	ct = i_mdi_client_find(vh, cname, caddr);
2489 	if (ct == NULL) {
2490 		ct = i_mdi_client_alloc(vh, cname, caddr);
2491 		ASSERT(ct != NULL);
2492 	}
2493 
2494 	if (ct->ct_dip == NULL) {
2495 		/*
2496 		 * Allocate a devinfo node
2497 		 */
2498 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2499 		    compatible, ncompatible);
2500 		if (ct->ct_dip == NULL) {
2501 			(void) i_mdi_client_free(vh, ct);
2502 			goto fail;
2503 		}
2504 	}
2505 	cdip = ct->ct_dip;
2506 
2507 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2508 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2509 
2510 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2511 	while (pip != NULL) {
2512 		/*
2513 		 * Compare the unit address
2514 		 */
2515 		if ((MDI_PI(pip)->pi_phci == ph) &&
2516 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2517 			break;
2518 		}
2519 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2520 	}
2521 
2522 	if (pip == NULL) {
2523 		/*
2524 		 * This is a new path for this client device.  Allocate and
2525 		 * initialize a new pathinfo node
2526 		 */
2527 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2528 		ASSERT(pip != NULL);
2529 		path_allocated = 1;
2530 	}
2531 	rv = MDI_SUCCESS;
2532 
2533 fail:
2534 	/*
2535 	 * Release the global mutex.
2536 	 */
2537 	mutex_exit(&mdi_mutex);
2538 
2539 	/*
2540 	 * Mark the pHCI as stable
2541 	 */
2542 	MDI_PHCI_LOCK(ph);
2543 	MDI_PHCI_STABLE(ph);
2544 	MDI_PHCI_UNLOCK(ph);
2545 	*ret_pip = pip;
2546 
2547 	if (path_allocated)
2548 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2549 
2550 	return (rv);
2551 }
2552 
2553 /*ARGSUSED*/
2554 int
2555 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2556     int flags, mdi_pathinfo_t **ret_pip)
2557 {
2558 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2559 	    flags, ret_pip));
2560 }
2561 
2562 /*
2563  * i_mdi_pi_alloc():
2564  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2565  * Return Values:
2566  *		mdi_pathinfo
2567  */
2568 
2569 /*ARGSUSED*/
2570 static mdi_pathinfo_t *
2571 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2572 {
2573 	mdi_pathinfo_t	*pip;
2574 	int		ct_circular;
2575 	int		ph_circular;
2576 	int		se_flag;
2577 	int		kmem_flag;
2578 
2579 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2580 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2581 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2582 	    MDI_PATHINFO_STATE_TRANSIENT;
2583 
2584 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2585 		MDI_PI_SET_USER_DISABLE(pip);
2586 
2587 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2588 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2589 
2590 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2591 		MDI_PI_SET_DRV_DISABLE(pip);
2592 
2593 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2594 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2595 	MDI_PI(pip)->pi_client = ct;
2596 	MDI_PI(pip)->pi_phci = ph;
2597 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2598 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2599 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2600 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2601 	MDI_PI(pip)->pi_pprivate = NULL;
2602 	MDI_PI(pip)->pi_cprivate = NULL;
2603 	MDI_PI(pip)->pi_vprivate = NULL;
2604 	MDI_PI(pip)->pi_client_link = NULL;
2605 	MDI_PI(pip)->pi_phci_link = NULL;
2606 	MDI_PI(pip)->pi_ref_cnt = 0;
2607 	MDI_PI(pip)->pi_kstats = NULL;
2608 	MDI_PI(pip)->pi_preferred = 1;
2609 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2610 
2611 	/*
2612 	 * Lock both dev_info nodes against changes in parallel.
2613 	 */
2614 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2615 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2616 
2617 	i_mdi_phci_add_path(ph, pip);
2618 	i_mdi_client_add_path(ct, pip);
2619 
2620 	ndi_devi_exit(ph->ph_dip, ph_circular);
2621 	ndi_devi_exit(ct->ct_dip, ct_circular);
2622 
2623 	/* determine interrupt context */
2624 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2625 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2626 
2627 	i_ddi_di_cache_invalidate(kmem_flag);
2628 
2629 	return (pip);
2630 }
2631 
2632 /*
2633  * i_mdi_phci_add_path():
2634  * 		Add a mdi_pathinfo node to pHCI list.
2635  * Notes:
2636  *		Caller should per-pHCI mutex
2637  */
2638 
2639 static void
2640 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2641 {
2642 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2643 
2644 	if (ph->ph_path_head == NULL) {
2645 		ph->ph_path_head = pip;
2646 	} else {
2647 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2648 	}
2649 	ph->ph_path_tail = pip;
2650 	ph->ph_path_count++;
2651 }
2652 
2653 /*
2654  * i_mdi_client_add_path():
2655  *		Add mdi_pathinfo node to client list
2656  */
2657 
2658 static void
2659 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2660 {
2661 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2662 
2663 	if (ct->ct_path_head == NULL) {
2664 		ct->ct_path_head = pip;
2665 	} else {
2666 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2667 	}
2668 	ct->ct_path_tail = pip;
2669 	ct->ct_path_count++;
2670 }
2671 
/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device.
 *
 *		The node must be in the offline, init or initing state.  If
 *		references are still outstanding, the function waits (up to
 *		60 seconds per wakeup) on pi_ref_cv for them to drain and
 *		gives up with MDI_BUSY on timeout.  The vHCI's vo_pi_uninit()
 *		entry point, when provided, is called before the node is
 *		destroyed unless the node is still in the initing state.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		Otherwise whatever vo_pi_uninit() returned.
 */

/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client"));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline, init or initing and the
	 * reference count is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.  pi_ref_cv is
		 * broadcast by mdi_rele_path() when the count drops to zero;
		 * each wait is bounded to 60 seconds.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	/*
	 * Remember, while the path lock is still held, whether this path
	 * owns a power-management hold that must be released on the client
	 * once the path is gone.
	 */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	/*
	 * Drop the path from the vhci cache up front; it is added back
	 * below if vo_pi_uninit() fails with MDI_FAILURE.
	 */
	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till mdi_mutex is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock so that mdi_mutex can be taken first, as
	 * required by the default locking order (mdi_mutex before ct_mutex),
	 * then retake the client lock.
	 */
	MDI_CLIENT_UNLOCK(ct);
	mutex_enter(&mdi_mutex);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * A path still in the initing state is not passed to
	 * vo_pi_uninit(); rv then stays MDI_SUCCESS.
	 */
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip is freed by this call and must not be used afterwards */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			mutex_exit(&mdi_mutex);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	mutex_exit(&mdi_mutex);

	/*
	 * The path was not freed: restore the vhci cache entry removed
	 * above.
	 */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}
2828 
2829 /*
2830  * i_mdi_pi_free():
2831  *		Free the mdi_pathinfo node
2832  */
2833 static void
2834 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
2835 {
2836 	int	ct_circular;
2837 	int	ph_circular;
2838 	int	se_flag;
2839 	int	kmem_flag;
2840 
2841 	/*
2842 	 * remove any per-path kstats
2843 	 */
2844 	i_mdi_pi_kstat_destroy(pip);
2845 
2846 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2847 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2848 
2849 	i_mdi_client_remove_path(ct, pip);
2850 	i_mdi_phci_remove_path(ph, pip);
2851 
2852 	ndi_devi_exit(ph->ph_dip, ph_circular);
2853 	ndi_devi_exit(ct->ct_dip, ct_circular);
2854 
2855 	/* determine interrupt context */
2856 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2857 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2858 
2859 	i_ddi_di_cache_invalidate(kmem_flag);
2860 
2861 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
2862 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
2863 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
2864 	if (MDI_PI(pip)->pi_addr) {
2865 		kmem_free(MDI_PI(pip)->pi_addr,
2866 		    strlen(MDI_PI(pip)->pi_addr) + 1);
2867 		MDI_PI(pip)->pi_addr = NULL;
2868 	}
2869 
2870 	if (MDI_PI(pip)->pi_prop) {
2871 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
2872 		MDI_PI(pip)->pi_prop = NULL;
2873 	}
2874 	kmem_free(pip, sizeof (struct mdi_pathinfo));
2875 }
2876 
2877 
2878 /*
2879  * i_mdi_phci_remove_path():
2880  * 		Remove a mdi_pathinfo node from pHCI list.
2881  * Notes:
2882  *		Caller should hold per-pHCI mutex
2883  */
2884 
2885 static void
2886 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2887 {
2888 	mdi_pathinfo_t	*prev = NULL;
2889 	mdi_pathinfo_t	*path = NULL;
2890 
2891 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2892 
2893 	path = ph->ph_path_head;
2894 	while (path != NULL) {
2895 		if (path == pip) {
2896 			break;
2897 		}
2898 		prev = path;
2899 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2900 	}
2901 
2902 	if (path) {
2903 		ph->ph_path_count--;
2904 		if (prev) {
2905 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
2906 		} else {
2907 			ph->ph_path_head =
2908 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2909 		}
2910 		if (ph->ph_path_tail == path) {
2911 			ph->ph_path_tail = prev;
2912 		}
2913 	}
2914 
2915 	/*
2916 	 * Clear the pHCI link
2917 	 */
2918 	MDI_PI(pip)->pi_phci_link = NULL;
2919 	MDI_PI(pip)->pi_phci = NULL;
2920 }
2921 
2922 /*
2923  * i_mdi_client_remove_path():
2924  * 		Remove a mdi_pathinfo node from client path list.
2925  */
2926 
2927 static void
2928 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2929 {
2930 	mdi_pathinfo_t	*prev = NULL;
2931 	mdi_pathinfo_t	*path;
2932 
2933 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2934 
2935 	path = ct->ct_path_head;
2936 	while (path != NULL) {
2937 		if (path == pip) {
2938 			break;
2939 		}
2940 		prev = path;
2941 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2942 	}
2943 
2944 	if (path) {
2945 		ct->ct_path_count--;
2946 		if (prev) {
2947 			MDI_PI(prev)->pi_client_link =
2948 			    MDI_PI(path)->pi_client_link;
2949 		} else {
2950 			ct->ct_path_head =
2951 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2952 		}
2953 		if (ct->ct_path_tail == path) {
2954 			ct->ct_path_tail = prev;
2955 		}
2956 		if (ct->ct_path_last == path) {
2957 			ct->ct_path_last = ct->ct_path_head;
2958 		}
2959 	}
2960 	MDI_PI(pip)->pi_client_link = NULL;
2961 	MDI_PI(pip)->pi_client = NULL;
2962 }
2963 
/*
 * i_mdi_pi_state_change():
 *		Move a mdi_pathinfo node to the requested state (online,
 *		standby, fault or offline).  Common worker for
 *		mdi_pi_online(), mdi_pi_standby(), mdi_pi_fault() and
 *		mdi_pi_offline().
 *
 *		The vHCI's vo_pi_init() entry point is called first for a
 *		node that is still in the initing state, and the vHCI's
 *		vo_pi_state_change() entry point is called to carry out the
 *		transition.  On success the client state is re-evaluated and
 *		the client devinfo node may be onlined or offlined to match.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		Otherwise whatever vo_pi_state_change() returned.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point.  The path lock is dropped
	 * across the callback and retaken afterwards; on callback failure
	 * the function returns with no locks held.
	 */

	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 *
	 * NOTE(review): i_mdi_phci_lock() is defined elsewhere; it is passed
	 * pip presumably because the pHCI lock is being taken out of the
	 * default order while the path lock is held -- confirm.
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/*
	 * The pHCI stays marked unstable until state_change_exit, which
	 * every return path below goes through.
	 */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code.  Neither the
				 * client nor the path lock is held when
				 * jumping to state_change_exit.
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Let the vHCI driver carry out the transition; no MDI locks are
	 * held across the callback.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	/*
	 * Settle the transient state: commit it on success, otherwise fall
	 * back to the state the path was in before the request.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		/*
		 * The client state is only re-evaluated once no other
		 * state change is in flight on this client.
		 */
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip &&
				    (i_ddi_node_state(cdip) < DS_READY) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3282 
3283 /*
3284  * mdi_pi_online():
3285  *		Place the path_info node in the online state.  The path is
3286  *		now available to be selected by mdi_select_path() for
3287  *		transporting I/O requests to client devices.
3288  * Return Values:
3289  *		MDI_SUCCESS
3290  *		MDI_FAILURE
3291  */
3292 int
3293 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3294 {
3295 	mdi_client_t *ct = MDI_PI(pip)->pi_client;
3296 	dev_info_t *cdip;
3297 	int		client_held = 0;
3298 	int rv;
3299 
3300 	ASSERT(ct != NULL);
3301 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3302 	if (rv != MDI_SUCCESS)
3303 		return (rv);
3304 
3305 	MDI_PI_LOCK(pip);
3306 	if (MDI_PI(pip)->pi_pm_held == 0) {
3307 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3308 		    "i_mdi_pm_hold_pip\n"));
3309 		i_mdi_pm_hold_pip(pip);
3310 		client_held = 1;
3311 	}
3312 	MDI_PI_UNLOCK(pip);
3313 
3314 	if (client_held) {
3315 		MDI_CLIENT_LOCK(ct);
3316 		if (ct->ct_power_cnt == 0) {
3317 			rv = i_mdi_power_all_phci(ct);
3318 		}
3319 
3320 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3321 		    "i_mdi_pm_hold_client\n"));
3322 		i_mdi_pm_hold_client(ct, 1);
3323 		MDI_CLIENT_UNLOCK(ct);
3324 	}
3325 
3326 	/*
3327 	 * Create the per-path (pathinfo) IO and error kstats which
3328 	 * are reported via iostat(1m).
3329 	 *
3330 	 * Defer creating the per-path kstats if device is not yet
3331 	 * attached;  the names of the kstats are constructed in part
3332 	 * using the devices instance number which is assigned during
3333 	 * process of attaching the client device.
3334 	 *
3335 	 * The framework post_attach handler, mdi_post_attach(), is
3336 	 * is responsible for initializing the client's pathinfo list
3337 	 * once successfully attached.
3338 	 */
3339 	cdip = ct->ct_dip;
3340 	ASSERT(cdip);
3341 	if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED))
3342 		return (rv);
3343 
3344 	MDI_CLIENT_LOCK(ct);
3345 	rv = i_mdi_pi_kstat_create(pip);
3346 	MDI_CLIENT_UNLOCK(ct);
3347 	return (rv);
3348 }
3349 
3350 /*
3351  * mdi_pi_standby():
3352  *		Place the mdi_pathinfo node in standby state
3353  *
3354  * Return Values:
3355  *		MDI_SUCCESS
3356  *		MDI_FAILURE
3357  */
3358 int
3359 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3360 {
3361 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3362 }
3363 
3364 /*
3365  * mdi_pi_fault():
3366  *		Place the mdi_pathinfo node in fault'ed state
3367  * Return Values:
3368  *		MDI_SUCCESS
3369  *		MDI_FAILURE
3370  */
3371 int
3372 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3373 {
3374 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3375 }
3376 
3377 /*
3378  * mdi_pi_offline():
3379  *		Offline a mdi_pathinfo node.
3380  * Return Values:
3381  *		MDI_SUCCESS
3382  *		MDI_FAILURE
3383  */
3384 int
3385 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3386 {
3387 	int	ret, client_held = 0;
3388 	mdi_client_t	*ct;
3389 
3390 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3391 
3392 	if (ret == MDI_SUCCESS) {
3393 		MDI_PI_LOCK(pip);
3394 		if (MDI_PI(pip)->pi_pm_held) {
3395 			client_held = 1;
3396 		}
3397 		MDI_PI_UNLOCK(pip);
3398 
3399 		if (client_held) {
3400 			ct = MDI_PI(pip)->pi_client;
3401 			MDI_CLIENT_LOCK(ct);
3402 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3403 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3404 			i_mdi_pm_rele_client(ct, 1);
3405 			MDI_CLIENT_UNLOCK(ct);
3406 		}
3407 	}
3408 
3409 	return (ret);
3410 }
3411 
3412 /*
3413  * i_mdi_pi_offline():
3414  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3415  */
3416 static int
3417 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3418 {
3419 	dev_info_t	*vdip = NULL;
3420 	mdi_vhci_t	*vh = NULL;
3421 	mdi_client_t	*ct = NULL;
3422 	int		(*f)();
3423 	int		rv;
3424 
3425 	MDI_PI_LOCK(pip);
3426 	ct = MDI_PI(pip)->pi_client;
3427 	ASSERT(ct != NULL);
3428 
3429 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3430 		/*
3431 		 * Give a chance for pending I/Os to complete.
3432 		 */
3433 		MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3434 		    "%d cmds still pending on path: %p\n",
3435 		    MDI_PI(pip)->pi_ref_cnt, pip));
3436 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3437 		    &MDI_PI(pip)->pi_mutex,
3438 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3439 			/*
3440 			 * The timeout time reached without ref_cnt being zero
3441 			 * being signaled.
3442 			 */
3443 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3444 			    "Timeout reached on path %p without the cond\n",
3445 			    pip));
3446 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3447 			    "%d cmds still pending on path: %p\n",
3448 			    MDI_PI(pip)->pi_ref_cnt, pip));
3449 		}
3450 	}
3451 	vh = ct->ct_vhci;
3452 	vdip = vh->vh_dip;
3453 
3454 	/*
3455 	 * Notify vHCI that has registered this event
3456 	 */
3457 	ASSERT(vh->vh_ops);
3458 	f = vh->vh_ops->vo_pi_state_change;
3459 
3460 	if (f != NULL) {
3461 		MDI_PI_UNLOCK(pip);
3462 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3463 		    flags)) != MDI_SUCCESS) {
3464 			MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed "
3465 			    "vdip 0x%x, pip 0x%x", vdip, pip));
3466 		}
3467 		MDI_PI_LOCK(pip);
3468 	}
3469 
3470 	/*
3471 	 * Set the mdi_pathinfo node state and clear the transient condition
3472 	 */
3473 	MDI_PI_SET_OFFLINE(pip);
3474 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3475 	MDI_PI_UNLOCK(pip);
3476 
3477 	MDI_CLIENT_LOCK(ct);
3478 	if (rv == MDI_SUCCESS) {
3479 		if (ct->ct_unstable == 0) {
3480 			dev_info_t	*cdip = ct->ct_dip;
3481 
3482 			/*
3483 			 * Onlining the mdi_pathinfo node will impact the
3484 			 * client state Update the client and dev_info node
3485 			 * state accordingly
3486 			 */
3487 			i_mdi_client_update_state(ct);
3488 			rv = NDI_SUCCESS;
3489 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3490 				if (cdip &&
3491 				    (i_ddi_node_state(cdip) >=
3492 				    DS_INITIALIZED)) {
3493 					MDI_CLIENT_UNLOCK(ct);
3494 					rv = ndi_devi_offline(cdip, 0);
3495 					MDI_CLIENT_LOCK(ct);
3496 					if (rv != NDI_SUCCESS) {
3497 						/*
3498 						 * ndi_devi_offline failed.
3499 						 * Reset client flags to
3500 						 * online.
3501 						 */
3502 						MDI_DEBUG(4, (CE_WARN, cdip,
3503 						    "!ndi_devi_offline: failed "
3504 						    " Error: %x", rv));
3505 						MDI_CLIENT_SET_ONLINE(ct);
3506 					}
3507 				}
3508 			}
3509 			/*
3510 			 * Convert to MDI error code
3511 			 */
3512 			switch (rv) {
3513 			case NDI_SUCCESS:
3514 				rv = MDI_SUCCESS;
3515 				break;
3516 			case NDI_BUSY:
3517 				rv = MDI_BUSY;
3518 				break;
3519 			default:
3520 				rv = MDI_FAILURE;
3521 				break;
3522 			}
3523 		}
3524 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3525 		i_mdi_report_path_state(ct, pip);
3526 	}
3527 
3528 	MDI_CLIENT_UNLOCK(ct);
3529 
3530 	/*
3531 	 * Change in the mdi_pathinfo node state will impact the client state
3532 	 */
3533 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3534 	    ct, pip));
3535 	return (rv);
3536 }
3537 
3538 
3539 /*
3540  * mdi_pi_get_addr():
3541  *		Get the unit address associated with a mdi_pathinfo node
3542  *
3543  * Return Values:
3544  *		char *
3545  */
3546 char *
3547 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3548 {
3549 	if (pip == NULL)
3550 		return (NULL);
3551 
3552 	return (MDI_PI(pip)->pi_addr);
3553 }
3554 
3555 /*
3556  * mdi_pi_get_client():
3557  *		Get the client devinfo associated with a mdi_pathinfo node
3558  *
3559  * Return Values:
3560  *		Handle to client device dev_info node
3561  */
3562 dev_info_t *
3563 mdi_pi_get_client(mdi_pathinfo_t *pip)
3564 {
3565 	dev_info_t	*dip = NULL;
3566 	if (pip) {
3567 		dip = MDI_PI(pip)->pi_client->ct_dip;
3568 	}
3569 	return (dip);
3570 }
3571 
3572 /*
3573  * mdi_pi_get_phci():
3574  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3575  * Return Values:
3576  *		Handle to dev_info node
3577  */
3578 dev_info_t *
3579 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3580 {
3581 	dev_info_t	*dip = NULL;
3582 	if (pip) {
3583 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3584 	}
3585 	return (dip);
3586 }
3587 
3588 /*
3589  * mdi_pi_get_client_private():
3590  *		Get the client private information associated with the
3591  *		mdi_pathinfo node
3592  */
3593 void *
3594 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3595 {
3596 	void *cprivate = NULL;
3597 	if (pip) {
3598 		cprivate = MDI_PI(pip)->pi_cprivate;
3599 	}
3600 	return (cprivate);
3601 }
3602 
3603 /*
3604  * mdi_pi_set_client_private():
3605  *		Set the client private information in the mdi_pathinfo node
3606  */
3607 void
3608 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3609 {
3610 	if (pip) {
3611 		MDI_PI(pip)->pi_cprivate = priv;
3612 	}
3613 }
3614 
3615 /*
3616  * mdi_pi_get_phci_private():
3617  *		Get the pHCI private information associated with the
3618  *		mdi_pathinfo node
3619  */
3620 caddr_t
3621 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3622 {
3623 	caddr_t	pprivate = NULL;
3624 	if (pip) {
3625 		pprivate = MDI_PI(pip)->pi_pprivate;
3626 	}
3627 	return (pprivate);
3628 }
3629 
3630 /*
3631  * mdi_pi_set_phci_private():
3632  *		Set the pHCI private information in the mdi_pathinfo node
3633  */
3634 void
3635 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3636 {
3637 	if (pip) {
3638 		MDI_PI(pip)->pi_pprivate = priv;
3639 	}
3640 }
3641 
3642 /*
3643  * mdi_pi_get_state():
3644  *		Get the mdi_pathinfo node state. Transient states are internal
3645  *		and not provided to the users
3646  */
3647 mdi_pathinfo_state_t
3648 mdi_pi_get_state(mdi_pathinfo_t *pip)
3649 {
3650 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3651 
3652 	if (pip) {
3653 		if (MDI_PI_IS_TRANSIENT(pip)) {
3654 			/*
3655 			 * mdi_pathinfo is in state transition.  Return the
3656 			 * last good state.
3657 			 */
3658 			state = MDI_PI_OLD_STATE(pip);
3659 		} else {
3660 			state = MDI_PI_STATE(pip);
3661 		}
3662 	}
3663 	return (state);
3664 }
3665 
3666 /*
3667  * Note that the following function needs to be the new interface for
3668  * mdi_pi_get_state when mpxio gets integrated to ON.
3669  */
3670 int
3671 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
3672 		uint32_t *ext_state)
3673 {
3674 	*state = MDI_PATHINFO_STATE_INIT;
3675 
3676 	if (pip) {
3677 		if (MDI_PI_IS_TRANSIENT(pip)) {
3678 			/*
3679 			 * mdi_pathinfo is in state transition.  Return the
3680 			 * last good state.
3681 			 */
3682 			*state = MDI_PI_OLD_STATE(pip);
3683 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
3684 		} else {
3685 			*state = MDI_PI_STATE(pip);
3686 			*ext_state = MDI_PI_EXT_STATE(pip);
3687 		}
3688 	}
3689 	return (MDI_SUCCESS);
3690 }
3691 
3692 /*
3693  * mdi_pi_get_preferred:
3694  *	Get the preferred path flag
3695  */
3696 int
3697 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
3698 {
3699 	if (pip) {
3700 		return (MDI_PI(pip)->pi_preferred);
3701 	}
3702 	return (0);
3703 }
3704 
3705 /*
3706  * mdi_pi_set_preferred:
3707  *	Set the preferred path flag
3708  */
3709 void
3710 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
3711 {
3712 	if (pip) {
3713 		MDI_PI(pip)->pi_preferred = preferred;
3714 	}
3715 }
3716 
3717 
3718 /*
3719  * mdi_pi_set_state():
3720  *		Set the mdi_pathinfo node state
3721  */
3722 void
3723 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
3724 {
3725 	uint32_t	ext_state;
3726 
3727 	if (pip) {
3728 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
3729 		MDI_PI(pip)->pi_state = state;
3730 		MDI_PI(pip)->pi_state |= ext_state;
3731 	}
3732 }
3733 
3734 /*
3735  * Property functions:
3736  */
3737 
3738 int
3739 i_map_nvlist_error_to_mdi(int val)
3740 {
3741 	int rv;
3742 
3743 	switch (val) {
3744 	case 0:
3745 		rv = DDI_PROP_SUCCESS;
3746 		break;
3747 	case EINVAL:
3748 	case ENOTSUP:
3749 		rv = DDI_PROP_INVAL_ARG;
3750 		break;
3751 	case ENOMEM:
3752 		rv = DDI_PROP_NO_MEMORY;
3753 		break;
3754 	default:
3755 		rv = DDI_PROP_NOT_FOUND;
3756 		break;
3757 	}
3758 	return (rv);
3759 }
3760 
3761 /*
3762  * mdi_pi_get_next_prop():
3763  * 		Property walk function.  The caller should hold mdi_pi_lock()
3764  *		and release by calling mdi_pi_unlock() at the end of walk to
3765  *		get a consistent value.
3766  */
3767 
3768 nvpair_t *
3769 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
3770 {
3771 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3772 		return (NULL);
3773 	}
3774 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3775 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
3776 }
3777 
3778 /*
3779  * mdi_prop_remove():
3780  * 		Remove the named property from the named list.
3781  */
3782 
3783 int
3784 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
3785 {
3786 	if (pip == NULL) {
3787 		return (DDI_PROP_NOT_FOUND);
3788 	}
3789 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3790 	MDI_PI_LOCK(pip);
3791 	if (MDI_PI(pip)->pi_prop == NULL) {
3792 		MDI_PI_UNLOCK(pip);
3793 		return (DDI_PROP_NOT_FOUND);
3794 	}
3795 	if (name) {
3796 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
3797 	} else {
3798 		char		nvp_name[MAXNAMELEN];
3799 		nvpair_t	*nvp;
3800 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
3801 		while (nvp) {
3802 			nvpair_t	*next;
3803 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
3804 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
3805 			    nvpair_name(nvp));
3806 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
3807 			    nvp_name);
3808 			nvp = next;
3809 		}
3810 	}
3811 	MDI_PI_UNLOCK(pip);
3812 	return (DDI_PROP_SUCCESS);
3813 }
3814 
3815 /*
3816  * mdi_prop_size():
3817  * 		Get buffer size needed to pack the property data.
3818  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
3819  *		buffer size.
3820  */
3821 
3822 int
3823 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
3824 {
3825 	int	rv;
3826 	size_t	bufsize;
3827 
3828 	*buflenp = 0;
3829 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3830 		return (DDI_PROP_NOT_FOUND);
3831 	}
3832 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3833 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
3834 	    &bufsize, NV_ENCODE_NATIVE);
3835 	*buflenp = bufsize;
3836 	return (i_map_nvlist_error_to_mdi(rv));
3837 }
3838 
3839 /*
3840  * mdi_prop_pack():
3841  * 		pack the property list.  The caller should hold the
3842  *		mdi_pathinfo_t node to get a consistent data
3843  */
3844 
3845 int
3846 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
3847 {
3848 	int	rv;
3849 	size_t	bufsize;
3850 
3851 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
3852 		return (DDI_PROP_NOT_FOUND);
3853 	}
3854 
3855 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3856 
3857 	bufsize = buflen;
3858 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
3859 	    NV_ENCODE_NATIVE, KM_SLEEP);
3860 
3861 	return (i_map_nvlist_error_to_mdi(rv));
3862 }
3863 
3864 /*
3865  * mdi_prop_update_byte():
3866  *		Create/Update a byte property
3867  */
3868 int
3869 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
3870 {
3871 	int rv;
3872 
3873 	if (pip == NULL) {
3874 		return (DDI_PROP_INVAL_ARG);
3875 	}
3876 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3877 	MDI_PI_LOCK(pip);
3878 	if (MDI_PI(pip)->pi_prop == NULL) {
3879 		MDI_PI_UNLOCK(pip);
3880 		return (DDI_PROP_NOT_FOUND);
3881 	}
3882 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
3883 	MDI_PI_UNLOCK(pip);
3884 	return (i_map_nvlist_error_to_mdi(rv));
3885 }
3886 
3887 /*
3888  * mdi_prop_update_byte_array():
3889  *		Create/Update a byte array property
3890  */
3891 int
3892 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
3893     uint_t nelements)
3894 {
3895 	int rv;
3896 
3897 	if (pip == NULL) {
3898 		return (DDI_PROP_INVAL_ARG);
3899 	}
3900 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3901 	MDI_PI_LOCK(pip);
3902 	if (MDI_PI(pip)->pi_prop == NULL) {
3903 		MDI_PI_UNLOCK(pip);
3904 		return (DDI_PROP_NOT_FOUND);
3905 	}
3906 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
3907 	MDI_PI_UNLOCK(pip);
3908 	return (i_map_nvlist_error_to_mdi(rv));
3909 }
3910 
3911 /*
3912  * mdi_prop_update_int():
3913  *		Create/Update a 32 bit integer property
3914  */
3915 int
3916 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
3917 {
3918 	int rv;
3919 
3920 	if (pip == NULL) {
3921 		return (DDI_PROP_INVAL_ARG);
3922 	}
3923 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3924 	MDI_PI_LOCK(pip);
3925 	if (MDI_PI(pip)->pi_prop == NULL) {
3926 		MDI_PI_UNLOCK(pip);
3927 		return (DDI_PROP_NOT_FOUND);
3928 	}
3929 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
3930 	MDI_PI_UNLOCK(pip);
3931 	return (i_map_nvlist_error_to_mdi(rv));
3932 }
3933 
3934 /*
3935  * mdi_prop_update_int64():
3936  *		Create/Update a 64 bit integer property
3937  */
3938 int
3939 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
3940 {
3941 	int rv;
3942 
3943 	if (pip == NULL) {
3944 		return (DDI_PROP_INVAL_ARG);
3945 	}
3946 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3947 	MDI_PI_LOCK(pip);
3948 	if (MDI_PI(pip)->pi_prop == NULL) {
3949 		MDI_PI_UNLOCK(pip);
3950 		return (DDI_PROP_NOT_FOUND);
3951 	}
3952 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
3953 	MDI_PI_UNLOCK(pip);
3954 	return (i_map_nvlist_error_to_mdi(rv));
3955 }
3956 
3957 /*
3958  * mdi_prop_update_int_array():
3959  *		Create/Update a int array property
3960  */
3961 int
3962 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
3963 	    uint_t nelements)
3964 {
3965 	int rv;
3966 
3967 	if (pip == NULL) {
3968 		return (DDI_PROP_INVAL_ARG);
3969 	}
3970 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3971 	MDI_PI_LOCK(pip);
3972 	if (MDI_PI(pip)->pi_prop == NULL) {
3973 		MDI_PI_UNLOCK(pip);
3974 		return (DDI_PROP_NOT_FOUND);
3975 	}
3976 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
3977 	    nelements);
3978 	MDI_PI_UNLOCK(pip);
3979 	return (i_map_nvlist_error_to_mdi(rv));
3980 }
3981 
3982 /*
3983  * mdi_prop_update_string():
3984  *		Create/Update a string property
3985  */
3986 int
3987 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
3988 {
3989 	int rv;
3990 
3991 	if (pip == NULL) {
3992 		return (DDI_PROP_INVAL_ARG);
3993 	}
3994 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3995 	MDI_PI_LOCK(pip);
3996 	if (MDI_PI(pip)->pi_prop == NULL) {
3997 		MDI_PI_UNLOCK(pip);
3998 		return (DDI_PROP_NOT_FOUND);
3999 	}
4000 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4001 	MDI_PI_UNLOCK(pip);
4002 	return (i_map_nvlist_error_to_mdi(rv));
4003 }
4004 
4005 /*
4006  * mdi_prop_update_string_array():
4007  *		Create/Update a string array property
4008  */
4009 int
4010 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4011     uint_t nelements)
4012 {
4013 	int rv;
4014 
4015 	if (pip == NULL) {
4016 		return (DDI_PROP_INVAL_ARG);
4017 	}
4018 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4019 	MDI_PI_LOCK(pip);
4020 	if (MDI_PI(pip)->pi_prop == NULL) {
4021 		MDI_PI_UNLOCK(pip);
4022 		return (DDI_PROP_NOT_FOUND);
4023 	}
4024 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4025 	    nelements);
4026 	MDI_PI_UNLOCK(pip);
4027 	return (i_map_nvlist_error_to_mdi(rv));
4028 }
4029 
4030 /*
4031  * mdi_prop_lookup_byte():
4032  * 		Look for byte property identified by name.  The data returned
4033  *		is the actual property and valid as long as mdi_pathinfo_t node
4034  *		is alive.
4035  */
4036 int
4037 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4038 {
4039 	int rv;
4040 
4041 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4042 		return (DDI_PROP_NOT_FOUND);
4043 	}
4044 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4045 	return (i_map_nvlist_error_to_mdi(rv));
4046 }
4047 
4048 
4049 /*
4050  * mdi_prop_lookup_byte_array():
4051  * 		Look for byte array property identified by name.  The data
4052  *		returned is the actual property and valid as long as
4053  *		mdi_pathinfo_t node is alive.
4054  */
4055 int
4056 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4057     uint_t *nelements)
4058 {
4059 	int rv;
4060 
4061 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4062 		return (DDI_PROP_NOT_FOUND);
4063 	}
4064 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4065 	    nelements);
4066 	return (i_map_nvlist_error_to_mdi(rv));
4067 }
4068 
4069 /*
4070  * mdi_prop_lookup_int():
4071  * 		Look for int property identified by name.  The data returned
4072  *		is the actual property and valid as long as mdi_pathinfo_t
4073  *		node is alive.
4074  */
4075 int
4076 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4077 {
4078 	int rv;
4079 
4080 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4081 		return (DDI_PROP_NOT_FOUND);
4082 	}
4083 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4084 	return (i_map_nvlist_error_to_mdi(rv));
4085 }
4086 
4087 /*
4088  * mdi_prop_lookup_int64():
4089  * 		Look for int64 property identified by name.  The data returned
4090  *		is the actual property and valid as long as mdi_pathinfo_t node
4091  *		is alive.
4092  */
4093 int
4094 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4095 {
4096 	int rv;
4097 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4098 		return (DDI_PROP_NOT_FOUND);
4099 	}
4100 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4101 	return (i_map_nvlist_error_to_mdi(rv));
4102 }
4103 
4104 /*
4105  * mdi_prop_lookup_int_array():
4106  * 		Look for int array property identified by name.  The data
4107  *		returned is the actual property and valid as long as
4108  *		mdi_pathinfo_t node is alive.
4109  */
4110 int
4111 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4112     uint_t *nelements)
4113 {
4114 	int rv;
4115 
4116 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4117 		return (DDI_PROP_NOT_FOUND);
4118 	}
4119 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4120 	    (int32_t **)data, nelements);
4121 	return (i_map_nvlist_error_to_mdi(rv));
4122 }
4123 
4124 /*
4125  * mdi_prop_lookup_string():
4126  * 		Look for string property identified by name.  The data
4127  *		returned is the actual property and valid as long as
4128  *		mdi_pathinfo_t node is alive.
4129  */
4130 int
4131 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4132 {
4133 	int rv;
4134 
4135 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4136 		return (DDI_PROP_NOT_FOUND);
4137 	}
4138 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4139 	return (i_map_nvlist_error_to_mdi(rv));
4140 }
4141 
4142 /*
4143  * mdi_prop_lookup_string_array():
4144  * 		Look for string array property identified by name.  The data
4145  *		returned is the actual property and valid as long as
4146  *		mdi_pathinfo_t node is alive.
4147  */
4148 
4149 int
4150 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4151     uint_t *nelements)
4152 {
4153 	int rv;
4154 
4155 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4156 		return (DDI_PROP_NOT_FOUND);
4157 	}
4158 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4159 	    nelements);
4160 	return (i_map_nvlist_error_to_mdi(rv));
4161 }
4162 
4163 /*
4164  * mdi_prop_free():
4165  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4166  *		functions return the pointer to actual property data and not a
4167  *		copy of it.  So the data returned is valid as long as
4168  *		mdi_pathinfo_t node is valid.
4169  */
4170 
4171 /*ARGSUSED*/
4172 int
4173 mdi_prop_free(void *data)
4174 {
4175 	return (DDI_PROP_SUCCESS);
4176 }
4177 
4178 /*ARGSUSED*/
4179 static void
4180 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4181 {
4182 	char		*phci_path, *ct_path;
4183 	char		*ct_status;
4184 	char		*status;
4185 	dev_info_t	*dip = ct->ct_dip;
4186 	char		lb_buf[64];
4187 
4188 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4189 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4190 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4191 		return;
4192 	}
4193 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4194 		ct_status = "optimal";
4195 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4196 		ct_status = "degraded";
4197 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4198 		ct_status = "failed";
4199 	} else {
4200 		ct_status = "unknown";
4201 	}
4202 
4203 	if (MDI_PI_IS_OFFLINE(pip)) {
4204 		status = "offline";
4205 	} else if (MDI_PI_IS_ONLINE(pip)) {
4206 		status = "online";
4207 	} else if (MDI_PI_IS_STANDBY(pip)) {
4208 		status = "standby";
4209 	} else if (MDI_PI_IS_FAULT(pip)) {
4210 		status = "faulted";
4211 	} else {
4212 		status = "unknown";
4213 	}
4214 
4215 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4216 		(void) snprintf(lb_buf, sizeof (lb_buf),
4217 		    "%s, region-size: %d", mdi_load_balance_lba,
4218 			ct->ct_lb_args->region_size);
4219 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4220 		(void) snprintf(lb_buf, sizeof (lb_buf),
4221 		    "%s", mdi_load_balance_none);
4222 	} else {
4223 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4224 		    mdi_load_balance_rr);
4225 	}
4226 
4227 	if (dip) {
4228 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4229 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4230 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4231 		    "path %s (%s%d) to target address: %s is %s"
4232 		    " Load balancing: %s\n",
4233 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4234 		    ddi_get_instance(dip), ct_status,
4235 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4236 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4237 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4238 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4239 		kmem_free(phci_path, MAXPATHLEN);
4240 		kmem_free(ct_path, MAXPATHLEN);
4241 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4242 	}
4243 }
4244 
4245 #ifdef	DEBUG
4246 /*
4247  * i_mdi_log():
4248  *		Utility function for error message management
4249  *
4250  */
4251 
4252 /*VARARGS3*/
4253 static void
4254 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
4255 {
4256 	char		buf[MAXNAMELEN];
4257 	char		name[MAXNAMELEN];
4258 	va_list		ap;
4259 	int		log_only = 0;
4260 	int		boot_only = 0;
4261 	int		console_only = 0;
4262 
4263 	if (dip) {
4264 		if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) {
4265 			(void) snprintf(name, MAXNAMELEN, "%s%d:\n",
4266 			    ddi_node_name(dip), ddi_get_instance(dip));
4267 		} else {
4268 			(void) snprintf(name, MAXNAMELEN, "%s%d:",
4269 			    ddi_node_name(dip), ddi_get_instance(dip));
4270 		}
4271 	} else {
4272 		name[0] = '\0';
4273 	}
4274 
4275 	va_start(ap, fmt);
4276 	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
4277 	va_end(ap);
4278 
4279 	switch (buf[0]) {
4280 	case '!':
4281 		log_only = 1;
4282 		break;
4283 	case '?':
4284 		boot_only = 1;
4285 		break;
4286 	case '^':
4287 		console_only = 1;
4288 		break;
4289 	}
4290 
4291 	switch (level) {
4292 	case CE_NOTE:
4293 		level = CE_CONT;
4294 		/* FALLTHROUGH */
4295 	case CE_CONT:
4296 	case CE_WARN:
4297 	case CE_PANIC:
4298 		if (boot_only) {
4299 			cmn_err(level, "?%s\t%s", name, &buf[1]);
4300 		} else if (console_only) {
4301 			cmn_err(level, "^%s\t%s", name, &buf[1]);
4302 		} else if (log_only) {
4303 			cmn_err(level, "!%s\t%s", name, &buf[1]);
4304 		} else {
4305 			cmn_err(level, "%s\t%s", name, buf);
4306 		}
4307 		break;
4308 	default:
4309 		cmn_err(level, "%s\t%s", name, buf);
4310 		break;
4311 	}
4312 }
4313 #endif	/* DEBUG */
4314 
4315 void
4316 i_mdi_client_online(dev_info_t *ct_dip)
4317 {
4318 	mdi_client_t	*ct;
4319 
4320 	/*
4321 	 * Client online notification. Mark client state as online
4322 	 * restore our binding with dev_info node
4323 	 */
4324 	ct = i_devi_get_client(ct_dip);
4325 	ASSERT(ct != NULL);
4326 	MDI_CLIENT_LOCK(ct);
4327 	MDI_CLIENT_SET_ONLINE(ct);
4328 	/* catch for any memory leaks */
4329 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4330 	ct->ct_dip = ct_dip;
4331 
4332 	if (ct->ct_power_cnt == 0)
4333 		(void) i_mdi_power_all_phci(ct);
4334 
4335 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4336 	    "i_mdi_pm_hold_client\n"));
4337 	i_mdi_pm_hold_client(ct, 1);
4338 
4339 	MDI_CLIENT_UNLOCK(ct);
4340 }
4341 
4342 void
4343 i_mdi_phci_online(dev_info_t *ph_dip)
4344 {
4345 	mdi_phci_t	*ph;
4346 
4347 	/* pHCI online notification. Mark state accordingly */
4348 	ph = i_devi_get_phci(ph_dip);
4349 	ASSERT(ph != NULL);
4350 	MDI_PHCI_LOCK(ph);
4351 	MDI_PHCI_SET_ONLINE(ph);
4352 	MDI_PHCI_UNLOCK(ph);
4353 }
4354 
4355 /*
4356  * mdi_devi_online():
4357  * 		Online notification from NDI framework on pHCI/client
4358  *		device online.
4359  * Return Values:
4360  *		NDI_SUCCESS
4361  *		MDI_FAILURE
4362  */
4363 
4364 /*ARGSUSED*/
4365 int
4366 mdi_devi_online(dev_info_t *dip, uint_t flags)
4367 {
4368 	if (MDI_PHCI(dip)) {
4369 		i_mdi_phci_online(dip);
4370 	}
4371 
4372 	if (MDI_CLIENT(dip)) {
4373 		i_mdi_client_online(dip);
4374 	}
4375 	return (NDI_SUCCESS);
4376 }
4377 
4378 /*
4379  * mdi_devi_offline():
4380  * 		Offline notification from NDI framework on pHCI/Client device
4381  *		offline.
4382  *
4383  * Return Values:
4384  *		NDI_SUCCESS
4385  *		NDI_FAILURE
4386  */
4387 
4388 /*ARGSUSED*/
4389 int
4390 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4391 {
4392 	int		rv = NDI_SUCCESS;
4393 
4394 	if (MDI_CLIENT(dip)) {
4395 		rv = i_mdi_client_offline(dip, flags);
4396 		if (rv != NDI_SUCCESS)
4397 			return (rv);
4398 	}
4399 
4400 	if (MDI_PHCI(dip)) {
4401 		rv = i_mdi_phci_offline(dip, flags);
4402 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4403 			/* set client back online */
4404 			i_mdi_client_online(dip);
4405 		}
4406 	}
4407 
4408 	return (rv);
4409 }
4410 
4411 /*ARGSUSED*/
4412 static int
4413 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4414 {
4415 	int		rv = NDI_SUCCESS;
4416 	mdi_phci_t	*ph;
4417 	mdi_client_t	*ct;
4418 	mdi_pathinfo_t	*pip;
4419 	mdi_pathinfo_t	*next;
4420 	mdi_pathinfo_t	*failed_pip = NULL;
4421 	dev_info_t	*cdip;
4422 
4423 	/*
4424 	 * pHCI component offline notification
4425 	 * Make sure that this pHCI instance is free to be offlined.
4426 	 * If it is OK to proceed, Offline and remove all the child
4427 	 * mdi_pathinfo nodes.  This process automatically offlines
4428 	 * corresponding client devices, for which this pHCI provides
4429 	 * critical services.
4430 	 */
4431 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
4432 	    dip));
4433 
4434 	ph = i_devi_get_phci(dip);
4435 	if (ph == NULL) {
4436 		return (rv);
4437 	}
4438 
4439 	MDI_PHCI_LOCK(ph);
4440 
4441 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4442 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
4443 		MDI_PHCI_UNLOCK(ph);
4444 		return (NDI_SUCCESS);
4445 	}
4446 
4447 	/*
4448 	 * Check to see if the pHCI can be offlined
4449 	 */
4450 	if (ph->ph_unstable) {
4451 		MDI_DEBUG(1, (CE_WARN, dip,
4452 		    "!One or more target devices are in transient "
4453 		    "state. This device can not be removed at "
4454 		    "this moment. Please try again later."));
4455 		MDI_PHCI_UNLOCK(ph);
4456 		return (NDI_BUSY);
4457 	}
4458 
4459 	pip = ph->ph_path_head;
4460 	while (pip != NULL) {
4461 		MDI_PI_LOCK(pip);
4462 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4463 		/*
4464 		 * The mdi_pathinfo state is OK. Check the client state.
4465 		 * If failover in progress fail the pHCI from offlining
4466 		 */
4467 		ct = MDI_PI(pip)->pi_client;
4468 		i_mdi_client_lock(ct, pip);
4469 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4470 		    (ct->ct_unstable)) {
4471 			/*
4472 			 * Failover is in progress, Fail the DR
4473 			 */
4474 			MDI_DEBUG(1, (CE_WARN, dip,
4475 			    "!pHCI device (%s%d) is Busy. %s",
4476 			    ddi_driver_name(dip), ddi_get_instance(dip),
4477 			    "This device can not be removed at "
4478 			    "this moment. Please try again later."));
4479 			MDI_PI_UNLOCK(pip);
4480 			MDI_CLIENT_UNLOCK(ct);
4481 			MDI_PHCI_UNLOCK(ph);
4482 			return (NDI_BUSY);
4483 		}
4484 		MDI_PI_UNLOCK(pip);
4485 
4486 		/*
4487 		 * Check to see of we are removing the last path of this
4488 		 * client device...
4489 		 */
4490 		cdip = ct->ct_dip;
4491 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4492 		    (i_mdi_client_compute_state(ct, ph) ==
4493 		    MDI_CLIENT_STATE_FAILED)) {
4494 			i_mdi_client_unlock(ct);
4495 			MDI_PHCI_UNLOCK(ph);
4496 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4497 				/*
4498 				 * ndi_devi_offline() failed.
4499 				 * This pHCI provides the critical path
4500 				 * to one or more client devices.
4501 				 * Return busy.
4502 				 */
4503 				MDI_PHCI_LOCK(ph);
4504 				MDI_DEBUG(1, (CE_WARN, dip,
4505 				    "!pHCI device (%s%d) is Busy. %s",
4506 				    ddi_driver_name(dip), ddi_get_instance(dip),
4507 				    "This device can not be removed at "
4508 				    "this moment. Please try again later."));
4509 				failed_pip = pip;
4510 				break;
4511 			} else {
4512 				MDI_PHCI_LOCK(ph);
4513 				pip = next;
4514 			}
4515 		} else {
4516 			i_mdi_client_unlock(ct);
4517 			pip = next;
4518 		}
4519 	}
4520 
4521 	if (failed_pip) {
4522 		pip = ph->ph_path_head;
4523 		while (pip != failed_pip) {
4524 			MDI_PI_LOCK(pip);
4525 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4526 			ct = MDI_PI(pip)->pi_client;
4527 			i_mdi_client_lock(ct, pip);
4528 			cdip = ct->ct_dip;
4529 			switch (MDI_CLIENT_STATE(ct)) {
4530 			case MDI_CLIENT_STATE_OPTIMAL:
4531 			case MDI_CLIENT_STATE_DEGRADED:
4532 				if (cdip) {
4533 					MDI_PI_UNLOCK(pip);
4534 					i_mdi_client_unlock(ct);
4535 					MDI_PHCI_UNLOCK(ph);
4536 					(void) ndi_devi_online(cdip, 0);
4537 					MDI_PHCI_LOCK(ph);
4538 					pip = next;
4539 					continue;
4540 				}
4541 				break;
4542 
4543 			case MDI_CLIENT_STATE_FAILED:
4544 				if (cdip) {
4545 					MDI_PI_UNLOCK(pip);
4546 					i_mdi_client_unlock(ct);
4547 					MDI_PHCI_UNLOCK(ph);
4548 					(void) ndi_devi_offline(cdip, 0);
4549 					MDI_PHCI_LOCK(ph);
4550 					pip = next;
4551 					continue;
4552 				}
4553 				break;
4554 			}
4555 			MDI_PI_UNLOCK(pip);
4556 			i_mdi_client_unlock(ct);
4557 			pip = next;
4558 		}
4559 		MDI_PHCI_UNLOCK(ph);
4560 		return (NDI_BUSY);
4561 	}
4562 
4563 	/*
4564 	 * Mark the pHCI as offline
4565 	 */
4566 	MDI_PHCI_SET_OFFLINE(ph);
4567 
4568 	/*
4569 	 * Mark the child mdi_pathinfo nodes as transient
4570 	 */
4571 	pip = ph->ph_path_head;
4572 	while (pip != NULL) {
4573 		MDI_PI_LOCK(pip);
4574 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4575 		MDI_PI_SET_OFFLINING(pip);
4576 		MDI_PI_UNLOCK(pip);
4577 		pip = next;
4578 	}
4579 	MDI_PHCI_UNLOCK(ph);
4580 	/*
4581 	 * Give a chance for any pending commands to execute
4582 	 */
4583 	delay(1);
4584 	MDI_PHCI_LOCK(ph);
4585 	pip = ph->ph_path_head;
4586 	while (pip != NULL) {
4587 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4588 		(void) i_mdi_pi_offline(pip, flags);
4589 		MDI_PI_LOCK(pip);
4590 		ct = MDI_PI(pip)->pi_client;
4591 		if (!MDI_PI_IS_OFFLINE(pip)) {
4592 			MDI_DEBUG(1, (CE_WARN, dip,
4593 			    "!pHCI device (%s%d) is Busy. %s",
4594 			    ddi_driver_name(dip), ddi_get_instance(dip),
4595 			    "This device can not be removed at "
4596 			    "this moment. Please try again later."));
4597 			MDI_PI_UNLOCK(pip);
4598 			MDI_PHCI_SET_ONLINE(ph);
4599 			MDI_PHCI_UNLOCK(ph);
4600 			return (NDI_BUSY);
4601 		}
4602 		MDI_PI_UNLOCK(pip);
4603 		pip = next;
4604 	}
4605 	MDI_PHCI_UNLOCK(ph);
4606 
4607 	return (rv);
4608 }
4609 
4610 /*ARGSUSED*/
4611 static int
4612 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
4613 {
4614 	int		rv = NDI_SUCCESS;
4615 	mdi_client_t	*ct;
4616 
4617 	/*
4618 	 * Client component to go offline.  Make sure that we are
4619 	 * not in failing over state and update client state
4620 	 * accordingly
4621 	 */
4622 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
4623 	    dip));
4624 	ct = i_devi_get_client(dip);
4625 	if (ct != NULL) {
4626 		MDI_CLIENT_LOCK(ct);
4627 		if (ct->ct_unstable) {
4628 			/*
4629 			 * One or more paths are in transient state,
4630 			 * Dont allow offline of a client device
4631 			 */
4632 			MDI_DEBUG(1, (CE_WARN, dip,
4633 			    "!One or more paths to this device is "
4634 			    "in transient state. This device can not "
4635 			    "be removed at this moment. "
4636 			    "Please try again later."));
4637 			MDI_CLIENT_UNLOCK(ct);
4638 			return (NDI_BUSY);
4639 		}
4640 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
4641 			/*
4642 			 * Failover is in progress, Dont allow DR of
4643 			 * a client device
4644 			 */
4645 			MDI_DEBUG(1, (CE_WARN, dip,
4646 			    "!Client device (%s%d) is Busy. %s",
4647 			    ddi_driver_name(dip), ddi_get_instance(dip),
4648 			    "This device can not be removed at "
4649 			    "this moment. Please try again later."));
4650 			MDI_CLIENT_UNLOCK(ct);
4651 			return (NDI_BUSY);
4652 		}
4653 		MDI_CLIENT_SET_OFFLINE(ct);
4654 
4655 		/*
4656 		 * Unbind our relationship with the dev_info node
4657 		 */
4658 		if (flags & NDI_DEVI_REMOVE) {
4659 			ct->ct_dip = NULL;
4660 		}
4661 		MDI_CLIENT_UNLOCK(ct);
4662 	}
4663 	return (rv);
4664 }
4665 
4666 /*
4667  * mdi_pre_attach():
4668  *		Pre attach() notification handler
4669  */
4670 
4671 /*ARGSUSED*/
4672 int
4673 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4674 {
4675 	/* don't support old DDI_PM_RESUME */
4676 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
4677 	    (cmd == DDI_PM_RESUME))
4678 		return (DDI_FAILURE);
4679 
4680 	return (DDI_SUCCESS);
4681 }
4682 
4683 /*
4684  * mdi_post_attach():
4685  *		Post attach() notification handler
4686  */
4687 
4688 /*ARGSUSED*/
4689 void
4690 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
4691 {
4692 	mdi_phci_t	*ph;
4693 	mdi_client_t	*ct;
4694 	mdi_pathinfo_t	*pip;
4695 
4696 	if (MDI_PHCI(dip)) {
4697 		ph = i_devi_get_phci(dip);
4698 		ASSERT(ph != NULL);
4699 
4700 		MDI_PHCI_LOCK(ph);
4701 		switch (cmd) {
4702 		case DDI_ATTACH:
4703 			MDI_DEBUG(2, (CE_NOTE, dip,
4704 			    "!pHCI post_attach: called %p\n", ph));
4705 			if (error == DDI_SUCCESS) {
4706 				MDI_PHCI_SET_ATTACH(ph);
4707 			} else {
4708 				MDI_DEBUG(1, (CE_NOTE, dip,
4709 				    "!pHCI post_attach: failed error=%d\n",
4710 				    error));
4711 				MDI_PHCI_SET_DETACH(ph);
4712 			}
4713 			break;
4714 
4715 		case DDI_RESUME:
4716 			MDI_DEBUG(2, (CE_NOTE, dip,
4717 			    "!pHCI post_resume: called %p\n", ph));
4718 			if (error == DDI_SUCCESS) {
4719 				MDI_PHCI_SET_RESUME(ph);
4720 			} else {
4721 				MDI_DEBUG(1, (CE_NOTE, dip,
4722 				    "!pHCI post_resume: failed error=%d\n",
4723 				    error));
4724 				MDI_PHCI_SET_SUSPEND(ph);
4725 			}
4726 			break;
4727 		}
4728 		MDI_PHCI_UNLOCK(ph);
4729 	}
4730 
4731 	if (MDI_CLIENT(dip)) {
4732 		ct = i_devi_get_client(dip);
4733 		ASSERT(ct != NULL);
4734 
4735 		MDI_CLIENT_LOCK(ct);
4736 		switch (cmd) {
4737 		case DDI_ATTACH:
4738 			MDI_DEBUG(2, (CE_NOTE, dip,
4739 			    "!Client post_attach: called %p\n", ct));
4740 			if (error != DDI_SUCCESS) {
4741 				MDI_DEBUG(1, (CE_NOTE, dip,
4742 				    "!Client post_attach: failed error=%d\n",
4743 				    error));
4744 				MDI_CLIENT_SET_DETACH(ct);
4745 				MDI_DEBUG(4, (CE_WARN, dip,
4746 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
4747 				i_mdi_pm_reset_client(ct);
4748 				break;
4749 			}
4750 
4751 			/*
4752 			 * Client device has successfully attached.
4753 			 * Create kstats for any pathinfo structures
4754 			 * initially associated with this client.
4755 			 */
4756 			for (pip = ct->ct_path_head; pip != NULL;
4757 			    pip = (mdi_pathinfo_t *)
4758 			    MDI_PI(pip)->pi_client_link) {
4759 				(void) i_mdi_pi_kstat_create(pip);
4760 				i_mdi_report_path_state(ct, pip);
4761 			}
4762 			MDI_CLIENT_SET_ATTACH(ct);
4763 			break;
4764 
4765 		case DDI_RESUME:
4766 			MDI_DEBUG(2, (CE_NOTE, dip,
4767 			    "!Client post_attach: called %p\n", ct));
4768 			if (error == DDI_SUCCESS) {
4769 				MDI_CLIENT_SET_RESUME(ct);
4770 			} else {
4771 				MDI_DEBUG(1, (CE_NOTE, dip,
4772 				    "!Client post_resume: failed error=%d\n",
4773 				    error));
4774 				MDI_CLIENT_SET_SUSPEND(ct);
4775 			}
4776 			break;
4777 		}
4778 		MDI_CLIENT_UNLOCK(ct);
4779 	}
4780 }
4781 
4782 /*
4783  * mdi_pre_detach():
4784  *		Pre detach notification handler
4785  */
4786 
4787 /*ARGSUSED*/
4788 int
4789 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4790 {
4791 	int rv = DDI_SUCCESS;
4792 
4793 	if (MDI_CLIENT(dip)) {
4794 		(void) i_mdi_client_pre_detach(dip, cmd);
4795 	}
4796 
4797 	if (MDI_PHCI(dip)) {
4798 		rv = i_mdi_phci_pre_detach(dip, cmd);
4799 	}
4800 
4801 	return (rv);
4802 }
4803 
4804 /*ARGSUSED*/
4805 static int
4806 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4807 {
4808 	int		rv = DDI_SUCCESS;
4809 	mdi_phci_t	*ph;
4810 	mdi_client_t	*ct;
4811 	mdi_pathinfo_t	*pip;
4812 	mdi_pathinfo_t	*failed_pip = NULL;
4813 	mdi_pathinfo_t	*next;
4814 
4815 	ph = i_devi_get_phci(dip);
4816 	if (ph == NULL) {
4817 		return (rv);
4818 	}
4819 
4820 	MDI_PHCI_LOCK(ph);
4821 	switch (cmd) {
4822 	case DDI_DETACH:
4823 		MDI_DEBUG(2, (CE_NOTE, dip,
4824 		    "!pHCI pre_detach: called %p\n", ph));
4825 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
4826 			/*
4827 			 * mdi_pathinfo nodes are still attached to
4828 			 * this pHCI. Fail the detach for this pHCI.
4829 			 */
4830 			MDI_DEBUG(2, (CE_WARN, dip,
4831 			    "!pHCI pre_detach: "
4832 			    "mdi_pathinfo nodes are still attached "
4833 			    "%p\n", ph));
4834 			rv = DDI_FAILURE;
4835 			break;
4836 		}
4837 		MDI_PHCI_SET_DETACH(ph);
4838 		break;
4839 
4840 	case DDI_SUSPEND:
4841 		/*
4842 		 * pHCI is getting suspended.  Since mpxio client
4843 		 * devices may not be suspended at this point, to avoid
4844 		 * a potential stack overflow, it is important to suspend
4845 		 * client devices before pHCI can be suspended.
4846 		 */
4847 
4848 		MDI_DEBUG(2, (CE_NOTE, dip,
4849 		    "!pHCI pre_suspend: called %p\n", ph));
4850 		/*
4851 		 * Suspend all the client devices accessible through this pHCI
4852 		 */
4853 		pip = ph->ph_path_head;
4854 		while (pip != NULL && rv == DDI_SUCCESS) {
4855 			dev_info_t *cdip;
4856 			MDI_PI_LOCK(pip);
4857 			next =
4858 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4859 			ct = MDI_PI(pip)->pi_client;
4860 			i_mdi_client_lock(ct, pip);
4861 			cdip = ct->ct_dip;
4862 			MDI_PI_UNLOCK(pip);
4863 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
4864 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
4865 				i_mdi_client_unlock(ct);
4866 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
4867 				    DDI_SUCCESS) {
4868 					/*
4869 					 * Suspend of one of the client
4870 					 * device has failed.
4871 					 */
4872 					MDI_DEBUG(1, (CE_WARN, dip,
4873 					    "!Suspend of device (%s%d) failed.",
4874 					    ddi_driver_name(cdip),
4875 					    ddi_get_instance(cdip)));
4876 					failed_pip = pip;
4877 					break;
4878 				}
4879 			} else {
4880 				i_mdi_client_unlock(ct);
4881 			}
4882 			pip = next;
4883 		}
4884 
4885 		if (rv == DDI_SUCCESS) {
4886 			/*
4887 			 * Suspend of client devices is complete. Proceed
4888 			 * with pHCI suspend.
4889 			 */
4890 			MDI_PHCI_SET_SUSPEND(ph);
4891 		} else {
4892 			/*
4893 			 * Revert back all the suspended client device states
4894 			 * to converse.
4895 			 */
4896 			pip = ph->ph_path_head;
4897 			while (pip != failed_pip) {
4898 				dev_info_t *cdip;
4899 				MDI_PI_LOCK(pip);
4900 				next =
4901 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4902 				ct = MDI_PI(pip)->pi_client;
4903 				i_mdi_client_lock(ct, pip);
4904 				cdip = ct->ct_dip;
4905 				MDI_PI_UNLOCK(pip);
4906 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
4907 					i_mdi_client_unlock(ct);
4908 					(void) devi_attach(cdip, DDI_RESUME);
4909 				} else {
4910 					i_mdi_client_unlock(ct);
4911 				}
4912 				pip = next;
4913 			}
4914 		}
4915 		break;
4916 
4917 	default:
4918 		rv = DDI_FAILURE;
4919 		break;
4920 	}
4921 	MDI_PHCI_UNLOCK(ph);
4922 	return (rv);
4923 }
4924 
4925 /*ARGSUSED*/
4926 static int
4927 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4928 {
4929 	int		rv = DDI_SUCCESS;
4930 	mdi_client_t	*ct;
4931 
4932 	ct = i_devi_get_client(dip);
4933 	if (ct == NULL) {
4934 		return (rv);
4935 	}
4936 
4937 	MDI_CLIENT_LOCK(ct);
4938 	switch (cmd) {
4939 	case DDI_DETACH:
4940 		MDI_DEBUG(2, (CE_NOTE, dip,
4941 		    "!Client pre_detach: called %p\n", ct));
4942 		MDI_CLIENT_SET_DETACH(ct);
4943 		break;
4944 
4945 	case DDI_SUSPEND:
4946 		MDI_DEBUG(2, (CE_NOTE, dip,
4947 		    "!Client pre_suspend: called %p\n", ct));
4948 		MDI_CLIENT_SET_SUSPEND(ct);
4949 		break;
4950 
4951 	default:
4952 		rv = DDI_FAILURE;
4953 		break;
4954 	}
4955 	MDI_CLIENT_UNLOCK(ct);
4956 	return (rv);
4957 }
4958 
4959 /*
4960  * mdi_post_detach():
4961  *		Post detach notification handler
4962  */
4963 
4964 /*ARGSUSED*/
4965 void
4966 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
4967 {
4968 	/*
4969 	 * Detach/Suspend of mpxio component failed. Update our state
4970 	 * too
4971 	 */
4972 	if (MDI_PHCI(dip))
4973 		i_mdi_phci_post_detach(dip, cmd, error);
4974 
4975 	if (MDI_CLIENT(dip))
4976 		i_mdi_client_post_detach(dip, cmd, error);
4977 }
4978 
4979 /*ARGSUSED*/
4980 static void
4981 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
4982 {
4983 	mdi_phci_t	*ph;
4984 
4985 	/*
4986 	 * Detach/Suspend of phci component failed. Update our state
4987 	 * too
4988 	 */
4989 	ph = i_devi_get_phci(dip);
4990 	if (ph == NULL) {
4991 		return;
4992 	}
4993 
4994 	MDI_PHCI_LOCK(ph);
4995 	/*
4996 	 * Detach of pHCI failed. Restore back converse
4997 	 * state
4998 	 */
4999 	switch (cmd) {
5000 	case DDI_DETACH:
5001 		MDI_DEBUG(2, (CE_NOTE, dip,
5002 		    "!pHCI post_detach: called %p\n", ph));
5003 		if (error != DDI_SUCCESS)
5004 			MDI_PHCI_SET_ATTACH(ph);
5005 		break;
5006 
5007 	case DDI_SUSPEND:
5008 		MDI_DEBUG(2, (CE_NOTE, dip,
5009 		    "!pHCI post_suspend: called %p\n", ph));
5010 		if (error != DDI_SUCCESS)
5011 			MDI_PHCI_SET_RESUME(ph);
5012 		break;
5013 	}
5014 	MDI_PHCI_UNLOCK(ph);
5015 }
5016 
5017 /*ARGSUSED*/
5018 static void
5019 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5020 {
5021 	mdi_client_t	*ct;
5022 
5023 	ct = i_devi_get_client(dip);
5024 	if (ct == NULL) {
5025 		return;
5026 	}
5027 	MDI_CLIENT_LOCK(ct);
5028 	/*
5029 	 * Detach of Client failed. Restore back converse
5030 	 * state
5031 	 */
5032 	switch (cmd) {
5033 	case DDI_DETACH:
5034 		MDI_DEBUG(2, (CE_NOTE, dip,
5035 		    "!Client post_detach: called %p\n", ct));
5036 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5037 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5038 			    "i_mdi_pm_rele_client\n"));
5039 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5040 		} else {
5041 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5042 			    "i_mdi_pm_reset_client\n"));
5043 			i_mdi_pm_reset_client(ct);
5044 		}
5045 		if (error != DDI_SUCCESS)
5046 			MDI_CLIENT_SET_ATTACH(ct);
5047 		break;
5048 
5049 	case DDI_SUSPEND:
5050 		MDI_DEBUG(2, (CE_NOTE, dip,
5051 		    "!Client post_suspend: called %p\n", ct));
5052 		if (error != DDI_SUCCESS)
5053 			MDI_CLIENT_SET_RESUME(ct);
5054 		break;
5055 	}
5056 	MDI_CLIENT_UNLOCK(ct);
5057 }
5058 
5059 /*
5060  * create and install per-path (client - pHCI) statistics
5061  * I/O stats supported: nread, nwritten, reads, and writes
5062  * Error stats - hard errors, soft errors, & transport errors
5063  */
5064 static int
5065 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
5066 {
5067 
5068 	dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip;
5069 	dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip;
5070 	char ksname[KSTAT_STRLEN];
5071 	mdi_pathinfo_t *cpip;
5072 	const char *err_postfix = ",err";
5073 	kstat_t	*kiosp, *kerrsp;
5074 	struct pi_errs	*nsp;
5075 	struct mdi_pi_kstats *mdi_statp;
5076 
5077 	ASSERT(client != NULL && ppath != NULL);
5078 
5079 	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));
5080 
5081 	if (MDI_PI(pip)->pi_kstats != NULL)
5082 		return (MDI_SUCCESS);
5083 
5084 	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
5085 	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
5086 		if (cpip == pip)
5087 			continue;
5088 		/*
5089 		 * We have found a different path with same parent
5090 		 * kstats for a given client-pHCI are common
5091 		 */
5092 		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
5093 		    (MDI_PI(cpip)->pi_kstats != NULL)) {
5094 			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
5095 			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
5096 			return (MDI_SUCCESS);
5097 		}
5098 	}
5099 
5100 	/*
5101 	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
5102 	 * clamp length of name against max length of error kstat name
5103 	 */
5104 	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
5105 	    ddi_driver_name(client), ddi_get_instance(client),
5106 	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
5107 	    (KSTAT_STRLEN - strlen(err_postfix))) {
5108 		return (MDI_FAILURE);
5109 	}
5110 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5111 	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
5112 		return (MDI_FAILURE);
5113 	}
5114 
5115 	(void) strcat(ksname, err_postfix);
5116 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5117 	    KSTAT_TYPE_NAMED,
5118 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5119 
5120 	if (kerrsp == NULL) {
5121 		kstat_delete(kiosp);
5122 		return (MDI_FAILURE);
5123 	}
5124 
5125 	nsp = (struct pi_errs *)kerrsp->ks_data;
5126 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5127 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5128 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5129 	    KSTAT_DATA_UINT32);
5130 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5131 	    KSTAT_DATA_UINT32);
5132 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5133 	    KSTAT_DATA_UINT32);
5134 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5135 	    KSTAT_DATA_UINT32);
5136 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5137 	    KSTAT_DATA_UINT32);
5138 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5139 	    KSTAT_DATA_UINT32);
5140 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5141 	    KSTAT_DATA_UINT32);
5142 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5143 
5144 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5145 	mdi_statp->pi_kstat_ref = 1;
5146 	mdi_statp->pi_kstat_iostats = kiosp;
5147 	mdi_statp->pi_kstat_errstats = kerrsp;
5148 	kstat_install(kiosp);
5149 	kstat_install(kerrsp);
5150 	MDI_PI(pip)->pi_kstats = mdi_statp;
5151 	return (MDI_SUCCESS);
5152 }
5153 
5154 /*
5155  * destroy per-path properties
5156  */
5157 static void
5158 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5159 {
5160 
5161 	struct mdi_pi_kstats *mdi_statp;
5162 
5163 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5164 		return;
5165 
5166 	MDI_PI(pip)->pi_kstats = NULL;
5167 
5168 	/*
5169 	 * the kstat may be shared between multiple pathinfo nodes
5170 	 * decrement this pathinfo's usage, removing the kstats
5171 	 * themselves when the last pathinfo reference is removed.
5172 	 */
5173 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5174 	if (--mdi_statp->pi_kstat_ref != 0)
5175 		return;
5176 
5177 	kstat_delete(mdi_statp->pi_kstat_iostats);
5178 	kstat_delete(mdi_statp->pi_kstat_errstats);
5179 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5180 }
5181 
5182 /*
5183  * update I/O paths KSTATS
5184  */
5185 void
5186 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5187 {
5188 	kstat_t *iostatp;
5189 	size_t xfer_cnt;
5190 
5191 	ASSERT(pip != NULL);
5192 
5193 	/*
5194 	 * I/O can be driven across a path prior to having path
5195 	 * statistics available, i.e. probe(9e).
5196 	 */
5197 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5198 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5199 		xfer_cnt = bp->b_bcount - bp->b_resid;
5200 		if (bp->b_flags & B_READ) {
5201 			KSTAT_IO_PTR(iostatp)->reads++;
5202 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5203 		} else {
5204 			KSTAT_IO_PTR(iostatp)->writes++;
5205 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5206 		}
5207 	}
5208 }
5209 
5210 /*
5211  * disable the path to a particular pHCI (pHCI specified in the phci_path
5212  * argument) for a particular client (specified in the client_path argument).
5213  * Disabling a path means that MPxIO will not select the disabled path for
5214  * routing any new I/O requests.
5215  */
5216 int
5217 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5218 {
5219 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5220 }
5221 
5222 /*
5223  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5224  * argument) for a particular client (specified in the client_path argument).
5225  * Enabling a path means that MPxIO may select the enabled path for routing
5226  * future I/O requests, subject to other path state constraints.
5227  */
5228 
5229 int
5230 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5231 {
5232 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5233 }
5234 
5235 
5236 /*
5237  * Common routine for doing enable/disable.
5238  */
5239 int
5240 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5241 {
5242 
5243 	mdi_phci_t	*ph;
5244 	mdi_vhci_t	*vh = NULL;
5245 	mdi_client_t	*ct;
5246 	mdi_pathinfo_t	*next, *pip;
5247 	int		found_it;
5248 	int		(*f)() = NULL;
5249 	int		rv;
5250 	int		sync_flag = 0;
5251 
5252 	ph = i_devi_get_phci(pdip);
5253 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5254 		" Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip));
5255 	if (ph == NULL) {
5256 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5257 			" failed. ph = NULL operation = %d\n", op));
5258 		return (MDI_FAILURE);
5259 	}
5260 
5261 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5262 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5263 			" Invalid operation = %d\n", op));
5264 		return (MDI_FAILURE);
5265 	}
5266 
5267 	sync_flag = (flags << 8) & 0xf00;
5268 
5269 	vh = ph->ph_vhci;
5270 	f = vh->vh_ops->vo_pi_state_change;
5271 
5272 	if (cdip == NULL) {
5273 		/*
5274 		 * Need to mark the Phci as enabled/disabled.
5275 		 */
5276 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5277 		"Operation %d for the phci\n", op));
5278 		MDI_PHCI_LOCK(ph);
5279 		switch (flags) {
5280 			case USER_DISABLE:
5281 				if (op == MDI_DISABLE_OP)
5282 					MDI_PHCI_SET_USER_DISABLE(ph);
5283 				else
5284 					MDI_PHCI_SET_USER_ENABLE(ph);
5285 				break;
5286 			case DRIVER_DISABLE:
5287 				if (op == MDI_DISABLE_OP)
5288 					MDI_PHCI_SET_DRV_DISABLE(ph);
5289 				else
5290 					MDI_PHCI_SET_DRV_ENABLE(ph);
5291 				break;
5292 			case DRIVER_DISABLE_TRANSIENT:
5293 				if (op == MDI_DISABLE_OP)
5294 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5295 				else
5296 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
5297 				break;
5298 			default:
5299 				MDI_PHCI_UNLOCK(ph);
5300 				MDI_DEBUG(1, (CE_NOTE, NULL,
5301 				"!i_mdi_pi_enable_disable:"
5302 				" Invalid flag argument= %d\n", flags));
5303 		}
5304 
5305 		/*
5306 		 * Phci has been disabled. Now try to enable/disable
5307 		 * path info's to each client.
5308 		 */
5309 		pip = ph->ph_path_head;
5310 		while (pip != NULL) {
5311 			/*
5312 			 * Do a callback into the mdi consumer to let it
5313 			 * know that path is about to be enabled/disabled.
5314 			 */
5315 			if (f != NULL) {
5316 				rv = (*f)(vh->vh_dip, pip, 0,
5317 					MDI_PI_EXT_STATE(pip),
5318 					MDI_EXT_STATE_CHANGE | sync_flag |
5319 					op | MDI_BEFORE_STATE_CHANGE);
5320 				if (rv != MDI_SUCCESS) {
5321 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5322 				"!vo_pi_state_change: failed rv = %x", rv));
5323 				}
5324 			}
5325 
5326 			MDI_PI_LOCK(pip);
5327 			next =
5328 				(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5329 			switch (flags) {
5330 			case USER_DISABLE:
5331 				if (op == MDI_DISABLE_OP)
5332 					MDI_PI_SET_USER_DISABLE(pip);
5333 				else
5334 					MDI_PI_SET_USER_ENABLE(pip);
5335 				break;
5336 			case DRIVER_DISABLE:
5337 				if (op == MDI_DISABLE_OP)
5338 					MDI_PI_SET_DRV_DISABLE(pip);
5339 				else
5340 					MDI_PI_SET_DRV_ENABLE(pip);
5341 				break;
5342 			case DRIVER_DISABLE_TRANSIENT:
5343 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5344 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5345 				else
5346 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5347 				break;
5348 			}
5349 			MDI_PI_UNLOCK(pip);
5350 			/*
5351 			 * Do a callback into the mdi consumer to let it
5352 			 * know that path is now enabled/disabled.
5353 			 */
5354 			if (f != NULL) {
5355 				rv = (*f)(vh->vh_dip, pip, 0,
5356 					MDI_PI_EXT_STATE(pip),
5357 					MDI_EXT_STATE_CHANGE | sync_flag |
5358 					op | MDI_AFTER_STATE_CHANGE);
5359 				if (rv != MDI_SUCCESS) {
5360 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5361 				"!vo_pi_state_change: failed rv = %x", rv));
5362 				}
5363 			}
5364 			pip = next;
5365 		}
5366 		MDI_PHCI_UNLOCK(ph);
5367 	} else {
5368 
5369 		/*
5370 		 * Disable a specific client.
5371 		 */
5372 		ct = i_devi_get_client(cdip);
5373 		if (ct == NULL) {
5374 			MDI_DEBUG(1, (CE_NOTE, NULL,
5375 			"!i_mdi_pi_enable_disable:"
5376 			" failed. ct = NULL operation = %d\n", op));
5377 			return (MDI_FAILURE);
5378 		}
5379 
5380 		MDI_CLIENT_LOCK(ct);
5381 		pip = ct->ct_path_head;
5382 		found_it = 0;
5383 		while (pip != NULL) {
5384 			MDI_PI_LOCK(pip);
5385 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5386 			if (MDI_PI(pip)->pi_phci == ph) {
5387 				MDI_PI_UNLOCK(pip);
5388 				found_it = 1;
5389 				break;
5390 			}
5391 			MDI_PI_UNLOCK(pip);
5392 			pip = next;
5393 		}
5394 
5395 		MDI_CLIENT_UNLOCK(ct);
5396 		if (found_it == 0) {
5397 			MDI_DEBUG(1, (CE_NOTE, NULL,
5398 			"!i_mdi_pi_enable_disable:"
5399 			" failed. Could not find corresponding pip\n"));
5400 			return (MDI_FAILURE);
5401 		}
5402 		/*
5403 		 * Do a callback into the mdi consumer to let it
5404 		 * know that path is about to get enabled/disabled.
5405 		 */
5406 		if (f != NULL) {
5407 			rv = (*f)(vh->vh_dip, pip, 0,
5408 				MDI_PI_EXT_STATE(pip),
5409 				MDI_EXT_STATE_CHANGE | sync_flag |
5410 				op | MDI_BEFORE_STATE_CHANGE);
5411 			if (rv != MDI_SUCCESS) {
5412 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5413 				"!vo_pi_state_change: failed rv = %x", rv));
5414 			}
5415 		}
5416 		MDI_PI_LOCK(pip);
5417 		switch (flags) {
5418 			case USER_DISABLE:
5419 				if (op == MDI_DISABLE_OP)
5420 					MDI_PI_SET_USER_DISABLE(pip);
5421 				else
5422 					MDI_PI_SET_USER_ENABLE(pip);
5423 				break;
5424 			case DRIVER_DISABLE:
5425 				if (op == MDI_DISABLE_OP)
5426 					MDI_PI_SET_DRV_DISABLE(pip);
5427 				else
5428 					MDI_PI_SET_DRV_ENABLE(pip);
5429 				break;
5430 			case DRIVER_DISABLE_TRANSIENT:
5431 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5432 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5433 				else
5434 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5435 				break;
5436 		}
5437 		MDI_PI_UNLOCK(pip);
5438 		/*
5439 		 * Do a callback into the mdi consumer to let it
5440 		 * know that path is now enabled/disabled.
5441 		 */
5442 		if (f != NULL) {
5443 			rv = (*f)(vh->vh_dip, pip, 0,
5444 				MDI_PI_EXT_STATE(pip),
5445 				MDI_EXT_STATE_CHANGE | sync_flag |
5446 				op | MDI_AFTER_STATE_CHANGE);
5447 			if (rv != MDI_SUCCESS) {
5448 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5449 				"!vo_pi_state_change: failed rv = %x", rv));
5450 			}
5451 		}
5452 	}
5453 
5454 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5455 		" Returning success pdip = %p cdip = %p\n", op, pdip, cdip));
5456 	return (MDI_SUCCESS);
5457 }
5458 
5459 /*ARGSUSED3*/
5460 int
5461 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
5462     int flags, clock_t timeout)
5463 {
5464 	mdi_pathinfo_t *pip;
5465 	dev_info_t *dip;
5466 	clock_t interval = drv_usectohz(100000);	/* 0.1 sec */
5467 	char *paddr;
5468 
5469 	MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm));
5470 
5471 	if (!MDI_PHCI(pdip))
5472 		return (MDI_FAILURE);
5473 
5474 	paddr = strchr(devnm, '@');
5475 	if (paddr == NULL)
5476 		return (MDI_FAILURE);
5477 
5478 	paddr++;	/* skip '@' */
5479 	pip = mdi_pi_find(pdip, NULL, paddr);
5480 	while (pip == NULL && timeout > 0) {
5481 		if (interval > timeout)
5482 			interval = timeout;
5483 		if (flags & NDI_DEVI_DEBUG) {
5484 			cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n",
5485 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
5486 			    paddr, interval, timeout);
5487 		}
5488 		delay(interval);
5489 		timeout -= interval;
5490 		interval += interval;
5491 		pip = mdi_pi_find(pdip, NULL, paddr);
5492 	}
5493 
5494 	if (pip == NULL)
5495 		return (MDI_FAILURE);
5496 	dip = mdi_pi_get_client(pip);
5497 	if (ndi_devi_online(dip, flags) != NDI_SUCCESS)
5498 		return (MDI_FAILURE);
5499 	*cdipp = dip;
5500 
5501 	/* TODO: holding should happen inside search functions */
5502 	ndi_hold_devi(dip);
5503 	return (MDI_SUCCESS);
5504 }
5505 
5506 /*
5507  * Ensure phci powered up
5508  */
5509 static void
5510 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
5511 {
5512 	dev_info_t	*ph_dip;
5513 
5514 	ASSERT(pip != NULL);
5515 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5516 
5517 	if (MDI_PI(pip)->pi_pm_held) {
5518 		return;
5519 	}
5520 
5521 	ph_dip = mdi_pi_get_phci(pip);
5522 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n",
5523 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5524 	if (ph_dip == NULL) {
5525 		return;
5526 	}
5527 
5528 	MDI_PI_UNLOCK(pip);
5529 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5530 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5531 	pm_hold_power(ph_dip);
5532 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5533 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5534 	MDI_PI_LOCK(pip);
5535 
5536 	MDI_PI(pip)->pi_pm_held = 1;
5537 }
5538 
5539 /*
5540  * Allow phci powered down
5541  */
5542 static void
5543 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
5544 {
5545 	dev_info_t	*ph_dip = NULL;
5546 
5547 	ASSERT(pip != NULL);
5548 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5549 
5550 	if (MDI_PI(pip)->pi_pm_held == 0) {
5551 		return;
5552 	}
5553 
5554 	ph_dip = mdi_pi_get_phci(pip);
5555 	ASSERT(ph_dip != NULL);
5556 
5557 	MDI_PI_UNLOCK(pip);
5558 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n",
5559 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5560 
5561 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5562 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5563 	pm_rele_power(ph_dip);
5564 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5565 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5566 
5567 	MDI_PI_LOCK(pip);
5568 	MDI_PI(pip)->pi_pm_held = 0;
5569 }
5570 
5571 static void
5572 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
5573 {
5574 	ASSERT(ct);
5575 
5576 	ct->ct_power_cnt += incr;
5577 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client "
5578 	    "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr));
5579 	ASSERT(ct->ct_power_cnt >= 0);
5580 }
5581 
5582 static void
5583 i_mdi_rele_all_phci(mdi_client_t *ct)
5584 {
5585 	mdi_pathinfo_t  *pip;
5586 
5587 	ASSERT(mutex_owned(&ct->ct_mutex));
5588 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5589 	while (pip != NULL) {
5590 		mdi_hold_path(pip);
5591 		MDI_PI_LOCK(pip);
5592 		i_mdi_pm_rele_pip(pip);
5593 		MDI_PI_UNLOCK(pip);
5594 		mdi_rele_path(pip);
5595 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5596 	}
5597 }
5598 
5599 static void
5600 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
5601 {
5602 	ASSERT(ct);
5603 
5604 	if (i_ddi_node_state(ct->ct_dip) >= DS_READY) {
5605 		ct->ct_power_cnt -= decr;
5606 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
5607 		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
5608 	}
5609 
5610 	ASSERT(ct->ct_power_cnt >= 0);
5611 	if (ct->ct_power_cnt == 0) {
5612 		i_mdi_rele_all_phci(ct);
5613 		return;
5614 	}
5615 }
5616 
5617 static void
5618 i_mdi_pm_reset_client(mdi_client_t *ct)
5619 {
5620 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
5621 	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
5622 	ct->ct_power_cnt = 0;
5623 	i_mdi_rele_all_phci(ct);
5624 	ct->ct_powercnt_reset = 1;
5625 	ct->ct_powercnt_held = 0;
5626 }
5627 
5628 static void
5629 i_mdi_pm_hold_all_phci(mdi_client_t *ct)
5630 {
5631 	mdi_pathinfo_t  *pip;
5632 	ASSERT(mutex_owned(&ct->ct_mutex));
5633 
5634 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5635 	while (pip != NULL) {
5636 		mdi_hold_path(pip);
5637 		MDI_PI_LOCK(pip);
5638 		i_mdi_pm_hold_pip(pip);
5639 		MDI_PI_UNLOCK(pip);
5640 		mdi_rele_path(pip);
5641 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5642 	}
5643 }
5644 
5645 static int
5646 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
5647 {
5648 	int		ret;
5649 	dev_info_t	*ph_dip;
5650 
5651 	MDI_PI_LOCK(pip);
5652 	i_mdi_pm_hold_pip(pip);
5653 
5654 	ph_dip = mdi_pi_get_phci(pip);
5655 	MDI_PI_UNLOCK(pip);
5656 
5657 	/* bring all components of phci to full power */
5658 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5659 	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
5660 	    ddi_get_instance(ph_dip)));
5661 
5662 	ret = pm_powerup(ph_dip);
5663 
5664 	if (ret == DDI_FAILURE) {
5665 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5666 		    "pm_powerup FAILED for %s%d\n",
5667 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5668 
5669 		MDI_PI_LOCK(pip);
5670 		i_mdi_pm_rele_pip(pip);
5671 		MDI_PI_UNLOCK(pip);
5672 		return (MDI_FAILURE);
5673 	}
5674 
5675 	return (MDI_SUCCESS);
5676 }
5677 
5678 static int
5679 i_mdi_power_all_phci(mdi_client_t *ct)
5680 {
5681 	mdi_pathinfo_t  *pip;
5682 	int		succeeded = 0;
5683 
5684 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5685 	while (pip != NULL) {
5686 		mdi_hold_path(pip);
5687 		MDI_CLIENT_UNLOCK(ct);
5688 		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
5689 			succeeded = 1;
5690 
5691 		ASSERT(ct == MDI_PI(pip)->pi_client);
5692 		MDI_CLIENT_LOCK(ct);
5693 		mdi_rele_path(pip);
5694 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5695 	}
5696 
5697 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
5698 }
5699 
5700 /*
5701  * mdi_bus_power():
5702  *		1. Place the phci(s) into powered up state so that
5703  *		   client can do power management
5704  *		2. Ensure phci powered up as client power managing
5705  * Return Values:
5706  *		MDI_SUCCESS
5707  *		MDI_FAILURE
5708  */
5709 int
5710 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
5711     void *arg, void *result)
5712 {
5713 	int			ret = MDI_SUCCESS;
5714 	pm_bp_child_pwrchg_t	*bpc;
5715 	mdi_client_t		*ct;
5716 	dev_info_t		*cdip;
5717 	pm_bp_has_changed_t	*bphc;
5718 
5719 	/*
5720 	 * BUS_POWER_NOINVOL not supported
5721 	 */
5722 	if (op == BUS_POWER_NOINVOL)
5723 		return (MDI_FAILURE);
5724 
5725 	/*
5726 	 * ignore other OPs.
5727 	 * return quickly to save cou cycles on the ct processing
5728 	 */
5729 	switch (op) {
5730 	case BUS_POWER_PRE_NOTIFICATION:
5731 	case BUS_POWER_POST_NOTIFICATION:
5732 		bpc = (pm_bp_child_pwrchg_t *)arg;
5733 		cdip = bpc->bpc_dip;
5734 		break;
5735 	case BUS_POWER_HAS_CHANGED:
5736 		bphc = (pm_bp_has_changed_t *)arg;
5737 		cdip = bphc->bphc_dip;
5738 		break;
5739 	default:
5740 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
5741 	}
5742 
5743 	ASSERT(MDI_CLIENT(cdip));
5744 
5745 	ct = i_devi_get_client(cdip);
5746 	if (ct == NULL)
5747 		return (MDI_FAILURE);
5748 
5749 	/*
5750 	 * wait till the mdi_pathinfo node state change are processed
5751 	 */
5752 	MDI_CLIENT_LOCK(ct);
5753 	switch (op) {
5754 	case BUS_POWER_PRE_NOTIFICATION:
5755 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
5756 		    "BUS_POWER_PRE_NOTIFICATION:"
5757 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
5758 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
5759 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
5760 
5761 		/* serialize power level change per client */
5762 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5763 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5764 
5765 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
5766 
5767 		if (ct->ct_power_cnt == 0) {
5768 			ret = i_mdi_power_all_phci(ct);
5769 		}
5770 
5771 		/*
5772 		 * if new_level > 0:
5773 		 *	- hold phci(s)
5774 		 *	- power up phci(s) if not already
5775 		 * ignore power down
5776 		 */
5777 		if (bpc->bpc_nlevel > 0) {
5778 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
5779 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
5780 				    "mdi_bus_power i_mdi_pm_hold_client\n"));
5781 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
5782 			}
5783 		}
5784 		break;
5785 	case BUS_POWER_POST_NOTIFICATION:
5786 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
5787 		    "BUS_POWER_POST_NOTIFICATION:"
5788 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
5789 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
5790 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
5791 		    *(int *)result));
5792 
5793 		if (*(int *)result == DDI_SUCCESS) {
5794 			if (bpc->bpc_nlevel > 0) {
5795 				MDI_CLIENT_SET_POWER_UP(ct);
5796 			} else {
5797 				MDI_CLIENT_SET_POWER_DOWN(ct);
5798 			}
5799 		}
5800 
5801 		/* release the hold we did in pre-notification */
5802 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
5803 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
5804 			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
5805 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
5806 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5807 		}
5808 
5809 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
5810 			/* another thread might started attaching */
5811 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5812 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
5813 				    "mdi_bus_power i_mdi_pm_rele_client\n"));
5814 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
5815 			/* detaching has been taken care in pm_post_unconfig */
5816 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
5817 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
5818 				    "mdi_bus_power i_mdi_pm_reset_client\n"));
5819 				i_mdi_pm_reset_client(ct);
5820 			}
5821 		}
5822 
5823 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
5824 		cv_broadcast(&ct->ct_powerchange_cv);
5825 
5826 		break;
5827 
5828 	/* need to do more */
5829 	case BUS_POWER_HAS_CHANGED:
5830 		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
5831 		    "BUS_POWER_HAS_CHANGED:"
5832 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
5833 		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
5834 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
5835 
5836 		if (bphc->bphc_nlevel > 0 &&
5837 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
5838 			if (ct->ct_power_cnt == 0) {
5839 				ret = i_mdi_power_all_phci(ct);
5840 			}
5841 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
5842 			    "mdi_bus_power i_mdi_pm_hold_client\n"));
5843 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
5844 		}
5845 
5846 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
5847 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
5848 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
5849 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5850 		}
5851 		break;
5852 	}
5853 
5854 	MDI_CLIENT_UNLOCK(ct);
5855 	return (ret);
5856 }
5857 
5858 static int
5859 i_mdi_pm_pre_config_one(dev_info_t *child)
5860 {
5861 	int		ret = MDI_SUCCESS;
5862 	mdi_client_t	*ct;
5863 
5864 	ct = i_devi_get_client(child);
5865 	if (ct == NULL)
5866 		return (MDI_FAILURE);
5867 
5868 	MDI_CLIENT_LOCK(ct);
5869 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5870 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5871 
5872 	if (!MDI_CLIENT_IS_FAILED(ct)) {
5873 		MDI_CLIENT_UNLOCK(ct);
5874 		MDI_DEBUG(4, (CE_NOTE, child,
5875 		    "i_mdi_pm_pre_config_one already configured\n"));
5876 		return (MDI_SUCCESS);
5877 	}
5878 
5879 	if (ct->ct_powercnt_held) {
5880 		MDI_CLIENT_UNLOCK(ct);
5881 		MDI_DEBUG(4, (CE_NOTE, child,
5882 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
5883 		return (MDI_SUCCESS);
5884 	}
5885 
5886 	if (ct->ct_power_cnt == 0) {
5887 		ret = i_mdi_power_all_phci(ct);
5888 	}
5889 	MDI_DEBUG(4, (CE_NOTE, child,
5890 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
5891 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5892 	ct->ct_powercnt_held = 1;
5893 	ct->ct_powercnt_reset = 0;
5894 	MDI_CLIENT_UNLOCK(ct);
5895 	return (ret);
5896 }
5897 
5898 static int
5899 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
5900 {
5901 	int			ret = MDI_SUCCESS;
5902 	dev_info_t		*cdip;
5903 	int			circ;
5904 
5905 	ASSERT(MDI_VHCI(parent));
5906 
5907 	/* ndi_devi_config_one */
5908 	if (child) {
5909 		return (i_mdi_pm_pre_config_one(child));
5910 	}
5911 
5912 	/* devi_config_common */
5913 	ndi_devi_enter(parent, &circ);
5914 	cdip = ddi_get_child(parent);
5915 	while (cdip) {
5916 		dev_info_t *next = ddi_get_next_sibling(cdip);
5917 
5918 		ret = i_mdi_pm_pre_config_one(cdip);
5919 		if (ret != MDI_SUCCESS)
5920 			break;
5921 		cdip = next;
5922 	}
5923 	ndi_devi_exit(parent, circ);
5924 	return (ret);
5925 }
5926 
5927 static int
5928 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
5929 {
5930 	int		ret = MDI_SUCCESS;
5931 	mdi_client_t	*ct;
5932 
5933 	ct = i_devi_get_client(child);
5934 	if (ct == NULL)
5935 		return (MDI_FAILURE);
5936 
5937 	MDI_CLIENT_LOCK(ct);
5938 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5939 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5940 
5941 	if (i_ddi_node_state(ct->ct_dip) < DS_READY) {
5942 		MDI_DEBUG(4, (CE_NOTE, child,
5943 		    "i_mdi_pm_pre_unconfig node detached already\n"));
5944 		MDI_CLIENT_UNLOCK(ct);
5945 		return (MDI_SUCCESS);
5946 	}
5947 
5948 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
5949 	    (flags & NDI_AUTODETACH)) {
5950 		MDI_DEBUG(4, (CE_NOTE, child,
5951 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
5952 		MDI_CLIENT_UNLOCK(ct);
5953 		return (MDI_FAILURE);
5954 	}
5955 
5956 	if (ct->ct_powercnt_held) {
5957 		MDI_DEBUG(4, (CE_NOTE, child,
5958 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
5959 		MDI_CLIENT_UNLOCK(ct);
5960 		*held = 1;
5961 		return (MDI_SUCCESS);
5962 	}
5963 
5964 	if (ct->ct_power_cnt == 0) {
5965 		ret = i_mdi_power_all_phci(ct);
5966 	}
5967 	MDI_DEBUG(4, (CE_NOTE, child,
5968 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
5969 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5970 	ct->ct_powercnt_held = 1;
5971 	ct->ct_powercnt_reset = 0;
5972 	MDI_CLIENT_UNLOCK(ct);
5973 	if (ret == MDI_SUCCESS)
5974 		*held = 1;
5975 	return (ret);
5976 }
5977 
5978 static int
5979 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
5980     int flags)
5981 {
5982 	int			ret = MDI_SUCCESS;
5983 	dev_info_t		*cdip;
5984 	int			circ;
5985 
5986 	ASSERT(MDI_VHCI(parent));
5987 	*held = 0;
5988 
5989 	/* ndi_devi_unconfig_one */
5990 	if (child) {
5991 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
5992 	}
5993 
5994 	/* devi_unconfig_common */
5995 	ndi_devi_enter(parent, &circ);
5996 	cdip = ddi_get_child(parent);
5997 	while (cdip) {
5998 		dev_info_t *next = ddi_get_next_sibling(cdip);
5999 
6000 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6001 		cdip = next;
6002 	}
6003 	ndi_devi_exit(parent, circ);
6004 
6005 	if (*held)
6006 		ret = MDI_SUCCESS;
6007 
6008 	return (ret);
6009 }
6010 
6011 static void
6012 i_mdi_pm_post_config_one(dev_info_t *child)
6013 {
6014 	mdi_client_t	*ct;
6015 
6016 	ct = i_devi_get_client(child);
6017 	if (ct == NULL)
6018 		return;
6019 
6020 	MDI_CLIENT_LOCK(ct);
6021 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6022 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6023 
6024 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_held) {
6025 		MDI_DEBUG(4, (CE_NOTE, child,
6026 		    "i_mdi_pm_post_config_one NOT held\n"));
6027 		MDI_CLIENT_UNLOCK(ct);
6028 		return;
6029 	}
6030 
6031 	/* client has not been updated */
6032 	if (MDI_CLIENT_IS_FAILED(ct)) {
6033 		MDI_DEBUG(4, (CE_NOTE, child,
6034 		    "i_mdi_pm_post_config_one NOT configured\n"));
6035 		MDI_CLIENT_UNLOCK(ct);
6036 		return;
6037 	}
6038 
6039 	/* another thread might have powered it down or detached it */
6040 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6041 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6042 	    (i_ddi_node_state(ct->ct_dip) < DS_READY &&
6043 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6044 		MDI_DEBUG(4, (CE_NOTE, child,
6045 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6046 		i_mdi_pm_reset_client(ct);
6047 	} else {
6048 		mdi_pathinfo_t	*pip, *next;
6049 		int	valid_path_count = 0;
6050 
6051 		MDI_DEBUG(4, (CE_NOTE, child,
6052 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6053 		pip = ct->ct_path_head;
6054 		while (pip != NULL) {
6055 			MDI_PI_LOCK(pip);
6056 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6057 			if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
6058 				== MDI_PATHINFO_STATE_ONLINE ||
6059 			    (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
6060 				== MDI_PATHINFO_STATE_STANDBY)
6061 				valid_path_count ++;
6062 			MDI_PI_UNLOCK(pip);
6063 			pip = next;
6064 		}
6065 		i_mdi_pm_rele_client(ct, valid_path_count);
6066 	}
6067 	ct->ct_powercnt_held = 0;
6068 	MDI_CLIENT_UNLOCK(ct);
6069 }
6070 
6071 static void
6072 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
6073 {
6074 	int		circ;
6075 	dev_info_t	*cdip;
6076 	ASSERT(MDI_VHCI(parent));
6077 
6078 	/* ndi_devi_config_one */
6079 	if (child) {
6080 		i_mdi_pm_post_config_one(child);
6081 		return;
6082 	}
6083 
6084 	/* devi_config_common */
6085 	ndi_devi_enter(parent, &circ);
6086 	cdip = ddi_get_child(parent);
6087 	while (cdip) {
6088 		dev_info_t *next = ddi_get_next_sibling(cdip);
6089 
6090 		i_mdi_pm_post_config_one(cdip);
6091 		cdip = next;
6092 	}
6093 	ndi_devi_exit(parent, circ);
6094 }
6095 
6096 static void
6097 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6098 {
6099 	mdi_client_t	*ct;
6100 
6101 	ct = i_devi_get_client(child);
6102 	if (ct == NULL)
6103 		return;
6104 
6105 	MDI_CLIENT_LOCK(ct);
6106 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6107 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6108 
6109 	if (!ct->ct_powercnt_held) {
6110 		MDI_DEBUG(4, (CE_NOTE, child,
6111 		    "i_mdi_pm_post_unconfig NOT held\n"));
6112 		MDI_CLIENT_UNLOCK(ct);
6113 		return;
6114 	}
6115 
6116 	/* failure detaching or another thread just attached it */
6117 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6118 	    i_ddi_node_state(ct->ct_dip) == DS_READY) ||
6119 	    (i_ddi_node_state(ct->ct_dip) != DS_READY &&
6120 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6121 		MDI_DEBUG(4, (CE_NOTE, child,
6122 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6123 		i_mdi_pm_reset_client(ct);
6124 	}
6125 
6126 	MDI_DEBUG(4, (CE_NOTE, child,
6127 	    "i_mdi_pm_post_unconfig not changed\n"));
6128 	MDI_CLIENT_UNLOCK(ct);
6129 }
6130 
6131 static void
6132 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
6133 {
6134 	int			circ;
6135 	dev_info_t		*cdip;
6136 
6137 	ASSERT(MDI_VHCI(parent));
6138 
6139 	if (!held) {
6140 		MDI_DEBUG(4, (CE_NOTE, parent,
6141 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6142 		return;
6143 	}
6144 
6145 	if (child) {
6146 		i_mdi_pm_post_unconfig_one(child);
6147 		return;
6148 	}
6149 
6150 	ndi_devi_enter(parent, &circ);
6151 	cdip = ddi_get_child(parent);
6152 	while (cdip) {
6153 		dev_info_t *next = ddi_get_next_sibling(cdip);
6154 
6155 		i_mdi_pm_post_unconfig_one(cdip);
6156 		cdip = next;
6157 	}
6158 	ndi_devi_exit(parent, circ);
6159 }
6160 
6161 int
6162 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6163 {
6164 	int			circ, ret = MDI_SUCCESS;
6165 	dev_info_t		*client_dip = NULL;
6166 	mdi_client_t		*ct;
6167 
6168 	/*
6169 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6170 	 * Power up pHCI for the named client device.
6171 	 * Note: Before the client is enumerated under vhci by phci,
6172 	 * client_dip can be NULL. Then proceed to power up all the
6173 	 * pHCIs.
6174 	 */
6175 	if (devnm != NULL) {
6176 		ndi_devi_enter(vdip, &circ);
6177 		client_dip = ndi_devi_findchild(vdip, devnm);
6178 		ndi_devi_exit(vdip, circ);
6179 	}
6180 
6181 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));
6182 
6183 	switch (op) {
6184 	case MDI_PM_PRE_CONFIG:
6185 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6186 
6187 		break;
6188 	case MDI_PM_PRE_UNCONFIG:
6189 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6190 		    flags);
6191 
6192 		break;
6193 	case MDI_PM_POST_CONFIG:
6194 		i_mdi_pm_post_config(vdip, client_dip);
6195 
6196 		break;
6197 	case MDI_PM_POST_UNCONFIG:
6198 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6199 
6200 		break;
6201 	case MDI_PM_HOLD_POWER:
6202 	case MDI_PM_RELE_POWER:
6203 		ASSERT(args);
6204 
6205 		client_dip = (dev_info_t *)args;
6206 		ASSERT(MDI_CLIENT(client_dip));
6207 
6208 		ct = i_devi_get_client(client_dip);
6209 		MDI_CLIENT_LOCK(ct);
6210 
6211 		if (op == MDI_PM_HOLD_POWER) {
6212 			if (ct->ct_power_cnt == 0) {
6213 				(void) i_mdi_power_all_phci(ct);
6214 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6215 				    "mdi_power i_mdi_pm_hold_client\n"));
6216 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6217 			}
6218 		} else {
6219 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6220 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6221 				    "mdi_power i_mdi_pm_rele_client\n"));
6222 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6223 			} else {
6224 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6225 				    "mdi_power i_mdi_pm_reset_client\n"));
6226 				i_mdi_pm_reset_client(ct);
6227 			}
6228 		}
6229 
6230 		MDI_CLIENT_UNLOCK(ct);
6231 		break;
6232 	default:
6233 		break;
6234 	}
6235 
6236 	return (ret);
6237 }
6238 
6239 int
6240 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6241 {
6242 	mdi_vhci_t *vhci;
6243 
6244 	if (!MDI_VHCI(dip))
6245 		return (MDI_FAILURE);
6246 
6247 	if (mdi_class) {
6248 		vhci = DEVI(dip)->devi_mdi_xhci;
6249 		ASSERT(vhci);
6250 		*mdi_class = vhci->vh_class;
6251 	}
6252 
6253 	return (MDI_SUCCESS);
6254 }
6255 
6256 int
6257 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6258 {
6259 	mdi_phci_t *phci;
6260 
6261 	if (!MDI_PHCI(dip))
6262 		return (MDI_FAILURE);
6263 
6264 	if (mdi_class) {
6265 		phci = DEVI(dip)->devi_mdi_xhci;
6266 		ASSERT(phci);
6267 		*mdi_class = phci->ph_vhci->vh_class;
6268 	}
6269 
6270 	return (MDI_SUCCESS);
6271 }
6272 
6273 int
6274 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6275 {
6276 	mdi_client_t *client;
6277 
6278 	if (!MDI_CLIENT(dip))
6279 		return (MDI_FAILURE);
6280 
6281 	if (mdi_class) {
6282 		client = DEVI(dip)->devi_mdi_client;
6283 		ASSERT(client);
6284 		*mdi_class = client->ct_vhci->vh_class;
6285 	}
6286 
6287 	return (MDI_SUCCESS);
6288 }
6289 
6290 void *
6291 mdi_client_get_vhci_private(dev_info_t *dip)
6292 {
6293 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6294 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6295 		mdi_client_t	*ct;
6296 		ct = i_devi_get_client(dip);
6297 		return (ct->ct_vprivate);
6298 	}
6299 	return (NULL);
6300 }
6301 
6302 void
6303 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6304 {
6305 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6306 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6307 		mdi_client_t	*ct;
6308 		ct = i_devi_get_client(dip);
6309 		ct->ct_vprivate = data;
6310 	}
6311 }
6312 /*
6313  * mdi_pi_get_vhci_private():
6314  *		Get the vhci private information associated with the
6315  *		mdi_pathinfo node
6316  */
6317 void *
6318 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6319 {
6320 	caddr_t	vprivate = NULL;
6321 	if (pip) {
6322 		vprivate = MDI_PI(pip)->pi_vprivate;
6323 	}
6324 	return (vprivate);
6325 }
6326 
6327 /*
6328  * mdi_pi_set_vhci_private():
6329  *		Set the vhci private information in the mdi_pathinfo node
6330  */
6331 void
6332 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6333 {
6334 	if (pip) {
6335 		MDI_PI(pip)->pi_vprivate = priv;
6336 	}
6337 }
6338 
6339 /*
6340  * mdi_phci_get_vhci_private():
6341  *		Get the vhci private information associated with the
6342  *		mdi_phci node
6343  */
6344 void *
6345 mdi_phci_get_vhci_private(dev_info_t *dip)
6346 {
6347 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6348 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6349 		mdi_phci_t	*ph;
6350 		ph = i_devi_get_phci(dip);
6351 		return (ph->ph_vprivate);
6352 	}
6353 	return (NULL);
6354 }
6355 
6356 /*
6357  * mdi_phci_set_vhci_private():
6358  *		Set the vhci private information in the mdi_phci node
6359  */
6360 void
6361 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6362 {
6363 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6364 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6365 		mdi_phci_t	*ph;
6366 		ph = i_devi_get_phci(dip);
6367 		ph->ph_vprivate = priv;
6368 	}
6369 }
6370 
6371 /*
6372  * List of vhci class names:
6373  * A vhci class name must be in this list only if the corresponding vhci
6374  * driver intends to use the mdi provided bus config implementation
6375  * (i.e., mdi_vhci_bus_config()).
6376  */
6377 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
6378 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
6379 
6380 /*
6381  * Built-in list of phci drivers for every vhci class.
6382  * All phci drivers expect iscsi have root device support.
6383  */
6384 static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
6385 	{ "fp", 1 },
6386 	{ "iscsi", 0 },
6387 	{ "ibsrp", 1 }
6388 	};
6389 
6390 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
6391 
6392 /*
6393  * During boot time, the on-disk vhci cache for every vhci class is read
6394  * in the form of an nvlist and stored here.
6395  */
6396 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
6397 
6398 /* nvpair names in vhci cache nvlist */
6399 #define	MDI_VHCI_CACHE_VERSION	1
6400 #define	MDI_NVPNAME_VERSION	"version"
6401 #define	MDI_NVPNAME_PHCIS	"phcis"
6402 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
6403 
6404 typedef enum {
6405 	VHCACHE_NOT_REBUILT,
6406 	VHCACHE_PARTIALLY_BUILT,
6407 	VHCACHE_FULLY_BUILT
6408 } vhcache_build_status_t;
6409 
6410 /*
6411  * Given vhci class name, return its on-disk vhci cache filename.
6412  * Memory for the returned filename which includes the full path is allocated
6413  * by this function.
6414  */
6415 static char *
6416 vhclass2vhcache_filename(char *vhclass)
6417 {
6418 	char *filename;
6419 	int len;
6420 	static char *fmt = "/etc/devices/mdi_%s_cache";
6421 
6422 	/*
6423 	 * fmt contains the on-disk vhci cache file name format;
6424 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
6425 	 */
6426 
6427 	/* the -1 below is to account for "%s" in the format string */
6428 	len = strlen(fmt) + strlen(vhclass) - 1;
6429 	filename = kmem_alloc(len, KM_SLEEP);
6430 	(void) snprintf(filename, len, fmt, vhclass);
6431 	ASSERT(len == (strlen(filename) + 1));
6432 	return (filename);
6433 }
6434 
6435 /*
6436  * initialize the vhci cache related data structures and read the on-disk
6437  * vhci cached data into memory.
6438  */
6439 static void
6440 setup_vhci_cache(mdi_vhci_t *vh)
6441 {
6442 	mdi_vhci_config_t *vhc;
6443 	mdi_vhci_cache_t *vhcache;
6444 	int i;
6445 	nvlist_t *nvl = NULL;
6446 
6447 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
6448 	vh->vh_config = vhc;
6449 	vhcache = &vhc->vhc_vhcache;
6450 
6451 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
6452 
6453 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
6454 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
6455 
6456 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
6457 
6458 	/*
6459 	 * Create string hash; same as mod_hash_create_strhash() except that
6460 	 * we use NULL key destructor.
6461 	 */
6462 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
6463 	    mdi_bus_config_cache_hash_size,
6464 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
6465 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
6466 
6467 	setup_phci_driver_list(vh);
6468 
6469 	/*
6470 	 * The on-disk vhci cache is read during booting prior to the
6471 	 * lights-out period by mdi_read_devices_files().
6472 	 */
6473 	for (i = 0; i < N_VHCI_CLASSES; i++) {
6474 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
6475 			nvl = vhcache_nvl[i];
6476 			vhcache_nvl[i] = NULL;
6477 			break;
6478 		}
6479 	}
6480 
6481 	/*
6482 	 * this is to cover the case of some one manually causing unloading
6483 	 * (or detaching) and reloading (or attaching) of a vhci driver.
6484 	 */
6485 	if (nvl == NULL && modrootloaded)
6486 		nvl = read_on_disk_vhci_cache(vh->vh_class);
6487 
6488 	if (nvl != NULL) {
6489 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
6490 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
6491 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
6492 		else  {
6493 			cmn_err(CE_WARN,
6494 			    "%s: data file corrupted, will recreate\n",
6495 			    vhc->vhc_vhcache_filename);
6496 		}
6497 		rw_exit(&vhcache->vhcache_lock);
6498 		nvlist_free(nvl);
6499 	}
6500 
6501 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
6502 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
6503 }
6504 
6505 /*
6506  * free all vhci cache related resources
6507  */
6508 static int
6509 destroy_vhci_cache(mdi_vhci_t *vh)
6510 {
6511 	mdi_vhci_config_t *vhc = vh->vh_config;
6512 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
6513 	mdi_vhcache_phci_t *cphci, *cphci_next;
6514 	mdi_vhcache_client_t *cct, *cct_next;
6515 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
6516 
6517 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
6518 		return (MDI_FAILURE);
6519 
6520 	kmem_free(vhc->vhc_vhcache_filename,
6521 	    strlen(vhc->vhc_vhcache_filename) + 1);
6522 
6523 	if (vhc->vhc_phci_driver_list)
6524 		free_phci_driver_list(vhc);
6525 
6526 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
6527 
6528 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
6529 	    cphci = cphci_next) {
6530 		cphci_next = cphci->cphci_next;
6531 		free_vhcache_phci(cphci);
6532 	}
6533 
6534 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
6535 		cct_next = cct->cct_next;
6536 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
6537 			cpi_next = cpi->cpi_next;
6538 			free_vhcache_pathinfo(cpi);
6539 		}
6540 		free_vhcache_client(cct);
6541 	}
6542 
6543 	rw_destroy(&vhcache->vhcache_lock);
6544 
6545 	mutex_destroy(&vhc->vhc_lock);
6546 	cv_destroy(&vhc->vhc_cv);
6547 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
6548 	return (MDI_SUCCESS);
6549 }
6550 
6551 /*
6552  * Setup the list of phci drivers associated with the specified vhci class.
6553  * MDI uses this information to rebuild bus config cache if in case the
6554  * cache is not available or corrupted.
6555  */
6556 static void
6557 setup_phci_driver_list(mdi_vhci_t *vh)
6558 {
6559 	mdi_vhci_config_t *vhc = vh->vh_config;
6560 	mdi_phci_driver_info_t *driver_list;
6561 	char **driver_list1;
6562 	uint_t ndrivers, ndrivers1;
6563 	int i, j;
6564 
6565 	if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
6566 		driver_list = scsi_phci_driver_list;
6567 		ndrivers = sizeof (scsi_phci_driver_list) /
6568 		    sizeof (mdi_phci_driver_info_t);
6569 	} else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) {
6570 		driver_list = ib_phci_driver_list;
6571 		ndrivers = sizeof (ib_phci_driver_list) /
6572 		    sizeof (mdi_phci_driver_info_t);
6573 	} else {
6574 		driver_list = NULL;
6575 		ndrivers = 0;
6576 	}
6577 
6578 	/*
6579 	 * The driver.conf file of a vhci driver can specify additional
6580 	 * phci drivers using a project private "phci-drivers" property.
6581 	 */
6582 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip,
6583 	    DDI_PROP_DONTPASS, "phci-drivers", &driver_list1,
6584 	    &ndrivers1) != DDI_PROP_SUCCESS)
6585 		ndrivers1 = 0;
6586 
6587 	vhc->vhc_nphci_drivers = ndrivers + ndrivers1;
6588 	if (vhc->vhc_nphci_drivers == 0)
6589 		return;
6590 
6591 	vhc->vhc_phci_driver_list = kmem_alloc(
6592 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP);
6593 
6594 	for (i = 0; i < ndrivers; i++) {
6595 		vhc->vhc_phci_driver_list[i].phdriver_name =
6596 		    i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP);
6597 		vhc->vhc_phci_driver_list[i].phdriver_root_support =
6598 		    driver_list[i].phdriver_root_support;
6599 	}
6600 
6601 	for (j = 0; j < ndrivers1; j++, i++) {
6602 		vhc->vhc_phci_driver_list[i].phdriver_name =
6603 		    i_ddi_strdup(driver_list1[j], KM_SLEEP);
6604 		vhc->vhc_phci_driver_list[i].phdriver_root_support = 1;
6605 	}
6606 
6607 	if (ndrivers1)
6608 		ddi_prop_free(driver_list1);
6609 }
6610 
6611 /*
6612  * Free the memory allocated for the phci driver list
6613  */
6614 static void
6615 free_phci_driver_list(mdi_vhci_config_t *vhc)
6616 {
6617 	int i;
6618 
6619 	if (vhc->vhc_phci_driver_list == NULL)
6620 		return;
6621 
6622 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
6623 		kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name,
6624 		    strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1);
6625 	}
6626 
6627 	kmem_free(vhc->vhc_phci_driver_list,
6628 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers);
6629 }
6630 
6631 /*
6632  * Stop all vhci cache related async threads and free their resources.
6633  */
6634 static int
6635 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
6636 {
6637 	mdi_async_client_config_t *acc, *acc_next;
6638 
6639 	mutex_enter(&vhc->vhc_lock);
6640 	vhc->vhc_flags |= MDI_VHC_EXIT;
6641 	ASSERT(vhc->vhc_acc_thrcount >= 0);
6642 	cv_broadcast(&vhc->vhc_cv);
6643 
6644 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
6645 	    (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) ||
6646 	    vhc->vhc_acc_thrcount != 0) {
6647 		mutex_exit(&vhc->vhc_lock);
6648 		delay(1);
6649 		mutex_enter(&vhc->vhc_lock);
6650 	}
6651 
6652 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
6653 
6654 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
6655 		acc_next = acc->acc_next;
6656 		free_async_client_config(acc);
6657 	}
6658 	vhc->vhc_acc_list_head = NULL;
6659 	vhc->vhc_acc_list_tail = NULL;
6660 	vhc->vhc_acc_count = 0;
6661 
6662 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6663 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6664 		mutex_exit(&vhc->vhc_lock);
6665 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
6666 			vhcache_dirty(vhc);
6667 			return (MDI_FAILURE);
6668 		}
6669 	} else
6670 		mutex_exit(&vhc->vhc_lock);
6671 
6672 	if (callb_delete(vhc->vhc_cbid) != 0)
6673 		return (MDI_FAILURE);
6674 
6675 	return (MDI_SUCCESS);
6676 }
6677 
6678 /*
6679  * Stop vhci cache flush thread
6680  */
6681 /* ARGSUSED */
6682 static boolean_t
6683 stop_vhcache_flush_thread(void *arg, int code)
6684 {
6685 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
6686 
6687 	mutex_enter(&vhc->vhc_lock);
6688 	vhc->vhc_flags |= MDI_VHC_EXIT;
6689 	cv_broadcast(&vhc->vhc_cv);
6690 
6691 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
6692 		mutex_exit(&vhc->vhc_lock);
6693 		delay(1);
6694 		mutex_enter(&vhc->vhc_lock);
6695 	}
6696 
6697 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6698 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6699 		mutex_exit(&vhc->vhc_lock);
6700 		(void) flush_vhcache(vhc, 1);
6701 	} else
6702 		mutex_exit(&vhc->vhc_lock);
6703 
6704 	return (B_TRUE);
6705 }
6706 
6707 /*
6708  * Enqueue the vhcache phci (cphci) at the tail of the list
6709  */
6710 static void
6711 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
6712 {
6713 	cphci->cphci_next = NULL;
6714 	if (vhcache->vhcache_phci_head == NULL)
6715 		vhcache->vhcache_phci_head = cphci;
6716 	else
6717 		vhcache->vhcache_phci_tail->cphci_next = cphci;
6718 	vhcache->vhcache_phci_tail = cphci;
6719 }
6720 
6721 /*
6722  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
6723  */
6724 static void
6725 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6726     mdi_vhcache_pathinfo_t *cpi)
6727 {
6728 	cpi->cpi_next = NULL;
6729 	if (cct->cct_cpi_head == NULL)
6730 		cct->cct_cpi_head = cpi;
6731 	else
6732 		cct->cct_cpi_tail->cpi_next = cpi;
6733 	cct->cct_cpi_tail = cpi;
6734 }
6735 
6736 /*
6737  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
6738  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
6739  * flag set come at the beginning of the list. All cpis which have this
6740  * flag set come at the end of the list.
6741  */
6742 static void
6743 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6744     mdi_vhcache_pathinfo_t *newcpi)
6745 {
6746 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
6747 
6748 	if (cct->cct_cpi_head == NULL ||
6749 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
6750 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
6751 	else {
6752 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
6753 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
6754 		    prev_cpi = cpi, cpi = cpi->cpi_next)
6755 			;
6756 
6757 		if (prev_cpi == NULL)
6758 			cct->cct_cpi_head = newcpi;
6759 		else
6760 			prev_cpi->cpi_next = newcpi;
6761 
6762 		newcpi->cpi_next = cpi;
6763 
6764 		if (cpi == NULL)
6765 			cct->cct_cpi_tail = newcpi;
6766 	}
6767 }
6768 
6769 /*
6770  * Enqueue the vhcache client (cct) at the tail of the list
6771  */
6772 static void
6773 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
6774     mdi_vhcache_client_t *cct)
6775 {
6776 	cct->cct_next = NULL;
6777 	if (vhcache->vhcache_client_head == NULL)
6778 		vhcache->vhcache_client_head = cct;
6779 	else
6780 		vhcache->vhcache_client_tail->cct_next = cct;
6781 	vhcache->vhcache_client_tail = cct;
6782 }
6783 
/*
 * Free an array of nelem strings along with the array itself.
 * Each non-NULL element is freed with a size of strlen() + 1.
 * A NULL array pointer is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx = 0;

	if (str == NULL)
		return;

	while (idx < nelem) {
		char *s = str[idx++];

		if (s != NULL)
			kmem_free(s, strlen(s) + 1);
	}

	kmem_free(str, sizeof (char *) * nelem);
}
6797 
6798 static void
6799 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
6800 {
6801 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
6802 	kmem_free(cphci, sizeof (*cphci));
6803 }
6804 
6805 static void
6806 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
6807 {
6808 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
6809 	kmem_free(cpi, sizeof (*cpi));
6810 }
6811 
6812 static void
6813 free_vhcache_client(mdi_vhcache_client_t *cct)
6814 {
6815 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
6816 	kmem_free(cct, sizeof (*cct));
6817 }
6818 
6819 static char *
6820 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
6821 {
6822 	char *name_addr;
6823 	int len;
6824 
6825 	len = strlen(ct_name) + strlen(ct_addr) + 2;
6826 	name_addr = kmem_alloc(len, KM_SLEEP);
6827 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
6828 
6829 	if (ret_len)
6830 		*ret_len = len;
6831 	return (name_addr);
6832 }
6833 
6834 /*
6835  * Copy the contents of paddrnvl to vhci cache.
6836  * paddrnvl nvlist contains path information for a vhci client.
6837  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
6838  */
6839 static void
6840 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
6841     mdi_vhcache_client_t *cct)
6842 {
6843 	nvpair_t *nvp = NULL;
6844 	mdi_vhcache_pathinfo_t *cpi;
6845 	uint_t nelem;
6846 	uint32_t *val;
6847 
6848 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6849 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
6850 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
6851 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6852 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
6853 		ASSERT(nelem == 2);
6854 		cpi->cpi_cphci = cphci_list[val[0]];
6855 		cpi->cpi_flags = val[1];
6856 		enqueue_tail_vhcache_pathinfo(cct, cpi);
6857 	}
6858 }
6859 
6860 /*
6861  * Copy the contents of caddrmapnvl to vhci cache.
6862  * caddrmapnvl nvlist contains vhci client address to phci client address
6863  * mappings. See the comment in mainnvl_to_vhcache() for the format of
6864  * this nvlist.
6865  */
6866 static void
6867 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
6868     mdi_vhcache_phci_t *cphci_list[])
6869 {
6870 	nvpair_t *nvp = NULL;
6871 	nvlist_t *paddrnvl;
6872 	mdi_vhcache_client_t *cct;
6873 
6874 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6875 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
6876 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
6877 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6878 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
6879 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
6880 		/* the client must contain at least one path */
6881 		ASSERT(cct->cct_cpi_head != NULL);
6882 
6883 		enqueue_vhcache_client(vhcache, cct);
6884 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
6885 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
6886 	}
6887 }
6888 
6889 /*
6890  * Copy the contents of the main nvlist to vhci cache.
6891  *
6892  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
6893  * The nvlist contains the mappings between the vhci client addresses and
6894  * their corresponding phci client addresses.
6895  *
6896  * The structure of the nvlist is as follows:
6897  *
6898  * Main nvlist:
6899  *	NAME		TYPE		DATA
6900  *	version		int32		version number
6901  *	phcis		string array	array of phci paths
6902  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
6903  *
6904  * structure of c2paddrs_nvl:
6905  *	NAME		TYPE		DATA
6906  *	caddr1		nvlist_t	paddrs_nvl1
6907  *	caddr2		nvlist_t	paddrs_nvl2
6908  *	...
6909  * where caddr1, caddr2, ... are vhci client name and addresses in the
6910  * form of "<clientname>@<clientaddress>".
6911  * (for example: "ssd@2000002037cd9f72");
6912  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
6913  *
6914  * structure of paddrs_nvl:
6915  *	NAME		TYPE		DATA
6916  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
6917  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
6918  *	...
6919  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
6920  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
6921  * phci-ids are integers that identify PHCIs to which the
6922  * the bus specific address belongs to. These integers are used as an index
6923  * into to the phcis string array in the main nvlist to get the PHCI path.
6924  */
6925 static int
6926 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
6927 {
6928 	char **phcis, **phci_namep;
6929 	uint_t nphcis;
6930 	mdi_vhcache_phci_t *cphci, **cphci_list;
6931 	nvlist_t *caddrmapnvl;
6932 	int32_t ver;
6933 	int i;
6934 	size_t cphci_list_size;
6935 
6936 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
6937 
6938 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
6939 	    ver != MDI_VHCI_CACHE_VERSION)
6940 		return (MDI_FAILURE);
6941 
6942 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
6943 	    &nphcis) != 0)
6944 		return (MDI_SUCCESS);
6945 
6946 	ASSERT(nphcis > 0);
6947 
6948 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
6949 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
6950 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
6951 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
6952 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
6953 		enqueue_vhcache_phci(vhcache, cphci);
6954 		cphci_list[i] = cphci;
6955 	}
6956 
6957 	ASSERT(vhcache->vhcache_phci_head != NULL);
6958 
6959 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
6960 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
6961 
6962 	kmem_free(cphci_list, cphci_list_size);
6963 	return (MDI_SUCCESS);
6964 }
6965 
6966 /*
6967  * Build paddrnvl for the specified client using the information in the
6968  * vhci cache and add it to the caddrmapnnvl.
6969  * Returns 0 on success, errno on failure.
6970  */
6971 static int
6972 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
6973     nvlist_t *caddrmapnvl)
6974 {
6975 	mdi_vhcache_pathinfo_t *cpi;
6976 	nvlist_t *nvl;
6977 	int err;
6978 	uint32_t val[2];
6979 
6980 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
6981 
6982 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
6983 		return (err);
6984 
6985 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
6986 		val[0] = cpi->cpi_cphci->cphci_id;
6987 		val[1] = cpi->cpi_flags;
6988 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
6989 		    != 0)
6990 			goto out;
6991 	}
6992 
6993 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
6994 out:
6995 	nvlist_free(nvl);
6996 	return (err);
6997 }
6998 
6999 /*
7000  * Build caddrmapnvl using the information in the vhci cache
7001  * and add it to the mainnvl.
7002  * Returns 0 on success, errno on failure.
7003  */
7004 static int
7005 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7006 {
7007 	mdi_vhcache_client_t *cct;
7008 	nvlist_t *nvl;
7009 	int err;
7010 
7011 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7012 
7013 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7014 		return (err);
7015 
7016 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7017 	    cct = cct->cct_next) {
7018 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7019 			goto out;
7020 	}
7021 
7022 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7023 out:
7024 	nvlist_free(nvl);
7025 	return (err);
7026 }
7027 
7028 /*
7029  * Build nvlist using the information in the vhci cache.
7030  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7031  * Returns nvl on success, NULL on failure.
7032  */
7033 static nvlist_t *
7034 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7035 {
7036 	mdi_vhcache_phci_t *cphci;
7037 	uint_t phci_count;
7038 	char **phcis;
7039 	nvlist_t *nvl;
7040 	int err, i;
7041 
7042 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7043 		nvl = NULL;
7044 		goto out;
7045 	}
7046 
7047 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7048 	    MDI_VHCI_CACHE_VERSION)) != 0)
7049 		goto out;
7050 
7051 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7052 	if (vhcache->vhcache_phci_head == NULL) {
7053 		rw_exit(&vhcache->vhcache_lock);
7054 		return (nvl);
7055 	}
7056 
7057 	phci_count = 0;
7058 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7059 	    cphci = cphci->cphci_next)
7060 		cphci->cphci_id = phci_count++;
7061 
7062 	/* build phci pathname list */
7063 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7064 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7065 	    cphci = cphci->cphci_next, i++)
7066 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7067 
7068 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7069 	    phci_count);
7070 	free_string_array(phcis, phci_count);
7071 
7072 	if (err == 0 &&
7073 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7074 		rw_exit(&vhcache->vhcache_lock);
7075 		return (nvl);
7076 	}
7077 
7078 	rw_exit(&vhcache->vhcache_lock);
7079 out:
7080 	if (nvl)
7081 		nvlist_free(nvl);
7082 	return (NULL);
7083 }
7084 
7085 /*
7086  * Lookup vhcache phci structure for the specified phci path.
7087  */
7088 static mdi_vhcache_phci_t *
7089 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7090 {
7091 	mdi_vhcache_phci_t *cphci;
7092 
7093 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7094 
7095 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7096 	    cphci = cphci->cphci_next) {
7097 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7098 			return (cphci);
7099 	}
7100 
7101 	return (NULL);
7102 }
7103 
7104 /*
7105  * Lookup vhcache phci structure for the specified phci.
7106  */
7107 static mdi_vhcache_phci_t *
7108 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7109 {
7110 	mdi_vhcache_phci_t *cphci;
7111 
7112 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7113 
7114 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7115 	    cphci = cphci->cphci_next) {
7116 		if (cphci->cphci_phci == ph)
7117 			return (cphci);
7118 	}
7119 
7120 	return (NULL);
7121 }
7122 
7123 /*
7124  * Add the specified phci to the vhci cache if not already present.
7125  */
7126 static void
7127 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7128 {
7129 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7130 	mdi_vhcache_phci_t *cphci;
7131 	char *pathname;
7132 	int cache_updated;
7133 
7134 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7135 
7136 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7137 	(void) ddi_pathname(ph->ph_dip, pathname);
7138 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7139 	    != NULL) {
7140 		cphci->cphci_phci = ph;
7141 		cache_updated = 0;
7142 	} else {
7143 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7144 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7145 		cphci->cphci_phci = ph;
7146 		enqueue_vhcache_phci(vhcache, cphci);
7147 		cache_updated = 1;
7148 	}
7149 	rw_exit(&vhcache->vhcache_lock);
7150 
7151 	kmem_free(pathname, MAXPATHLEN);
7152 	if (cache_updated)
7153 		vhcache_dirty(vhc);
7154 }
7155 
7156 /*
7157  * Remove the reference to the specified phci from the vhci cache.
7158  */
7159 static void
7160 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7161 {
7162 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7163 	mdi_vhcache_phci_t *cphci;
7164 
7165 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7166 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7167 		/* do not remove the actual mdi_vhcache_phci structure */
7168 		cphci->cphci_phci = NULL;
7169 	}
7170 	rw_exit(&vhcache->vhcache_lock);
7171 }
7172 
7173 static void
7174 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7175     mdi_vhcache_lookup_token_t *src)
7176 {
7177 	if (src == NULL) {
7178 		dst->lt_cct = NULL;
7179 		dst->lt_cct_lookup_time = 0;
7180 	} else {
7181 		dst->lt_cct = src->lt_cct;
7182 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7183 	}
7184 }
7185 
7186 /*
7187  * Look up vhcache client for the specified client.
7188  */
7189 static mdi_vhcache_client_t *
7190 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7191     mdi_vhcache_lookup_token_t *token)
7192 {
7193 	mod_hash_val_t hv;
7194 	char *name_addr;
7195 	int len;
7196 
7197 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7198 
7199 	/*
7200 	 * If no vhcache clean occurred since the last lookup, we can
7201 	 * simply return the cct from the last lookup operation.
7202 	 * It works because ccts are never freed except during the vhcache
7203 	 * cleanup operation.
7204 	 */
7205 	if (token != NULL &&
7206 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7207 		return (token->lt_cct);
7208 
7209 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7210 	if (mod_hash_find(vhcache->vhcache_client_hash,
7211 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7212 		if (token) {
7213 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7214 			token->lt_cct_lookup_time = lbolt64;
7215 		}
7216 	} else {
7217 		if (token) {
7218 			token->lt_cct = NULL;
7219 			token->lt_cct_lookup_time = 0;
7220 		}
7221 		hv = NULL;
7222 	}
7223 	kmem_free(name_addr, len);
7224 	return ((mdi_vhcache_client_t *)hv);
7225 }
7226 
7227 /*
7228  * Add the specified path to the vhci cache if not already present.
7229  * Also add the vhcache client for the client corresponding to this path
7230  * if it doesn't already exist.
7231  */
7232 static void
7233 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7234 {
7235 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7236 	mdi_vhcache_client_t *cct;
7237 	mdi_vhcache_pathinfo_t *cpi;
7238 	mdi_phci_t *ph = pip->pi_phci;
7239 	mdi_client_t *ct = pip->pi_client;
7240 	int cache_updated = 0;
7241 
7242 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7243 
7244 	/* if vhcache client for this pip doesn't already exist, add it */
7245 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7246 	    NULL)) == NULL) {
7247 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7248 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7249 		    ct->ct_guid, NULL);
7250 		enqueue_vhcache_client(vhcache, cct);
7251 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7252 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7253 		cache_updated = 1;
7254 	}
7255 
7256 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7257 		if (cpi->cpi_cphci->cphci_phci == ph &&
7258 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7259 			cpi->cpi_pip = pip;
7260 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7261 				cpi->cpi_flags &=
7262 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7263 				sort_vhcache_paths(cct);
7264 				cache_updated = 1;
7265 			}
7266 			break;
7267 		}
7268 	}
7269 
7270 	if (cpi == NULL) {
7271 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7272 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7273 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7274 		ASSERT(cpi->cpi_cphci != NULL);
7275 		cpi->cpi_pip = pip;
7276 		enqueue_vhcache_pathinfo(cct, cpi);
7277 		cache_updated = 1;
7278 	}
7279 
7280 	rw_exit(&vhcache->vhcache_lock);
7281 
7282 	if (cache_updated)
7283 		vhcache_dirty(vhc);
7284 }
7285 
7286 /*
7287  * Remove the reference to the specified path from the vhci cache.
7288  */
7289 static void
7290 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7291 {
7292 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7293 	mdi_client_t *ct = pip->pi_client;
7294 	mdi_vhcache_client_t *cct;
7295 	mdi_vhcache_pathinfo_t *cpi;
7296 
7297 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7298 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7299 	    NULL)) != NULL) {
7300 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7301 		    cpi = cpi->cpi_next) {
7302 			if (cpi->cpi_pip == pip) {
7303 				cpi->cpi_pip = NULL;
7304 				break;
7305 			}
7306 		}
7307 	}
7308 	rw_exit(&vhcache->vhcache_lock);
7309 }
7310 
7311 /*
7312  * Flush the vhci cache to disk.
7313  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7314  */
7315 static int
7316 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7317 {
7318 	nvlist_t *nvl;
7319 	int err;
7320 	int rv;
7321 
7322 	/*
7323 	 * It is possible that the system may shutdown before
7324 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7325 	 * flushing the cache in this case do not check for
7326 	 * i_ddi_io_initialized when force flag is set.
7327 	 */
7328 	if (force_flag == 0 && !i_ddi_io_initialized())
7329 		return (MDI_FAILURE);
7330 
7331 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7332 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7333 		nvlist_free(nvl);
7334 	} else
7335 		err = EFAULT;
7336 
7337 	rv = MDI_SUCCESS;
7338 	mutex_enter(&vhc->vhc_lock);
7339 	if (err != 0) {
7340 		if (err == EROFS) {
7341 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7342 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7343 			    MDI_VHC_VHCACHE_DIRTY);
7344 		} else {
7345 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7346 				cmn_err(CE_CONT, "%s: update failed\n",
7347 				    vhc->vhc_vhcache_filename);
7348 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7349 			}
7350 			rv = MDI_FAILURE;
7351 		}
7352 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7353 		cmn_err(CE_CONT,
7354 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7355 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7356 	}
7357 	mutex_exit(&vhc->vhc_lock);
7358 
7359 	return (rv);
7360 }
7361 
7362 /*
7363  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7364  * Exits itself if left idle for the idle timeout period.
7365  */
7366 static void
7367 vhcache_flush_thread(void *arg)
7368 {
7369 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7370 	clock_t idle_time, quit_at_ticks;
7371 	callb_cpr_t cprinfo;
7372 
7373 	/* number of seconds to sleep idle before exiting */
7374 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7375 
7376 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7377 	    "mdi_vhcache_flush");
7378 	mutex_enter(&vhc->vhc_lock);
7379 	for (; ; ) {
7380 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7381 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7382 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7383 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7384 				(void) cv_timedwait(&vhc->vhc_cv,
7385 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7386 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7387 			} else {
7388 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7389 				mutex_exit(&vhc->vhc_lock);
7390 
7391 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7392 					vhcache_dirty(vhc);
7393 
7394 				mutex_enter(&vhc->vhc_lock);
7395 			}
7396 		}
7397 
7398 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7399 
7400 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7401 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7402 		    ddi_get_lbolt() < quit_at_ticks) {
7403 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7404 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7405 			    quit_at_ticks);
7406 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7407 		}
7408 
7409 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7410 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7411 			goto out;
7412 	}
7413 
7414 out:
7415 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7416 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7417 	CALLB_CPR_EXIT(&cprinfo);
7418 }
7419 
7420 /*
7421  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7422  */
7423 static void
7424 vhcache_dirty(mdi_vhci_config_t *vhc)
7425 {
7426 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7427 	int create_thread;
7428 
7429 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7430 	/* do not flush cache until the cache is fully built */
7431 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7432 		rw_exit(&vhcache->vhcache_lock);
7433 		return;
7434 	}
7435 	rw_exit(&vhcache->vhcache_lock);
7436 
7437 	mutex_enter(&vhc->vhc_lock);
7438 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7439 		mutex_exit(&vhc->vhc_lock);
7440 		return;
7441 	}
7442 
7443 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7444 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
7445 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
7446 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7447 		cv_broadcast(&vhc->vhc_cv);
7448 		create_thread = 0;
7449 	} else {
7450 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
7451 		create_thread = 1;
7452 	}
7453 	mutex_exit(&vhc->vhc_lock);
7454 
7455 	if (create_thread)
7456 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
7457 		    0, &p0, TS_RUN, minclsyspri);
7458 }
7459 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 *
 * bus_config_all_phcis() allocates one of these per cached phci and hands
 * it to bus_config_phci(), which frees both the structure and the path
 * string when it is done.
 */
typedef struct mdi_phci_bus_config_s {
	/* private copy of the phci device path (freed with strlen + 1) */
	char *phbc_phci_path;
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	/* next entry in the list built by bus_config_all_phcis() */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;
7469 
/*
 * vhci bus config structure - one for each vhci bus config operation.
 * Shared by all the per-phci operations started for that vhci operation.
 */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	/* protects vhbc_thr_count */
	kmutex_t vhbc_lock;
	/* broadcast when vhbc_thr_count drops to zero */
	kcondvar_t vhbc_cv;
	/* number of per-phci operations that have not completed yet */
	int vhbc_thr_count;
} mdi_vhci_bus_config_t;
7479 
7480 /*
7481  * bus config the specified phci
7482  */
7483 static void
7484 bus_config_phci(void *arg)
7485 {
7486 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
7487 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
7488 	dev_info_t *ph_dip;
7489 
7490 	/*
7491 	 * first configure all path components upto phci and then configure
7492 	 * the phci children.
7493 	 */
7494 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
7495 	    != NULL) {
7496 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
7497 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
7498 			(void) ndi_devi_config_driver(ph_dip,
7499 			    vhbc->vhbc_op_flags,
7500 			    vhbc->vhbc_op_major);
7501 		} else
7502 			(void) ndi_devi_config(ph_dip,
7503 			    vhbc->vhbc_op_flags);
7504 
7505 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7506 		ndi_rele_devi(ph_dip);
7507 	}
7508 
7509 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
7510 	kmem_free(phbc, sizeof (*phbc));
7511 
7512 	mutex_enter(&vhbc->vhbc_lock);
7513 	vhbc->vhbc_thr_count--;
7514 	if (vhbc->vhbc_thr_count == 0)
7515 		cv_broadcast(&vhbc->vhbc_cv);
7516 	mutex_exit(&vhbc->vhbc_lock);
7517 }
7518 
7519 /*
7520  * Bus config all phcis associated with the vhci in parallel.
7521  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
7522  */
7523 static void
7524 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
7525     ddi_bus_config_op_t op, major_t maj)
7526 {
7527 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
7528 	mdi_vhci_bus_config_t *vhbc;
7529 	mdi_vhcache_phci_t *cphci;
7530 
7531 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7532 	if (vhcache->vhcache_phci_head == NULL) {
7533 		rw_exit(&vhcache->vhcache_lock);
7534 		return;
7535 	}
7536 
7537 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
7538 
7539 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7540 	    cphci = cphci->cphci_next) {
7541 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
7542 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
7543 		    KM_SLEEP);
7544 		phbc->phbc_vhbusconfig = vhbc;
7545 		phbc->phbc_next = phbc_head;
7546 		phbc_head = phbc;
7547 		vhbc->vhbc_thr_count++;
7548 	}
7549 	rw_exit(&vhcache->vhcache_lock);
7550 
7551 	vhbc->vhbc_op = op;
7552 	vhbc->vhbc_op_major = maj;
7553 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
7554 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
7555 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
7556 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
7557 
7558 	/* now create threads to initiate bus config on all phcis in parallel */
7559 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
7560 		phbc_next = phbc->phbc_next;
7561 		if (mdi_mtc_off)
7562 			bus_config_phci((void *)phbc);
7563 		else
7564 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
7565 			    0, &p0, TS_RUN, minclsyspri);
7566 	}
7567 
7568 	mutex_enter(&vhbc->vhbc_lock);
7569 	/* wait until all threads exit */
7570 	while (vhbc->vhbc_thr_count > 0)
7571 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
7572 	mutex_exit(&vhbc->vhbc_lock);
7573 
7574 	mutex_destroy(&vhbc->vhbc_lock);
7575 	cv_destroy(&vhbc->vhbc_cv);
7576 	kmem_free(vhbc, sizeof (*vhbc));
7577 }
7578 
7579 /*
7580  * Perform BUS_CONFIG_ONE on the specified child of the phci.
7581  * The path includes the child component in addition to the phci path.
7582  */
7583 static int
7584 bus_config_one_phci_child(char *path)
7585 {
7586 	dev_info_t *ph_dip, *child;
7587 	char *devnm;
7588 	int rv = MDI_FAILURE;
7589 
7590 	/* extract the child component of the phci */
7591 	devnm = strrchr(path, '/');
7592 	*devnm++ = '\0';
7593 
7594 	/*
7595 	 * first configure all path components upto phci and then
7596 	 * configure the phci child.
7597 	 */
7598 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
7599 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
7600 		    NDI_SUCCESS) {
7601 			/*
7602 			 * release the hold that ndi_devi_config_one() placed
7603 			 */
7604 			ndi_rele_devi(child);
7605 			rv = MDI_SUCCESS;
7606 		}
7607 
7608 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7609 		ndi_rele_devi(ph_dip);
7610 	}
7611 
7612 	devnm--;
7613 	*devnm = '/';
7614 	return (rv);
7615 }
7616 
7617 /*
7618  * Build a list of phci client paths for the specified vhci client.
7619  * The list includes only those phci client paths which aren't configured yet.
7620  */
7621 static mdi_phys_path_t *
7622 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
7623 {
7624 	mdi_vhcache_pathinfo_t *cpi;
7625 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
7626 	int config_path, len;
7627 
7628 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7629 		/*
7630 		 * include only those paths that aren't configured.
7631 		 */
7632 		config_path = 0;
7633 		if (cpi->cpi_pip == NULL)
7634 			config_path = 1;
7635 		else {
7636 			MDI_PI_LOCK(cpi->cpi_pip);
7637 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
7638 				config_path = 1;
7639 			MDI_PI_UNLOCK(cpi->cpi_pip);
7640 		}
7641 
7642 		if (config_path) {
7643 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
7644 			len = strlen(cpi->cpi_cphci->cphci_path) +
7645 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
7646 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
7647 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
7648 			    cpi->cpi_cphci->cphci_path, ct_name,
7649 			    cpi->cpi_addr);
7650 			pp->phys_path_next = NULL;
7651 
7652 			if (pp_head == NULL)
7653 				pp_head = pp;
7654 			else
7655 				pp_tail->phys_path_next = pp;
7656 			pp_tail = pp;
7657 		}
7658 	}
7659 
7660 	return (pp_head);
7661 }
7662 
7663 /*
7664  * Free the memory allocated for phci client path list.
7665  */
7666 static void
7667 free_phclient_path_list(mdi_phys_path_t *pp_head)
7668 {
7669 	mdi_phys_path_t *pp, *pp_next;
7670 
7671 	for (pp = pp_head; pp != NULL; pp = pp_next) {
7672 		pp_next = pp->phys_path_next;
7673 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
7674 		kmem_free(pp, sizeof (*pp));
7675 	}
7676 }
7677 
7678 /*
7679  * Allocated async client structure and initialize with the specified values.
7680  */
7681 static mdi_async_client_config_t *
7682 alloc_async_client_config(char *ct_name, char *ct_addr,
7683     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7684 {
7685 	mdi_async_client_config_t *acc;
7686 
7687 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
7688 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
7689 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
7690 	acc->acc_phclient_path_list_head = pp_head;
7691 	init_vhcache_lookup_token(&acc->acc_token, tok);
7692 	acc->acc_next = NULL;
7693 	return (acc);
7694 }
7695 
7696 /*
7697  * Free the memory allocated for the async client structure and their members.
7698  */
7699 static void
7700 free_async_client_config(mdi_async_client_config_t *acc)
7701 {
7702 	if (acc->acc_phclient_path_list_head)
7703 		free_phclient_path_list(acc->acc_phclient_path_list_head);
7704 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
7705 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
7706 	kmem_free(acc, sizeof (*acc));
7707 }
7708 
7709 /*
7710  * Sort vhcache pathinfos (cpis) of the specified client.
7711  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7712  * flag set come at the beginning of the list. All cpis which have this
7713  * flag set come at the end of the list.
7714  */
7715 static void
7716 sort_vhcache_paths(mdi_vhcache_client_t *cct)
7717 {
7718 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
7719 
7720 	cpi_head = cct->cct_cpi_head;
7721 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
7722 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
7723 		cpi_next = cpi->cpi_next;
7724 		enqueue_vhcache_pathinfo(cct, cpi);
7725 	}
7726 }
7727 
7728 /*
7729  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
7730  * every vhcache pathinfo of the specified client. If not adjust the flag
7731  * setting appropriately.
7732  *
7733  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
7734  * on-disk vhci cache. So every time this flag is updated the cache must be
7735  * flushed.
7736  */
7737 static void
7738 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7739     mdi_vhcache_lookup_token_t *tok)
7740 {
7741 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7742 	mdi_vhcache_client_t *cct;
7743 	mdi_vhcache_pathinfo_t *cpi;
7744 
7745 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7746 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
7747 	    == NULL) {
7748 		rw_exit(&vhcache->vhcache_lock);
7749 		return;
7750 	}
7751 
7752 	/*
7753 	 * to avoid unnecessary on-disk cache updates, first check if an
7754 	 * update is really needed. If no update is needed simply return.
7755 	 */
7756 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7757 		if ((cpi->cpi_pip != NULL &&
7758 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
7759 		    (cpi->cpi_pip == NULL &&
7760 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
7761 			break;
7762 		}
7763 	}
7764 	if (cpi == NULL) {
7765 		rw_exit(&vhcache->vhcache_lock);
7766 		return;
7767 	}
7768 
7769 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
7770 		rw_exit(&vhcache->vhcache_lock);
7771 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7772 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
7773 		    tok)) == NULL) {
7774 			rw_exit(&vhcache->vhcache_lock);
7775 			return;
7776 		}
7777 	}
7778 
7779 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7780 		if (cpi->cpi_pip != NULL)
7781 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7782 		else
7783 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7784 	}
7785 	sort_vhcache_paths(cct);
7786 
7787 	rw_exit(&vhcache->vhcache_lock);
7788 	vhcache_dirty(vhc);
7789 }
7790 
7791 /*
7792  * Configure all specified paths of the client.
7793  */
7794 static void
7795 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7796     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7797 {
7798 	mdi_phys_path_t *pp;
7799 
7800 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
7801 		(void) bus_config_one_phci_child(pp->phys_path);
7802 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
7803 }
7804 
7805 /*
7806  * Dequeue elements from vhci async client config list and bus configure
7807  * their corresponding phci clients.
7808  */
7809 static void
7810 config_client_paths_thread(void *arg)
7811 {
7812 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7813 	mdi_async_client_config_t *acc;
7814 	clock_t quit_at_ticks;
7815 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
7816 	callb_cpr_t cprinfo;
7817 
7818 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7819 	    "mdi_config_client_paths");
7820 
7821 	for (; ; ) {
7822 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7823 
7824 		mutex_enter(&vhc->vhc_lock);
7825 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7826 		    vhc->vhc_acc_list_head == NULL &&
7827 		    ddi_get_lbolt() < quit_at_ticks) {
7828 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7829 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7830 			    quit_at_ticks);
7831 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7832 		}
7833 
7834 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7835 		    vhc->vhc_acc_list_head == NULL)
7836 			goto out;
7837 
7838 		acc = vhc->vhc_acc_list_head;
7839 		vhc->vhc_acc_list_head = acc->acc_next;
7840 		if (vhc->vhc_acc_list_head == NULL)
7841 			vhc->vhc_acc_list_tail = NULL;
7842 		vhc->vhc_acc_count--;
7843 		mutex_exit(&vhc->vhc_lock);
7844 
7845 		config_client_paths_sync(vhc, acc->acc_ct_name,
7846 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
7847 		    &acc->acc_token);
7848 
7849 		free_async_client_config(acc);
7850 	}
7851 
7852 out:
7853 	vhc->vhc_acc_thrcount--;
7854 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7855 	CALLB_CPR_EXIT(&cprinfo);
7856 }
7857 
7858 /*
7859  * Arrange for all the phci client paths (pp_head) for the specified client
7860  * to be bus configured asynchronously by a thread.
7861  */
7862 static void
7863 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7864     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7865 {
7866 	mdi_async_client_config_t *acc, *newacc;
7867 	int create_thread;
7868 
7869 	if (pp_head == NULL)
7870 		return;
7871 
7872 	if (mdi_mtc_off) {
7873 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
7874 		free_phclient_path_list(pp_head);
7875 		return;
7876 	}
7877 
7878 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
7879 	ASSERT(newacc);
7880 
7881 	mutex_enter(&vhc->vhc_lock);
7882 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
7883 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
7884 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
7885 			free_async_client_config(newacc);
7886 			mutex_exit(&vhc->vhc_lock);
7887 			return;
7888 		}
7889 	}
7890 
7891 	if (vhc->vhc_acc_list_head == NULL)
7892 		vhc->vhc_acc_list_head = newacc;
7893 	else
7894 		vhc->vhc_acc_list_tail->acc_next = newacc;
7895 	vhc->vhc_acc_list_tail = newacc;
7896 	vhc->vhc_acc_count++;
7897 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
7898 		cv_broadcast(&vhc->vhc_cv);
7899 		create_thread = 0;
7900 	} else {
7901 		vhc->vhc_acc_thrcount++;
7902 		create_thread = 1;
7903 	}
7904 	mutex_exit(&vhc->vhc_lock);
7905 
7906 	if (create_thread)
7907 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
7908 		    0, &p0, TS_RUN, minclsyspri);
7909 }
7910 
7911 /*
7912  * Return number of online paths for the specified client.
7913  */
7914 static int
7915 nonline_paths(mdi_vhcache_client_t *cct)
7916 {
7917 	mdi_vhcache_pathinfo_t *cpi;
7918 	int online_count = 0;
7919 
7920 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7921 		if (cpi->cpi_pip != NULL) {
7922 			MDI_PI_LOCK(cpi->cpi_pip);
7923 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
7924 				online_count++;
7925 			MDI_PI_UNLOCK(cpi->cpi_pip);
7926 		}
7927 	}
7928 
7929 	return (online_count);
7930 }
7931 
7932 /*
7933  * Bus configure all paths for the specified vhci client.
7934  * If at least one path for the client is already online, the remaining paths
7935  * will be configured asynchronously. Otherwise, it synchronously configures
7936  * the paths until at least one path is online and then rest of the paths
7937  * will be configured asynchronously.
7938  */
7939 static void
7940 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
7941 {
7942 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7943 	mdi_phys_path_t *pp_head, *pp;
7944 	mdi_vhcache_client_t *cct;
7945 	mdi_vhcache_lookup_token_t tok;
7946 
7947 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7948 
7949 	init_vhcache_lookup_token(&tok, NULL);
7950 
7951 	if (ct_name == NULL || ct_addr == NULL ||
7952 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
7953 	    == NULL ||
7954 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
7955 		rw_exit(&vhcache->vhcache_lock);
7956 		return;
7957 	}
7958 
7959 	/* if at least one path is online, configure the rest asynchronously */
7960 	if (nonline_paths(cct) > 0) {
7961 		rw_exit(&vhcache->vhcache_lock);
7962 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
7963 		return;
7964 	}
7965 
7966 	rw_exit(&vhcache->vhcache_lock);
7967 
7968 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
7969 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
7970 			rw_enter(&vhcache->vhcache_lock, RW_READER);
7971 
7972 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
7973 			    ct_addr, &tok)) == NULL) {
7974 				rw_exit(&vhcache->vhcache_lock);
7975 				goto out;
7976 			}
7977 
7978 			if (nonline_paths(cct) > 0 &&
7979 			    pp->phys_path_next != NULL) {
7980 				rw_exit(&vhcache->vhcache_lock);
7981 				config_client_paths_async(vhc, ct_name, ct_addr,
7982 				    pp->phys_path_next, &tok);
7983 				pp->phys_path_next = NULL;
7984 				goto out;
7985 			}
7986 
7987 			rw_exit(&vhcache->vhcache_lock);
7988 		}
7989 	}
7990 
7991 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
7992 out:
7993 	free_phclient_path_list(pp_head);
7994 }
7995 
7996 static void
7997 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
7998 {
7999 	mutex_enter(&vhc->vhc_lock);
8000 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8001 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8002 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8003 	mutex_exit(&vhc->vhc_lock);
8004 }
8005 
8006 static void
8007 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8008 {
8009 	mutex_enter(&vhc->vhc_lock);
8010 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8011 	cv_broadcast(&vhc->vhc_cv);
8012 	mutex_exit(&vhc->vhc_lock);
8013 }
8014 
8015 /*
8016  * Attach the phci driver instances associated with the vhci:
8017  * If root is mounted attach all phci driver instances.
8018  * If root is not mounted, attach the instances of only those phci
8019  * drivers that have the root support.
8020  */
8021 static void
8022 attach_phci_drivers(mdi_vhci_config_t *vhc, int root_mounted)
8023 {
8024 	int  i;
8025 	major_t m;
8026 
8027 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
8028 		if (root_mounted == 0 &&
8029 		    vhc->vhc_phci_driver_list[i].phdriver_root_support == 0)
8030 			continue;
8031 
8032 		m = ddi_name_to_major(
8033 		    vhc->vhc_phci_driver_list[i].phdriver_name);
8034 		if (m != (major_t)-1) {
8035 			if (ddi_hold_installed_driver(m) != NULL)
8036 				ddi_rele_driver(m);
8037 		}
8038 	}
8039 }
8040 
8041 /*
8042  * Build vhci cache:
8043  *
8044  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8045  * the phci driver instances. During this process the cache gets built.
8046  *
8047  * Cache is built fully if the root is mounted (i.e., root_mounted is nonzero).
8048  *
8049  * If the root is not mounted, phci drivers that do not have root support
8050  * are not attached. As a result the cache is built partially. The entries
8051  * in the cache reflect only those phci drivers that have root support.
8052  */
8053 static vhcache_build_status_t
8054 build_vhci_cache(mdi_vhci_config_t *vhc, int root_mounted)
8055 {
8056 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8057 
8058 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8059 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8060 		rw_exit(&vhcache->vhcache_lock);
8061 		return (VHCACHE_NOT_REBUILT);
8062 	}
8063 	rw_exit(&vhcache->vhcache_lock);
8064 
8065 	attach_phci_drivers(vhc, root_mounted);
8066 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8067 	    BUS_CONFIG_ALL, (major_t)-1);
8068 
8069 	if (root_mounted) {
8070 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8071 		vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8072 		rw_exit(&vhcache->vhcache_lock);
8073 		vhcache_dirty(vhc);
8074 		return (VHCACHE_FULLY_BUILT);
8075 	} else
8076 		return (VHCACHE_PARTIALLY_BUILT);
8077 }
8078 
8079 /*
8080  * Wait until the root is mounted and then build the vhci cache.
8081  */
8082 static void
8083 build_vhci_cache_thread(void *arg)
8084 {
8085 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8086 
8087 	mutex_enter(&vhc->vhc_lock);
8088 	while (!modrootloaded && !(vhc->vhc_flags & MDI_VHC_EXIT)) {
8089 		(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8090 		    ddi_get_lbolt() + 10 * TICKS_PER_SECOND);
8091 	}
8092 	if (vhc->vhc_flags & MDI_VHC_EXIT)
8093 		goto out;
8094 
8095 	mutex_exit(&vhc->vhc_lock);
8096 
8097 	/*
8098 	 * Now that the root is mounted. So build_vhci_cache() will build
8099 	 * the full cache.
8100 	 */
8101 	(void) build_vhci_cache(vhc, 1);
8102 
8103 	mutex_enter(&vhc->vhc_lock);
8104 out:
8105 	vhc->vhc_flags &= ~MDI_VHC_BUILD_VHCI_CACHE_THREAD;
8106 	mutex_exit(&vhc->vhc_lock);
8107 }
8108 
8109 /*
8110  * Build vhci cache - a wrapper for build_vhci_cache().
8111  *
8112  * In a normal case on-disk vhci cache is read and setup during booting.
8113  * But if the on-disk vhci cache is not there or deleted or corrupted then
8114  * this function sets up the vhci cache.
8115  *
8116  * The cache is built fully if the root is mounted.
8117  *
8118  * If the root is not mounted, initially the cache is built reflecting only
8119  * those driver entries that have the root support. A separate thread is
8120  * created to handle the creation of full cache. This thread will wait
8121  * until the root is mounted and then rebuilds the cache.
8122  */
8123 static int
8124 e_build_vhci_cache(mdi_vhci_config_t *vhc)
8125 {
8126 	vhcache_build_status_t rv;
8127 
8128 	single_threaded_vhconfig_enter(vhc);
8129 
8130 	mutex_enter(&vhc->vhc_lock);
8131 	if (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) {
8132 		if (modrootloaded) {
8133 			cv_broadcast(&vhc->vhc_cv);
8134 			/* wait until build vhci cache thread exits */
8135 			while (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD)
8136 				cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8137 			rv = VHCACHE_FULLY_BUILT;
8138 		} else {
8139 			/*
8140 			 * The presense of MDI_VHC_BUILD_VHCI_CACHE_THREAD
8141 			 * flag indicates that the cache has already been
8142 			 * partially built.
8143 			 */
8144 			rv = VHCACHE_PARTIALLY_BUILT;
8145 		}
8146 
8147 		mutex_exit(&vhc->vhc_lock);
8148 		single_threaded_vhconfig_exit(vhc);
8149 		return (rv);
8150 	}
8151 	mutex_exit(&vhc->vhc_lock);
8152 
8153 	rv = build_vhci_cache(vhc, modrootloaded);
8154 
8155 	if (rv == VHCACHE_PARTIALLY_BUILT) {
8156 		/*
8157 		 * create a thread; this thread will wait until the root is
8158 		 * mounted and then fully rebuilds the cache.
8159 		 */
8160 		mutex_enter(&vhc->vhc_lock);
8161 		vhc->vhc_flags |= MDI_VHC_BUILD_VHCI_CACHE_THREAD;
8162 		mutex_exit(&vhc->vhc_lock);
8163 		(void) thread_create(NULL, 0, build_vhci_cache_thread,
8164 		    vhc, 0, &p0, TS_RUN, minclsyspri);
8165 	}
8166 
8167 	single_threaded_vhconfig_exit(vhc);
8168 	return (rv);
8169 }
8170 
8171 /*
8172  * Generic vhci bus config implementation:
8173  *
8174  * Parameters
8175  *	vdip	vhci dip
8176  *	flags	bus config flags
8177  *	op	bus config operation
8178  *	The remaining parameters are bus config operation specific
8179  *
8180  * for BUS_CONFIG_ONE
8181  *	arg	pointer to name@addr
8182  *	child	upon successful return from this function, *child will be
8183  *		set to the configured and held devinfo child node of vdip.
8184  *	ct_addr	pointer to client address (i.e. GUID)
8185  *
8186  * for BUS_CONFIG_DRIVER
8187  *	arg	major number of the driver
8188  *	child and ct_addr parameters are ignored
8189  *
8190  * for BUS_CONFIG_ALL
8191  *	arg, child, and ct_addr parameters are ignored
8192  *
8193  * Note that for the rest of the bus config operations, this function simply
8194  * calls the framework provided default bus config routine.
8195  */
8196 int
8197 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8198     void *arg, dev_info_t **child, char *ct_addr)
8199 {
8200 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8201 	mdi_vhci_config_t *vhc = vh->vh_config;
8202 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8203 	vhcache_build_status_t rv = VHCACHE_NOT_REBUILT;
8204 	char *cp;
8205 
8206 	/*
8207 	 * While bus configuring phcis, the phci driver interactions with MDI
8208 	 * cause child nodes to be enumerated under the vhci node for which
8209 	 * they need to ndi_devi_enter the vhci node.
8210 	 *
8211 	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
8212 	 * for the bus config operations on phcis to finish while holding the
8213 	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
8214 	 * phcis and call the default framework provided bus config function
8215 	 * if we are called with ndi_devi_enter lock held.
8216 	 */
8217 	if (DEVI_BUSY_OWNED(vdip)) {
8218 		MDI_DEBUG(2, (CE_NOTE, vdip,
8219 		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
8220 		goto default_bus_config;
8221 	}
8222 
8223 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8224 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8225 		rw_exit(&vhcache->vhcache_lock);
8226 		rv = e_build_vhci_cache(vhc);
8227 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8228 	}
8229 
8230 	switch (op) {
8231 	case BUS_CONFIG_ONE:
8232 		/* extract node name */
8233 		cp = (char *)arg;
8234 		while (*cp != '\0' && *cp != '@')
8235 			cp++;
8236 		if (*cp == '@') {
8237 			*cp = '\0';
8238 			config_client_paths(vhc, (char *)arg, ct_addr);
8239 			/* config_client_paths() releases the cache_lock */
8240 			*cp = '@';
8241 		} else
8242 			rw_exit(&vhcache->vhcache_lock);
8243 		break;
8244 
8245 	case BUS_CONFIG_DRIVER:
8246 		rw_exit(&vhcache->vhcache_lock);
8247 		if (rv == VHCACHE_NOT_REBUILT)
8248 			bus_config_all_phcis(vhcache, flags, op,
8249 			    (major_t)(uintptr_t)arg);
8250 		break;
8251 
8252 	case BUS_CONFIG_ALL:
8253 		rw_exit(&vhcache->vhcache_lock);
8254 		if (rv == VHCACHE_NOT_REBUILT)
8255 			bus_config_all_phcis(vhcache, flags, op, -1);
8256 		break;
8257 
8258 	default:
8259 		rw_exit(&vhcache->vhcache_lock);
8260 		break;
8261 	}
8262 
8263 
8264 default_bus_config:
8265 	/*
8266 	 * All requested child nodes are enumerated under the vhci.
8267 	 * Now configure them.
8268 	 */
8269 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8270 	    NDI_SUCCESS) {
8271 		return (MDI_SUCCESS);
8272 	}
8273 
8274 	return (MDI_FAILURE);
8275 }
8276 
8277 /*
8278  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8279  */
8280 static nvlist_t *
8281 read_on_disk_vhci_cache(char *vhci_class)
8282 {
8283 	nvlist_t *nvl;
8284 	int err;
8285 	char *filename;
8286 
8287 	filename = vhclass2vhcache_filename(vhci_class);
8288 
8289 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8290 		kmem_free(filename, strlen(filename) + 1);
8291 		return (nvl);
8292 	} else if (err == EIO)
8293 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8294 	else if (err == EINVAL)
8295 		cmn_err(CE_WARN,
8296 		    "%s: data file corrupted, will recreate\n", filename);
8297 
8298 	kmem_free(filename, strlen(filename) + 1);
8299 	return (NULL);
8300 }
8301 
8302 /*
8303  * Read on-disk vhci cache into nvlists for all vhci classes.
8304  * Called during booting by i_ddi_read_devices_files().
8305  */
8306 void
8307 mdi_read_devices_files(void)
8308 {
8309 	int i;
8310 
8311 	for (i = 0; i < N_VHCI_CLASSES; i++)
8312 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
8313 }
8314 
8315 /*
8316  * Remove all stale entries from vhci cache.
8317  */
8318 static void
8319 clean_vhcache(mdi_vhci_config_t *vhc)
8320 {
8321 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8322 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
8323 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
8324 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
8325 
8326 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8327 
8328 	cct_head = vhcache->vhcache_client_head;
8329 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
8330 	for (cct = cct_head; cct != NULL; cct = cct_next) {
8331 		cct_next = cct->cct_next;
8332 
8333 		cpi_head = cct->cct_cpi_head;
8334 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8335 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8336 			cpi_next = cpi->cpi_next;
8337 			if (cpi->cpi_pip != NULL) {
8338 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
8339 				enqueue_tail_vhcache_pathinfo(cct, cpi);
8340 			} else
8341 				free_vhcache_pathinfo(cpi);
8342 		}
8343 
8344 		if (cct->cct_cpi_head != NULL)
8345 			enqueue_vhcache_client(vhcache, cct);
8346 		else {
8347 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
8348 			    (mod_hash_key_t)cct->cct_name_addr);
8349 			free_vhcache_client(cct);
8350 		}
8351 	}
8352 
8353 	cphci_head = vhcache->vhcache_phci_head;
8354 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
8355 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
8356 		cphci_next = cphci->cphci_next;
8357 		if (cphci->cphci_phci != NULL)
8358 			enqueue_vhcache_phci(vhcache, cphci);
8359 		else
8360 			free_vhcache_phci(cphci);
8361 	}
8362 
8363 	vhcache->vhcache_clean_time = lbolt64;
8364 	rw_exit(&vhcache->vhcache_lock);
8365 	vhcache_dirty(vhc);
8366 }
8367 
8368 /*
8369  * Remove all stale entries from vhci cache.
8370  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
8371  */
8372 void
8373 mdi_clean_vhcache(void)
8374 {
8375 	mdi_vhci_t *vh;
8376 
8377 	mutex_enter(&mdi_mutex);
8378 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8379 		vh->vh_refcnt++;
8380 		mutex_exit(&mdi_mutex);
8381 		clean_vhcache(vh->vh_config);
8382 		mutex_enter(&mdi_mutex);
8383 		vh->vh_refcnt--;
8384 	}
8385 	mutex_exit(&mdi_mutex);
8386 }
8387 
8388 /*
8389  * mdi_vhci_walk_clients():
8390  *		Walker routine to traverse client dev_info nodes
8391  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
8392  * below the client, including nexus devices, which we dont want.
8393  * So we just traverse the immediate siblings, starting from 1st client.
8394  */
8395 void
8396 mdi_vhci_walk_clients(dev_info_t *vdip,
8397     int (*f)(dev_info_t *, void *), void *arg)
8398 {
8399 	dev_info_t	*cdip;
8400 	mdi_client_t	*ct;
8401 
8402 	mutex_enter(&mdi_mutex);
8403 
8404 	cdip = ddi_get_child(vdip);
8405 
8406 	while (cdip) {
8407 		ct = i_devi_get_client(cdip);
8408 		MDI_CLIENT_LOCK(ct);
8409 
8410 		switch ((*f)(cdip, arg)) {
8411 		case DDI_WALK_CONTINUE:
8412 			cdip = ddi_get_next_sibling(cdip);
8413 			MDI_CLIENT_UNLOCK(ct);
8414 			break;
8415 
8416 		default:
8417 			MDI_CLIENT_UNLOCK(ct);
8418 			mutex_exit(&mdi_mutex);
8419 			return;
8420 		}
8421 	}
8422 
8423 	mutex_exit(&mdi_mutex);
8424 }
8425 
8426 /*
8427  * mdi_vhci_walk_phcis():
8428  *		Walker routine to traverse phci dev_info nodes
8429  */
8430 void
8431 mdi_vhci_walk_phcis(dev_info_t *vdip,
8432     int (*f)(dev_info_t *, void *), void *arg)
8433 {
8434 	mdi_vhci_t	*vh = NULL;
8435 	mdi_phci_t	*ph = NULL;
8436 
8437 	mutex_enter(&mdi_mutex);
8438 
8439 	vh = i_devi_get_vhci(vdip);
8440 	ph = vh->vh_phci_head;
8441 
8442 	while (ph) {
8443 		MDI_PHCI_LOCK(ph);
8444 
8445 		switch ((*f)(ph->ph_dip, arg)) {
8446 		case DDI_WALK_CONTINUE:
8447 			MDI_PHCI_UNLOCK(ph);
8448 			ph = ph->ph_next;
8449 			break;
8450 
8451 		default:
8452 			MDI_PHCI_UNLOCK(ph);
8453 			mutex_exit(&mdi_mutex);
8454 			return;
8455 		}
8456 	}
8457 
8458 	mutex_exit(&mdi_mutex);
8459 }
8460 
8461 
8462 /*
8463  * mdi_walk_vhcis():
8464  *		Walker routine to traverse vhci dev_info nodes
8465  */
8466 void
8467 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
8468 {
8469 	mdi_vhci_t	*vh = NULL;
8470 
8471 	mutex_enter(&mdi_mutex);
8472 	/*
8473 	 * Scan for already registered vhci
8474 	 */
8475 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8476 		vh->vh_refcnt++;
8477 		mutex_exit(&mdi_mutex);
8478 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
8479 			mutex_enter(&mdi_mutex);
8480 			vh->vh_refcnt--;
8481 			break;
8482 		} else {
8483 			mutex_enter(&mdi_mutex);
8484 			vh->vh_refcnt--;
8485 		}
8486 	}
8487 
8488 	mutex_exit(&mdi_mutex);
8489 }
8490 
8491 /*
8492  * i_mdi_log_sysevent():
8493  *		Logs events for pickup by syseventd
8494  */
8495 static void
8496 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
8497 {
8498 	char		*path_name;
8499 	nvlist_t	*attr_list;
8500 
8501 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
8502 	    KM_SLEEP) != DDI_SUCCESS) {
8503 		goto alloc_failed;
8504 	}
8505 
8506 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
8507 	(void) ddi_pathname(dip, path_name);
8508 
8509 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
8510 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
8511 		goto error;
8512 	}
8513 
8514 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
8515 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
8516 		goto error;
8517 	}
8518 
8519 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
8520 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
8521 		goto error;
8522 	}
8523 
8524 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
8525 	    path_name) != DDI_SUCCESS) {
8526 		goto error;
8527 	}
8528 
8529 	if (nvlist_add_string(attr_list, DDI_CLASS,
8530 	    ph_vh_class) != DDI_SUCCESS) {
8531 		goto error;
8532 	}
8533 
8534 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
8535 	    attr_list, NULL, DDI_SLEEP);
8536 
8537 error:
8538 	kmem_free(path_name, MAXPATHLEN);
8539 	nvlist_free(attr_list);
8540 	return;
8541 
8542 alloc_failed:
8543 	MDI_DEBUG(1, (CE_WARN, dip,
8544 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
8545 }
8546