xref: /titanic_51/usr/src/uts/common/os/sunmdi.c (revision 81f63062a60a29358c252e0d10807f8a8547fbb5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 /*
28  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
29  * detailed discussion of the overall mpxio architecture.
30  *
31  * Default locking order:
32  *
33  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex);
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex);
35  * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex);
36  * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex);
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
38  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
39  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
40  */
41 
42 #include <sys/note.h>
43 #include <sys/types.h>
44 #include <sys/varargs.h>
45 #include <sys/param.h>
46 #include <sys/errno.h>
47 #include <sys/uio.h>
48 #include <sys/buf.h>
49 #include <sys/modctl.h>
50 #include <sys/open.h>
51 #include <sys/kmem.h>
52 #include <sys/poll.h>
53 #include <sys/conf.h>
54 #include <sys/bootconf.h>
55 #include <sys/cmn_err.h>
56 #include <sys/stat.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/ddipropdefs.h>
60 #include <sys/sunndi.h>
61 #include <sys/ndi_impldefs.h>
62 #include <sys/promif.h>
63 #include <sys/sunmdi.h>
64 #include <sys/mdi_impldefs.h>
65 #include <sys/taskq.h>
66 #include <sys/epm.h>
67 #include <sys/sunpm.h>
68 #include <sys/modhash.h>
69 #include <sys/disp.h>
70 #include <sys/autoconf.h>
71 #include <sys/sysmacros.h>
72 
#ifdef	DEBUG
#include <sys/debug.h>
/*
 * Debug logging controls (DEBUG kernels only).
 *
 * mdi_debug
 *	Verbosity threshold: MDI_DEBUG(level, ...) logs only when
 *	mdi_debug >= level.
 * mdi_debug_logonly
 *	Defined here; its consumer is not in this part of the file.
 */
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
/*
 * MDI_DEBUG(level, (args)):
 *	Pass the parenthesized argument list 'args' to i_mdi_log() when the
 *	current debug level is at least 'level'.  The expansion is wrapped in
 *	do { } while (0) so that it behaves as a single statement and cannot
 *	capture the 'else' of an enclosing if/else.  Callers supply the
 *	trailing semicolon.
 */
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
		_NOTE(CONSTCOND) \
	} while (0)
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
/* Non-DEBUG kernels: debug statements compile away to nothing. */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
83 
/*
 * Kernel globals defined outside this file.  minclsyspri supplies the
 * thread priority of the MDI taskq (see MDI_TASKQ_PRI below).
 */
extern pri_t	minclsyspri;
extern int	modrootloaded;

/*
 * Global mutex:
 * Protects vHCI list and structure members.
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 *
 * mdi_vhci_head and mdi_vhci_tail delimit a singly linked list chained
 * through vh_next; mdi_vhci_count is the number of entries.  All three are
 * updated under mdi_mutex by mdi_vhci_register()/mdi_vhci_unregister().
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 * (number of struct client_hash entries allocated for each vHCI in
 * mdi_vhci_register())
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 *
 * The min/max allocation values scale with mdi_taskq_n_threads and are
 * evaluated when i_mdi_init() creates the taskq.
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* Clock ticks per second: drv_usectohz() of 1,000,000 microseconds. */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

/*
 * NOTE(review): presumably the bucket count of the bus config cache hash;
 * the consumer is not visible in this part of the file - confirm.
 */
static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;
158 
/*
 * MDI component property name/value string definitions
 */
const char 		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/*
 * cmn_err() format (one %s: the class name) emitted by mdi_vhci_register()
 * when a vHCI with ops already registered exists for the requested class.
 */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";
188 
/*
 * Static function prototypes
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int 		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

/*
 * vHCI, pHCI, pathinfo and client helpers.  i_devi_get_vhci() and
 * i_devi_get_phci() map a devinfo node to its MDI extension structure.
 */
static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
/*
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
				int, int);
static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
				mdi_vhci_t *vh, int flags, int op);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

/*
 * vHCI cache helpers.  setup_vhci_cache()/destroy_vhci_cache() are called
 * from vHCI registration/unregistration, and vhcache_phci_add()/
 * vhcache_phci_remove() from pHCI registration/unregistration.
 */
static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
static boolean_t	stop_vhcache_flush_thread(void *, int);
static void		free_string_array(char **, int);
static void		free_vhcache_phci(mdi_vhcache_phci_t *);
static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void		free_vhcache_client(mdi_vhcache_client_t *);
static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_pi_add(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		vhcache_pi_remove(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		free_phclient_path_list(mdi_phys_path_t *);
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
/* Declared extern here; the definitions are not in this part of the file. */
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);
297 
298 /* called once when first vhci registers with mdi */
299 static void
300 i_mdi_init()
301 {
302 	static int initialized = 0;
303 
304 	if (initialized)
305 		return;
306 	initialized = 1;
307 
308 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
309 	/*
310 	 * Create our taskq resources
311 	 */
312 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
313 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
314 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
315 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
316 }
317 
318 /*
319  * mdi_get_component_type():
320  *		Return mpxio component type
321  * Return Values:
322  *		MDI_COMPONENT_NONE
323  *		MDI_COMPONENT_VHCI
324  *		MDI_COMPONENT_PHCI
325  *		MDI_COMPONENT_CLIENT
326  * XXX This doesn't work under multi-level MPxIO and should be
327  *	removed when clients migrate mdi_component_is_*() interfaces.
328  */
329 int
330 mdi_get_component_type(dev_info_t *dip)
331 {
332 	return (DEVI(dip)->devi_mdi_component);
333 }
334 
335 /*
336  * mdi_vhci_register():
337  *		Register a vHCI module with the mpxio framework
338  *		mdi_vhci_register() is called by vHCI drivers to register the
339  *		'class_driver' vHCI driver and its MDI entrypoints with the
340  *		mpxio framework.  The vHCI driver must call this interface as
341  *		part of its attach(9e) handler.
342  *		Competing threads may try to attach mdi_vhci_register() as
343  *		the vHCI drivers are loaded and attached as a result of pHCI
344  *		driver instance registration (mdi_phci_register()) with the
345  *		framework.
346  * Return Values:
347  *		MDI_SUCCESS
348  *		MDI_FAILURE
349  */
350 /*ARGSUSED*/
351 int
352 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
353     int flags)
354 {
355 	mdi_vhci_t		*vh = NULL;
356 
357 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
358 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
359 
360 	i_mdi_init();
361 
362 	mutex_enter(&mdi_mutex);
363 	/*
364 	 * Scan for already registered vhci
365 	 */
366 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
367 		if (strcmp(vh->vh_class, class) == 0) {
368 			/*
369 			 * vHCI has already been created.  Check for valid
370 			 * vHCI ops registration.  We only support one vHCI
371 			 * module per class
372 			 */
373 			if (vh->vh_ops != NULL) {
374 				mutex_exit(&mdi_mutex);
375 				cmn_err(CE_NOTE, vhci_greeting, class);
376 				return (MDI_FAILURE);
377 			}
378 			break;
379 		}
380 	}
381 
382 	/*
383 	 * if not yet created, create the vHCI component
384 	 */
385 	if (vh == NULL) {
386 		struct client_hash	*hash = NULL;
387 		char			*load_balance;
388 
389 		/*
390 		 * Allocate and initialize the mdi extensions
391 		 */
392 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
393 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
394 		    KM_SLEEP);
395 		vh->vh_client_table = hash;
396 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
397 		(void) strcpy(vh->vh_class, class);
398 		vh->vh_lb = LOAD_BALANCE_RR;
399 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
400 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
401 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
402 				vh->vh_lb = LOAD_BALANCE_NONE;
403 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
404 				    == 0) {
405 				vh->vh_lb = LOAD_BALANCE_LBA;
406 			}
407 			ddi_prop_free(load_balance);
408 		}
409 
410 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
411 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
412 
413 		/*
414 		 * Store the vHCI ops vectors
415 		 */
416 		vh->vh_dip = vdip;
417 		vh->vh_ops = vops;
418 
419 		setup_vhci_cache(vh);
420 
421 		if (mdi_vhci_head == NULL) {
422 			mdi_vhci_head = vh;
423 		}
424 		if (mdi_vhci_tail) {
425 			mdi_vhci_tail->vh_next = vh;
426 		}
427 		mdi_vhci_tail = vh;
428 		mdi_vhci_count++;
429 	}
430 
431 	/*
432 	 * Claim the devfs node as a vhci component
433 	 */
434 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
435 
436 	/*
437 	 * Initialize our back reference from dev_info node
438 	 */
439 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
440 	mutex_exit(&mdi_mutex);
441 	return (MDI_SUCCESS);
442 }
443 
444 /*
445  * mdi_vhci_unregister():
446  *		Unregister a vHCI module from mpxio framework
447  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
448  * 		of a vhci to unregister it from the framework.
449  * Return Values:
450  *		MDI_SUCCESS
451  *		MDI_FAILURE
452  */
453 /*ARGSUSED*/
454 int
455 mdi_vhci_unregister(dev_info_t *vdip, int flags)
456 {
457 	mdi_vhci_t	*found, *vh, *prev = NULL;
458 
459 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
460 
461 	/*
462 	 * Check for invalid VHCI
463 	 */
464 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
465 		return (MDI_FAILURE);
466 
467 	/*
468 	 * Scan the list of registered vHCIs for a match
469 	 */
470 	mutex_enter(&mdi_mutex);
471 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
472 		if (found == vh)
473 			break;
474 		prev = found;
475 	}
476 
477 	if (found == NULL) {
478 		mutex_exit(&mdi_mutex);
479 		return (MDI_FAILURE);
480 	}
481 
482 	/*
483 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
484 	 * should have been unregistered, before a vHCI can be
485 	 * unregistered.
486 	 */
487 	MDI_VHCI_PHCI_LOCK(vh);
488 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
489 		MDI_VHCI_PHCI_UNLOCK(vh);
490 		mutex_exit(&mdi_mutex);
491 		return (MDI_FAILURE);
492 	}
493 	MDI_VHCI_PHCI_UNLOCK(vh);
494 
495 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
496 		mutex_exit(&mdi_mutex);
497 		return (MDI_FAILURE);
498 	}
499 
500 	/*
501 	 * Remove the vHCI from the global list
502 	 */
503 	if (vh == mdi_vhci_head) {
504 		mdi_vhci_head = vh->vh_next;
505 	} else {
506 		prev->vh_next = vh->vh_next;
507 	}
508 	if (vh == mdi_vhci_tail) {
509 		mdi_vhci_tail = prev;
510 	}
511 	mdi_vhci_count--;
512 	mutex_exit(&mdi_mutex);
513 
514 	vh->vh_ops = NULL;
515 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
516 	DEVI(vdip)->devi_mdi_xhci = NULL;
517 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
518 	kmem_free(vh->vh_client_table,
519 	    mdi_client_table_size * sizeof (struct client_hash));
520 	mutex_destroy(&vh->vh_phci_mutex);
521 	mutex_destroy(&vh->vh_client_mutex);
522 
523 	kmem_free(vh, sizeof (mdi_vhci_t));
524 	return (MDI_SUCCESS);
525 }
526 
527 /*
528  * i_mdi_vhci_class2vhci():
529  *		Look for a matching vHCI module given a vHCI class name
530  * Return Values:
531  *		Handle to a vHCI component
532  *		NULL
533  */
534 static mdi_vhci_t *
535 i_mdi_vhci_class2vhci(char *class)
536 {
537 	mdi_vhci_t	*vh = NULL;
538 
539 	ASSERT(!MUTEX_HELD(&mdi_mutex));
540 
541 	mutex_enter(&mdi_mutex);
542 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
543 		if (strcmp(vh->vh_class, class) == 0) {
544 			break;
545 		}
546 	}
547 	mutex_exit(&mdi_mutex);
548 	return (vh);
549 }
550 
551 /*
552  * i_devi_get_vhci():
553  *		Utility function to get the handle to a vHCI component
554  * Return Values:
555  *		Handle to a vHCI component
556  *		NULL
557  */
558 mdi_vhci_t *
559 i_devi_get_vhci(dev_info_t *vdip)
560 {
561 	mdi_vhci_t	*vh = NULL;
562 	if (MDI_VHCI(vdip)) {
563 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
564 	}
565 	return (vh);
566 }
567 
568 /*
569  * mdi_phci_register():
570  *		Register a pHCI module with mpxio framework
571  *		mdi_phci_register() is called by pHCI drivers to register with
572  *		the mpxio framework and a specific 'class_driver' vHCI.  The
573  *		pHCI driver must call this interface as part of its attach(9e)
574  *		handler.
575  * Return Values:
576  *		MDI_SUCCESS
577  *		MDI_FAILURE
578  */
579 /*ARGSUSED*/
580 int
581 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
582 {
583 	mdi_phci_t		*ph;
584 	mdi_vhci_t		*vh;
585 	char			*data;
586 	char			*pathname;
587 
588 	/*
589 	 * Some subsystems, like fcp, perform pHCI registration from a
590 	 * different thread than the one doing the pHCI attach(9E) - the
591 	 * driver attach code is waiting for this other thread to complete.
592 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
593 	 * (indicating that some thread has done an ndi_devi_enter of parent)
594 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
595 	 */
596 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
597 
598 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
599 	(void) ddi_pathname(pdip, pathname);
600 
601 	/*
602 	 * Check for mpxio-disable property. Enable mpxio if the property is
603 	 * missing or not set to "yes".
604 	 * If the property is set to "yes" then emit a brief message.
605 	 */
606 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
607 	    &data) == DDI_SUCCESS)) {
608 		if (strcmp(data, "yes") == 0) {
609 			MDI_DEBUG(1, (CE_CONT, pdip,
610 			    "?%s (%s%d) multipath capabilities "
611 			    "disabled via %s.conf.\n", pathname,
612 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
613 			    ddi_driver_name(pdip)));
614 			ddi_prop_free(data);
615 			kmem_free(pathname, MAXPATHLEN);
616 			return (MDI_FAILURE);
617 		}
618 		ddi_prop_free(data);
619 	}
620 
621 	kmem_free(pathname, MAXPATHLEN);
622 
623 	/*
624 	 * Search for a matching vHCI
625 	 */
626 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
627 	if (vh == NULL) {
628 		return (MDI_FAILURE);
629 	}
630 
631 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
632 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
633 	ph->ph_dip = pdip;
634 	ph->ph_vhci = vh;
635 	ph->ph_next = NULL;
636 	ph->ph_unstable = 0;
637 	ph->ph_vprivate = 0;
638 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
639 
640 	MDI_PHCI_LOCK(ph);
641 	MDI_PHCI_SET_POWER_UP(ph);
642 	MDI_PHCI_UNLOCK(ph);
643 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
644 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
645 
646 	vhcache_phci_add(vh->vh_config, ph);
647 
648 	MDI_VHCI_PHCI_LOCK(vh);
649 	if (vh->vh_phci_head == NULL) {
650 		vh->vh_phci_head = ph;
651 	}
652 	if (vh->vh_phci_tail) {
653 		vh->vh_phci_tail->ph_next = ph;
654 	}
655 	vh->vh_phci_tail = ph;
656 	vh->vh_phci_count++;
657 	MDI_VHCI_PHCI_UNLOCK(vh);
658 
659 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
660 	return (MDI_SUCCESS);
661 }
662 
663 /*
664  * mdi_phci_unregister():
665  *		Unregister a pHCI module from mpxio framework
666  *		mdi_phci_unregister() is called by the pHCI drivers from their
667  *		detach(9E) handler to unregister their instances from the
668  *		framework.
669  * Return Values:
670  *		MDI_SUCCESS
671  *		MDI_FAILURE
672  */
673 /*ARGSUSED*/
674 int
675 mdi_phci_unregister(dev_info_t *pdip, int flags)
676 {
677 	mdi_vhci_t		*vh;
678 	mdi_phci_t		*ph;
679 	mdi_phci_t		*tmp;
680 	mdi_phci_t		*prev = NULL;
681 
682 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
683 
684 	ph = i_devi_get_phci(pdip);
685 	if (ph == NULL) {
686 		MDI_DEBUG(1, (CE_WARN, pdip,
687 		    "!pHCI unregister: Not a valid pHCI"));
688 		return (MDI_FAILURE);
689 	}
690 
691 	vh = ph->ph_vhci;
692 	ASSERT(vh != NULL);
693 	if (vh == NULL) {
694 		MDI_DEBUG(1, (CE_WARN, pdip,
695 		    "!pHCI unregister: Not a valid vHCI"));
696 		return (MDI_FAILURE);
697 	}
698 
699 	MDI_VHCI_PHCI_LOCK(vh);
700 	tmp = vh->vh_phci_head;
701 	while (tmp) {
702 		if (tmp == ph) {
703 			break;
704 		}
705 		prev = tmp;
706 		tmp = tmp->ph_next;
707 	}
708 
709 	if (ph == vh->vh_phci_head) {
710 		vh->vh_phci_head = ph->ph_next;
711 	} else {
712 		prev->ph_next = ph->ph_next;
713 	}
714 
715 	if (ph == vh->vh_phci_tail) {
716 		vh->vh_phci_tail = prev;
717 	}
718 
719 	vh->vh_phci_count--;
720 	MDI_VHCI_PHCI_UNLOCK(vh);
721 
722 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
723 	    ESC_DDI_INITIATOR_UNREGISTER);
724 	vhcache_phci_remove(vh->vh_config, ph);
725 	cv_destroy(&ph->ph_unstable_cv);
726 	mutex_destroy(&ph->ph_mutex);
727 	kmem_free(ph, sizeof (mdi_phci_t));
728 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
729 	DEVI(pdip)->devi_mdi_xhci = NULL;
730 	return (MDI_SUCCESS);
731 }
732 
733 /*
734  * i_devi_get_phci():
735  * 		Utility function to return the phci extensions.
736  */
737 static mdi_phci_t *
738 i_devi_get_phci(dev_info_t *pdip)
739 {
740 	mdi_phci_t	*ph = NULL;
741 	if (MDI_PHCI(pdip)) {
742 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
743 	}
744 	return (ph);
745 }
746 
747 /*
748  * Single thread mdi entry into devinfo node for modifying its children.
749  * If necessary we perform an ndi_devi_enter of the vHCI before doing
750  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
751  * for the vHCI and one for the pHCI.
752  */
753 void
754 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
755 {
756 	dev_info_t	*vdip;
757 	int		vcircular, pcircular;
758 
759 	/* Verify calling context */
760 	ASSERT(MDI_PHCI(phci_dip));
761 	vdip = mdi_devi_get_vdip(phci_dip);
762 	ASSERT(vdip);			/* A pHCI always has a vHCI */
763 
764 	/*
765 	 * If pHCI is detaching then the framework has already entered the
766 	 * vHCI on a threads that went down the code path leading to
767 	 * detach_node().  This framework enter of the vHCI during pHCI
768 	 * detach is done to avoid deadlock with vHCI power management
769 	 * operations which enter the vHCI and the enter down the path
770 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
771 	 * enter of the vHCI on frameworks vHCI enter that has already
772 	 * occurred - this is OK because we know that the framework thread
773 	 * doing detach is waiting for our completion.
774 	 *
775 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
776 	 * race with detach - but we can't do that because the framework has
777 	 * already entered the parent, so we have some complexity instead.
778 	 */
779 	for (;;) {
780 		if (ndi_devi_tryenter(vdip, &vcircular)) {
781 			ASSERT(vcircular != -1);
782 			if (DEVI_IS_DETACHING(phci_dip)) {
783 				ndi_devi_exit(vdip, vcircular);
784 				vcircular = -1;
785 			}
786 			break;
787 		} else if (DEVI_IS_DETACHING(phci_dip)) {
788 			vcircular = -1;
789 			break;
790 		} else {
791 			delay(1);
792 		}
793 	}
794 
795 	ndi_devi_enter(phci_dip, &pcircular);
796 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
797 }
798 
799 /*
800  * Release mdi_devi_enter or successful mdi_devi_tryenter.
801  */
802 void
803 mdi_devi_exit(dev_info_t *phci_dip, int circular)
804 {
805 	dev_info_t	*vdip;
806 	int		vcircular, pcircular;
807 
808 	/* Verify calling context */
809 	ASSERT(MDI_PHCI(phci_dip));
810 	vdip = mdi_devi_get_vdip(phci_dip);
811 	ASSERT(vdip);			/* A pHCI always has a vHCI */
812 
813 	/* extract two circular recursion values from single int */
814 	pcircular = (short)(circular & 0xFFFF);
815 	vcircular = (short)((circular >> 16) & 0xFFFF);
816 
817 	ndi_devi_exit(phci_dip, pcircular);
818 	if (vcircular != -1)
819 		ndi_devi_exit(vdip, vcircular);
820 }
821 
822 /*
823  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
824  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
825  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
826  * with vHCI power management code during path online/offline.  Each
827  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
828  * occur within the scope of an active mdi_devi_enter that establishes the
829  * circular value.
830  */
831 void
832 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
833 {
834 	int		pcircular;
835 
836 	/* Verify calling context */
837 	ASSERT(MDI_PHCI(phci_dip));
838 
839 	pcircular = (short)(circular & 0xFFFF);
840 	ndi_devi_exit(phci_dip, pcircular);
841 }
842 
843 void
844 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
845 {
846 	int		pcircular;
847 
848 	/* Verify calling context */
849 	ASSERT(MDI_PHCI(phci_dip));
850 
851 	ndi_devi_enter(phci_dip, &pcircular);
852 
853 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
854 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
855 }
856 
857 /*
858  * mdi_devi_get_vdip():
859  *		given a pHCI dip return vHCI dip
860  */
861 dev_info_t *
862 mdi_devi_get_vdip(dev_info_t *pdip)
863 {
864 	mdi_phci_t	*ph;
865 
866 	ph = i_devi_get_phci(pdip);
867 	if (ph && ph->ph_vhci)
868 		return (ph->ph_vhci->vh_dip);
869 	return (NULL);
870 }
871 
872 /*
873  * mdi_devi_pdip_entered():
874  *		Return 1 if we are vHCI and have done an ndi_devi_enter
875  *		of a pHCI
876  */
877 int
878 mdi_devi_pdip_entered(dev_info_t *vdip)
879 {
880 	mdi_vhci_t	*vh;
881 	mdi_phci_t	*ph;
882 
883 	vh = i_devi_get_vhci(vdip);
884 	if (vh == NULL)
885 		return (0);
886 
887 	MDI_VHCI_PHCI_LOCK(vh);
888 	ph = vh->vh_phci_head;
889 	while (ph) {
890 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
891 			MDI_VHCI_PHCI_UNLOCK(vh);
892 			return (1);
893 		}
894 		ph = ph->ph_next;
895 	}
896 	MDI_VHCI_PHCI_UNLOCK(vh);
897 	return (0);
898 }
899 
900 /*
901  * mdi_phci_path2devinfo():
902  * 		Utility function to search for a valid phci device given
903  *		the devfs pathname.
904  */
905 dev_info_t *
906 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
907 {
908 	char		*temp_pathname;
909 	mdi_vhci_t	*vh;
910 	mdi_phci_t	*ph;
911 	dev_info_t 	*pdip = NULL;
912 
913 	vh = i_devi_get_vhci(vdip);
914 	ASSERT(vh != NULL);
915 
916 	if (vh == NULL) {
917 		/*
918 		 * Invalid vHCI component, return failure
919 		 */
920 		return (NULL);
921 	}
922 
923 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
924 	MDI_VHCI_PHCI_LOCK(vh);
925 	ph = vh->vh_phci_head;
926 	while (ph != NULL) {
927 		pdip = ph->ph_dip;
928 		ASSERT(pdip != NULL);
929 		*temp_pathname = '\0';
930 		(void) ddi_pathname(pdip, temp_pathname);
931 		if (strcmp(temp_pathname, pathname) == 0) {
932 			break;
933 		}
934 		ph = ph->ph_next;
935 	}
936 	if (ph == NULL) {
937 		pdip = NULL;
938 	}
939 	MDI_VHCI_PHCI_UNLOCK(vh);
940 	kmem_free(temp_pathname, MAXPATHLEN);
941 	return (pdip);
942 }
943 
944 /*
945  * mdi_phci_get_path_count():
946  * 		get number of path information nodes associated with a given
947  *		pHCI device.
948  */
949 int
950 mdi_phci_get_path_count(dev_info_t *pdip)
951 {
952 	mdi_phci_t	*ph;
953 	int		count = 0;
954 
955 	ph = i_devi_get_phci(pdip);
956 	if (ph != NULL) {
957 		count = ph->ph_path_count;
958 	}
959 	return (count);
960 }
961 
962 /*
963  * i_mdi_phci_lock():
964  *		Lock a pHCI device
965  * Return Values:
966  *		None
967  * Note:
968  *		The default locking order is:
969  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
970  *		But there are number of situations where locks need to be
971  *		grabbed in reverse order.  This routine implements try and lock
972  *		mechanism depending on the requested parameter option.
973  */
974 static void
975 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
976 {
977 	if (pip) {
978 		/* Reverse locking is requested. */
979 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
980 			/*
981 			 * tryenter failed. Try to grab again
982 			 * after a small delay
983 			 */
984 			MDI_PI_HOLD(pip);
985 			MDI_PI_UNLOCK(pip);
986 			delay(1);
987 			MDI_PI_LOCK(pip);
988 			MDI_PI_RELE(pip);
989 		}
990 	} else {
991 		MDI_PHCI_LOCK(ph);
992 	}
993 }
994 
995 /*
996  * i_mdi_phci_unlock():
997  *		Unlock the pHCI component
998  */
999 static void
1000 i_mdi_phci_unlock(mdi_phci_t *ph)
1001 {
1002 	MDI_PHCI_UNLOCK(ph);
1003 }
1004 
1005 /*
1006  * i_mdi_devinfo_create():
1007  *		create client device's devinfo node
1008  * Return Values:
1009  *		dev_info
1010  *		NULL
1011  * Notes:
1012  */
1013 static dev_info_t *
1014 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1015 	char **compatible, int ncompatible)
1016 {
1017 	dev_info_t *cdip = NULL;
1018 
1019 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1020 
1021 	/* Verify for duplicate entry */
1022 	cdip = i_mdi_devinfo_find(vh, name, guid);
1023 	ASSERT(cdip == NULL);
1024 	if (cdip) {
1025 		cmn_err(CE_WARN,
1026 		    "i_mdi_devinfo_create: client dip %p already exists",
1027 			(void *)cdip);
1028 	}
1029 
1030 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1031 	if (cdip == NULL)
1032 		goto fail;
1033 
1034 	/*
1035 	 * Create component type and Global unique identifier
1036 	 * properties
1037 	 */
1038 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1039 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1040 		goto fail;
1041 	}
1042 
1043 	/* Decorate the node with compatible property */
1044 	if (compatible &&
1045 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1046 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1047 		goto fail;
1048 	}
1049 
1050 	return (cdip);
1051 
1052 fail:
1053 	if (cdip) {
1054 		(void) ndi_prop_remove_all(cdip);
1055 		(void) ndi_devi_free(cdip);
1056 	}
1057 	return (NULL);
1058 }
1059 
1060 /*
1061  * i_mdi_devinfo_find():
1062  *		Find a matching devinfo node for given client node name
1063  *		and its guid.
1064  * Return Values:
1065  *		Handle to a dev_info node or NULL
1066  */
1067 static dev_info_t *
1068 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1069 {
1070 	char			*data;
1071 	dev_info_t 		*cdip = NULL;
1072 	dev_info_t 		*ndip = NULL;
1073 	int			circular;
1074 
1075 	ndi_devi_enter(vh->vh_dip, &circular);
1076 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1077 	while ((cdip = ndip) != NULL) {
1078 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1079 
1080 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1081 			continue;
1082 		}
1083 
1084 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1085 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1086 		    &data) != DDI_PROP_SUCCESS) {
1087 			continue;
1088 		}
1089 
1090 		if (strcmp(data, guid) != 0) {
1091 			ddi_prop_free(data);
1092 			continue;
1093 		}
1094 		ddi_prop_free(data);
1095 		break;
1096 	}
1097 	ndi_devi_exit(vh->vh_dip, circular);
1098 	return (cdip);
1099 }
1100 
1101 /*
1102  * i_mdi_devinfo_remove():
1103  *		Remove a client device node
1104  */
1105 static int
1106 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1107 {
1108 	int	rv = MDI_SUCCESS;
1109 
1110 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1111 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1112 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
1113 		if (rv != NDI_SUCCESS) {
1114 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
1115 			    " failed. cdip = %p\n", (void *)cdip));
1116 		}
1117 		/*
1118 		 * Convert to MDI error code
1119 		 */
1120 		switch (rv) {
1121 		case NDI_SUCCESS:
1122 			rv = MDI_SUCCESS;
1123 			break;
1124 		case NDI_BUSY:
1125 			rv = MDI_BUSY;
1126 			break;
1127 		default:
1128 			rv = MDI_FAILURE;
1129 			break;
1130 		}
1131 	}
1132 	return (rv);
1133 }
1134 
1135 /*
1136  * i_devi_get_client()
1137  *		Utility function to get mpxio component extensions
1138  */
1139 static mdi_client_t *
1140 i_devi_get_client(dev_info_t *cdip)
1141 {
1142 	mdi_client_t	*ct = NULL;
1143 
1144 	if (MDI_CLIENT(cdip)) {
1145 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1146 	}
1147 	return (ct);
1148 }
1149 
1150 /*
1151  * i_mdi_is_child_present():
1152  *		Search for the presence of client device dev_info node
1153  */
1154 static int
1155 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1156 {
1157 	int		rv = MDI_FAILURE;
1158 	struct dev_info	*dip;
1159 	int		circular;
1160 
1161 	ndi_devi_enter(vdip, &circular);
1162 	dip = DEVI(vdip)->devi_child;
1163 	while (dip) {
1164 		if (dip == DEVI(cdip)) {
1165 			rv = MDI_SUCCESS;
1166 			break;
1167 		}
1168 		dip = dip->devi_sibling;
1169 	}
1170 	ndi_devi_exit(vdip, circular);
1171 	return (rv);
1172 }
1173 
1174 
1175 /*
1176  * i_mdi_client_lock():
1177  *		Grab client component lock
1178  * Return Values:
1179  *		None
1180  * Note:
1181  *		The default locking order is:
1182  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1183  *		But there are number of situations where locks need to be
1184  *		grabbed in reverse order.  This routine implements try and lock
1185  *		mechanism depending on the requested parameter option.
1186  */
1187 static void
1188 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1189 {
1190 	if (pip) {
1191 		/*
1192 		 * Reverse locking is requested.
1193 		 */
1194 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1195 			/*
1196 			 * tryenter failed. Try to grab again
1197 			 * after a small delay
1198 			 */
1199 			MDI_PI_HOLD(pip);
1200 			MDI_PI_UNLOCK(pip);
1201 			delay(1);
1202 			MDI_PI_LOCK(pip);
1203 			MDI_PI_RELE(pip);
1204 		}
1205 	} else {
1206 		MDI_CLIENT_LOCK(ct);
1207 	}
1208 }
1209 
1210 /*
1211  * i_mdi_client_unlock():
1212  *		Unlock a client component
1213  */
1214 static void
1215 i_mdi_client_unlock(mdi_client_t *ct)
1216 {
1217 	MDI_CLIENT_UNLOCK(ct);
1218 }
1219 
1220 /*
1221  * i_mdi_client_alloc():
1222  * 		Allocate and initialize a client structure.  Caller should
1223  *		hold the vhci client lock.
1224  * Return Values:
1225  *		Handle to a client component
1226  */
1227 /*ARGSUSED*/
1228 static mdi_client_t *
1229 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1230 {
1231 	mdi_client_t	*ct;
1232 
1233 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1234 
1235 	/*
1236 	 * Allocate and initialize a component structure.
1237 	 */
1238 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1239 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1240 	ct->ct_hnext = NULL;
1241 	ct->ct_hprev = NULL;
1242 	ct->ct_dip = NULL;
1243 	ct->ct_vhci = vh;
1244 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1245 	(void) strcpy(ct->ct_drvname, name);
1246 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1247 	(void) strcpy(ct->ct_guid, lguid);
1248 	ct->ct_cprivate = NULL;
1249 	ct->ct_vprivate = NULL;
1250 	ct->ct_flags = 0;
1251 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1252 	MDI_CLIENT_LOCK(ct);
1253 	MDI_CLIENT_SET_OFFLINE(ct);
1254 	MDI_CLIENT_SET_DETACH(ct);
1255 	MDI_CLIENT_SET_POWER_UP(ct);
1256 	MDI_CLIENT_UNLOCK(ct);
1257 	ct->ct_failover_flags = 0;
1258 	ct->ct_failover_status = 0;
1259 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1260 	ct->ct_unstable = 0;
1261 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1262 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1263 	ct->ct_lb = vh->vh_lb;
1264 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1265 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1266 	ct->ct_path_count = 0;
1267 	ct->ct_path_head = NULL;
1268 	ct->ct_path_tail = NULL;
1269 	ct->ct_path_last = NULL;
1270 
1271 	/*
1272 	 * Add this client component to our client hash queue
1273 	 */
1274 	i_mdi_client_enlist_table(vh, ct);
1275 	return (ct);
1276 }
1277 
1278 /*
1279  * i_mdi_client_enlist_table():
1280  *		Attach the client device to the client hash table. Caller
1281  *		should hold the vhci client lock.
1282  */
1283 static void
1284 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1285 {
1286 	int 			index;
1287 	struct client_hash	*head;
1288 
1289 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1290 
1291 	index = i_mdi_get_hash_key(ct->ct_guid);
1292 	head = &vh->vh_client_table[index];
1293 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1294 	head->ct_hash_head = ct;
1295 	head->ct_hash_count++;
1296 	vh->vh_client_count++;
1297 }
1298 
1299 /*
1300  * i_mdi_client_delist_table():
1301  *		Attach the client device to the client hash table.
1302  *		Caller should hold the vhci client lock.
1303  */
1304 static void
1305 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1306 {
1307 	int			index;
1308 	char			*guid;
1309 	struct client_hash 	*head;
1310 	mdi_client_t		*next;
1311 	mdi_client_t		*last;
1312 
1313 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1314 
1315 	guid = ct->ct_guid;
1316 	index = i_mdi_get_hash_key(guid);
1317 	head = &vh->vh_client_table[index];
1318 
1319 	last = NULL;
1320 	next = (mdi_client_t *)head->ct_hash_head;
1321 	while (next != NULL) {
1322 		if (next == ct) {
1323 			break;
1324 		}
1325 		last = next;
1326 		next = next->ct_hnext;
1327 	}
1328 
1329 	if (next) {
1330 		head->ct_hash_count--;
1331 		if (last == NULL) {
1332 			head->ct_hash_head = ct->ct_hnext;
1333 		} else {
1334 			last->ct_hnext = ct->ct_hnext;
1335 		}
1336 		ct->ct_hnext = NULL;
1337 		vh->vh_client_count--;
1338 	}
1339 }
1340 
1341 
1342 /*
1343  * i_mdi_client_free():
1344  *		Free a client component
1345  */
1346 static int
1347 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1348 {
1349 	int		rv = MDI_SUCCESS;
1350 	int		flags = ct->ct_flags;
1351 	dev_info_t	*cdip;
1352 	dev_info_t	*vdip;
1353 
1354 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1355 
1356 	vdip = vh->vh_dip;
1357 	cdip = ct->ct_dip;
1358 
1359 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1360 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1361 	DEVI(cdip)->devi_mdi_client = NULL;
1362 
1363 	/*
1364 	 * Clear out back ref. to dev_info_t node
1365 	 */
1366 	ct->ct_dip = NULL;
1367 
1368 	/*
1369 	 * Remove this client from our hash queue
1370 	 */
1371 	i_mdi_client_delist_table(vh, ct);
1372 
1373 	/*
1374 	 * Uninitialize and free the component
1375 	 */
1376 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1377 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1378 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1379 	cv_destroy(&ct->ct_failover_cv);
1380 	cv_destroy(&ct->ct_unstable_cv);
1381 	cv_destroy(&ct->ct_powerchange_cv);
1382 	mutex_destroy(&ct->ct_mutex);
1383 	kmem_free(ct, sizeof (*ct));
1384 
1385 	if (cdip != NULL) {
1386 		MDI_VHCI_CLIENT_UNLOCK(vh);
1387 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1388 		MDI_VHCI_CLIENT_LOCK(vh);
1389 	}
1390 	return (rv);
1391 }
1392 
1393 /*
1394  * i_mdi_client_find():
1395  * 		Find the client structure corresponding to a given guid
1396  *		Caller should hold the vhci client lock.
1397  */
1398 static mdi_client_t *
1399 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1400 {
1401 	int			index;
1402 	struct client_hash	*head;
1403 	mdi_client_t		*ct;
1404 
1405 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1406 
1407 	index = i_mdi_get_hash_key(guid);
1408 	head = &vh->vh_client_table[index];
1409 
1410 	ct = head->ct_hash_head;
1411 	while (ct != NULL) {
1412 		if (strcmp(ct->ct_guid, guid) == 0 &&
1413 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1414 			break;
1415 		}
1416 		ct = ct->ct_hnext;
1417 	}
1418 	return (ct);
1419 }
1420 
1421 /*
1422  * i_mdi_client_update_state():
1423  *		Compute and update client device state
1424  * Notes:
1425  *		A client device can be in any of three possible states:
1426  *
1427  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1428  *		one online/standby paths. Can tolerate failures.
1429  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1430  *		no alternate paths available as standby. A failure on the online
1431  *		would result in loss of access to device data.
1432  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1433  *		no paths available to access the device.
1434  */
1435 static void
1436 i_mdi_client_update_state(mdi_client_t *ct)
1437 {
1438 	int state;
1439 
1440 	ASSERT(MDI_CLIENT_LOCKED(ct));
1441 	state = i_mdi_client_compute_state(ct, NULL);
1442 	MDI_CLIENT_SET_STATE(ct, state);
1443 }
1444 
1445 /*
1446  * i_mdi_client_compute_state():
1447  *		Compute client device state
1448  *
1449  *		mdi_phci_t *	Pointer to pHCI structure which should
1450  *				while computing the new value.  Used by
1451  *				i_mdi_phci_offline() to find the new
1452  *				client state after DR of a pHCI.
1453  */
1454 static int
1455 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1456 {
1457 	int		state;
1458 	int		online_count = 0;
1459 	int		standby_count = 0;
1460 	mdi_pathinfo_t	*pip, *next;
1461 
1462 	ASSERT(MDI_CLIENT_LOCKED(ct));
1463 	pip = ct->ct_path_head;
1464 	while (pip != NULL) {
1465 		MDI_PI_LOCK(pip);
1466 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1467 		if (MDI_PI(pip)->pi_phci == ph) {
1468 			MDI_PI_UNLOCK(pip);
1469 			pip = next;
1470 			continue;
1471 		}
1472 
1473 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1474 				== MDI_PATHINFO_STATE_ONLINE)
1475 			online_count++;
1476 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1477 				== MDI_PATHINFO_STATE_STANDBY)
1478 			standby_count++;
1479 		MDI_PI_UNLOCK(pip);
1480 		pip = next;
1481 	}
1482 
1483 	if (online_count == 0) {
1484 		if (standby_count == 0) {
1485 			state = MDI_CLIENT_STATE_FAILED;
1486 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1487 			    " ct = %p\n", (void *)ct));
1488 		} else if (standby_count == 1) {
1489 			state = MDI_CLIENT_STATE_DEGRADED;
1490 		} else {
1491 			state = MDI_CLIENT_STATE_OPTIMAL;
1492 		}
1493 	} else if (online_count == 1) {
1494 		if (standby_count == 0) {
1495 			state = MDI_CLIENT_STATE_DEGRADED;
1496 		} else {
1497 			state = MDI_CLIENT_STATE_OPTIMAL;
1498 		}
1499 	} else {
1500 		state = MDI_CLIENT_STATE_OPTIMAL;
1501 	}
1502 	return (state);
1503 }
1504 
1505 /*
1506  * i_mdi_client2devinfo():
1507  *		Utility function
1508  */
1509 dev_info_t *
1510 i_mdi_client2devinfo(mdi_client_t *ct)
1511 {
1512 	return (ct->ct_dip);
1513 }
1514 
1515 /*
1516  * mdi_client_path2_devinfo():
1517  * 		Given the parent devinfo and child devfs pathname, search for
1518  *		a valid devfs node handle.
1519  */
1520 dev_info_t *
1521 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1522 {
1523 	dev_info_t 	*cdip = NULL;
1524 	dev_info_t 	*ndip = NULL;
1525 	char		*temp_pathname;
1526 	int		circular;
1527 
1528 	/*
1529 	 * Allocate temp buffer
1530 	 */
1531 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1532 
1533 	/*
1534 	 * Lock parent against changes
1535 	 */
1536 	ndi_devi_enter(vdip, &circular);
1537 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1538 	while ((cdip = ndip) != NULL) {
1539 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1540 
1541 		*temp_pathname = '\0';
1542 		(void) ddi_pathname(cdip, temp_pathname);
1543 		if (strcmp(temp_pathname, pathname) == 0) {
1544 			break;
1545 		}
1546 	}
1547 	/*
1548 	 * Release devinfo lock
1549 	 */
1550 	ndi_devi_exit(vdip, circular);
1551 
1552 	/*
1553 	 * Free the temp buffer
1554 	 */
1555 	kmem_free(temp_pathname, MAXPATHLEN);
1556 	return (cdip);
1557 }
1558 
1559 /*
1560  * mdi_client_get_path_count():
1561  * 		Utility function to get number of path information nodes
1562  *		associated with a given client device.
1563  */
1564 int
1565 mdi_client_get_path_count(dev_info_t *cdip)
1566 {
1567 	mdi_client_t	*ct;
1568 	int		count = 0;
1569 
1570 	ct = i_devi_get_client(cdip);
1571 	if (ct != NULL) {
1572 		count = ct->ct_path_count;
1573 	}
1574 	return (count);
1575 }
1576 
1577 
1578 /*
1579  * i_mdi_get_hash_key():
1580  * 		Create a hash using strings as keys
1581  *
1582  */
1583 static int
1584 i_mdi_get_hash_key(char *str)
1585 {
1586 	uint32_t	g, hash = 0;
1587 	char		*p;
1588 
1589 	for (p = str; *p != '\0'; p++) {
1590 		g = *p;
1591 		hash += g;
1592 	}
1593 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1594 }
1595 
1596 /*
1597  * mdi_get_lb_policy():
1598  * 		Get current load balancing policy for a given client device
1599  */
1600 client_lb_t
1601 mdi_get_lb_policy(dev_info_t *cdip)
1602 {
1603 	client_lb_t	lb = LOAD_BALANCE_NONE;
1604 	mdi_client_t	*ct;
1605 
1606 	ct = i_devi_get_client(cdip);
1607 	if (ct != NULL) {
1608 		lb = ct->ct_lb;
1609 	}
1610 	return (lb);
1611 }
1612 
1613 /*
1614  * mdi_set_lb_region_size():
1615  * 		Set current region size for the load-balance
1616  */
1617 int
1618 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1619 {
1620 	mdi_client_t	*ct;
1621 	int		rv = MDI_FAILURE;
1622 
1623 	ct = i_devi_get_client(cdip);
1624 	if (ct != NULL && ct->ct_lb_args != NULL) {
1625 		ct->ct_lb_args->region_size = region_size;
1626 		rv = MDI_SUCCESS;
1627 	}
1628 	return (rv);
1629 }
1630 
1631 /*
1632  * mdi_Set_lb_policy():
1633  * 		Set current load balancing policy for a given client device
1634  */
1635 int
1636 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1637 {
1638 	mdi_client_t	*ct;
1639 	int		rv = MDI_FAILURE;
1640 
1641 	ct = i_devi_get_client(cdip);
1642 	if (ct != NULL) {
1643 		ct->ct_lb = lb;
1644 		rv = MDI_SUCCESS;
1645 	}
1646 	return (rv);
1647 }
1648 
1649 /*
1650  * mdi_failover():
1651  *		failover function called by the vHCI drivers to initiate
1652  *		a failover operation.  This is typically due to non-availability
1653  *		of online paths to route I/O requests.  Failover can be
1654  *		triggered through user application also.
1655  *
1656  *		The vHCI driver calls mdi_failover() to initiate a failover
1657  *		operation. mdi_failover() calls back into the vHCI driver's
1658  *		vo_failover() entry point to perform the actual failover
1659  *		operation.  The reason for requiring the vHCI driver to
1660  *		initiate failover by calling mdi_failover(), instead of directly
1661  *		executing vo_failover() itself, is to ensure that the mdi
1662  *		framework can keep track of the client state properly.
1663  *		Additionally, mdi_failover() provides as a convenience the
1664  *		option of performing the failover operation synchronously or
1665  *		asynchronously
1666  *
1667  *		Upon successful completion of the failover operation, the
1668  *		paths that were previously ONLINE will be in the STANDBY state,
1669  *		and the newly activated paths will be in the ONLINE state.
1670  *
1671  *		The flags modifier determines whether the activation is done
1672  *		synchronously: MDI_FAILOVER_SYNC
1673  * Return Values:
1674  *		MDI_SUCCESS
1675  *		MDI_FAILURE
1676  *		MDI_BUSY
1677  */
1678 /*ARGSUSED*/
1679 int
1680 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1681 {
1682 	int			rv;
1683 	mdi_client_t		*ct;
1684 
1685 	ct = i_devi_get_client(cdip);
1686 	ASSERT(ct != NULL);
1687 	if (ct == NULL) {
1688 		/* cdip is not a valid client device. Nothing more to do. */
1689 		return (MDI_FAILURE);
1690 	}
1691 
1692 	MDI_CLIENT_LOCK(ct);
1693 
1694 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1695 		/* A path to the client is being freed */
1696 		MDI_CLIENT_UNLOCK(ct);
1697 		return (MDI_BUSY);
1698 	}
1699 
1700 
1701 	if (MDI_CLIENT_IS_FAILED(ct)) {
1702 		/*
1703 		 * Client is in failed state. Nothing more to do.
1704 		 */
1705 		MDI_CLIENT_UNLOCK(ct);
1706 		return (MDI_FAILURE);
1707 	}
1708 
1709 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1710 		/*
1711 		 * Failover is already in progress; return BUSY
1712 		 */
1713 		MDI_CLIENT_UNLOCK(ct);
1714 		return (MDI_BUSY);
1715 	}
1716 	/*
1717 	 * Make sure that mdi_pathinfo node state changes are processed.
1718 	 * We do not allow failovers to progress while client path state
1719 	 * changes are in progress
1720 	 */
1721 	if (ct->ct_unstable) {
1722 		if (flags == MDI_FAILOVER_ASYNC) {
1723 			MDI_CLIENT_UNLOCK(ct);
1724 			return (MDI_BUSY);
1725 		} else {
1726 			while (ct->ct_unstable)
1727 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1728 		}
1729 	}
1730 
1731 	/*
1732 	 * Client device is in stable state. Before proceeding, perform sanity
1733 	 * checks again.
1734 	 */
1735 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1736 	    (!i_ddi_devi_attached(ct->ct_dip))) {
1737 		/*
1738 		 * Client is in failed state. Nothing more to do.
1739 		 */
1740 		MDI_CLIENT_UNLOCK(ct);
1741 		return (MDI_FAILURE);
1742 	}
1743 
1744 	/*
1745 	 * Set the client state as failover in progress.
1746 	 */
1747 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1748 	ct->ct_failover_flags = flags;
1749 	MDI_CLIENT_UNLOCK(ct);
1750 
1751 	if (flags == MDI_FAILOVER_ASYNC) {
1752 		/*
1753 		 * Submit the initiate failover request via CPR safe
1754 		 * taskq threads.
1755 		 */
1756 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1757 		    ct, KM_SLEEP);
1758 		return (MDI_ACCEPT);
1759 	} else {
1760 		/*
1761 		 * Synchronous failover mode.  Typically invoked from the user
1762 		 * land.
1763 		 */
1764 		rv = i_mdi_failover(ct);
1765 	}
1766 	return (rv);
1767 }
1768 
1769 /*
1770  * i_mdi_failover():
1771  *		internal failover function. Invokes vHCI drivers failover
1772  *		callback function and process the failover status
1773  * Return Values:
1774  *		None
1775  *
1776  * Note: A client device in failover state can not be detached or freed.
1777  */
1778 static int
1779 i_mdi_failover(void *arg)
1780 {
1781 	int		rv = MDI_SUCCESS;
1782 	mdi_client_t	*ct = (mdi_client_t *)arg;
1783 	mdi_vhci_t	*vh = ct->ct_vhci;
1784 
1785 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1786 
1787 	if (vh->vh_ops->vo_failover != NULL) {
1788 		/*
1789 		 * Call vHCI drivers callback routine
1790 		 */
1791 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1792 		    ct->ct_failover_flags);
1793 	}
1794 
1795 	MDI_CLIENT_LOCK(ct);
1796 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1797 
1798 	/*
1799 	 * Save the failover return status
1800 	 */
1801 	ct->ct_failover_status = rv;
1802 
1803 	/*
1804 	 * As a result of failover, client status would have been changed.
1805 	 * Update the client state and wake up anyone waiting on this client
1806 	 * device.
1807 	 */
1808 	i_mdi_client_update_state(ct);
1809 
1810 	cv_broadcast(&ct->ct_failover_cv);
1811 	MDI_CLIENT_UNLOCK(ct);
1812 	return (rv);
1813 }
1814 
1815 /*
1816  * Load balancing is logical block.
1817  * IOs within the range described by region_size
1818  * would go on the same path. This would improve the
1819  * performance by cache-hit on some of the RAID devices.
1820  * Search only for online paths(At some point we
1821  * may want to balance across target ports).
1822  * If no paths are found then default to round-robin.
1823  */
1824 static int
1825 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1826 {
1827 	int		path_index = -1;
1828 	int		online_path_count = 0;
1829 	int		online_nonpref_path_count = 0;
1830 	int 		region_size = ct->ct_lb_args->region_size;
1831 	mdi_pathinfo_t	*pip;
1832 	mdi_pathinfo_t	*next;
1833 	int		preferred, path_cnt;
1834 
1835 	pip = ct->ct_path_head;
1836 	while (pip) {
1837 		MDI_PI_LOCK(pip);
1838 		if (MDI_PI(pip)->pi_state ==
1839 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1840 			online_path_count++;
1841 		} else if (MDI_PI(pip)->pi_state ==
1842 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1843 			online_nonpref_path_count++;
1844 		}
1845 		next = (mdi_pathinfo_t *)
1846 		    MDI_PI(pip)->pi_client_link;
1847 		MDI_PI_UNLOCK(pip);
1848 		pip = next;
1849 	}
1850 	/* if found any online/preferred then use this type */
1851 	if (online_path_count > 0) {
1852 		path_cnt = online_path_count;
1853 		preferred = 1;
1854 	} else if (online_nonpref_path_count > 0) {
1855 		path_cnt = online_nonpref_path_count;
1856 		preferred = 0;
1857 	} else {
1858 		path_cnt = 0;
1859 	}
1860 	if (path_cnt) {
1861 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1862 		pip = ct->ct_path_head;
1863 		while (pip && path_index != -1) {
1864 			MDI_PI_LOCK(pip);
1865 			if (path_index == 0 &&
1866 			    (MDI_PI(pip)->pi_state ==
1867 			    MDI_PATHINFO_STATE_ONLINE) &&
1868 				MDI_PI(pip)->pi_preferred == preferred) {
1869 				MDI_PI_HOLD(pip);
1870 				MDI_PI_UNLOCK(pip);
1871 				*ret_pip = pip;
1872 				return (MDI_SUCCESS);
1873 			}
1874 			path_index --;
1875 			next = (mdi_pathinfo_t *)
1876 			    MDI_PI(pip)->pi_client_link;
1877 			MDI_PI_UNLOCK(pip);
1878 			pip = next;
1879 		}
1880 		if (pip == NULL) {
1881 			MDI_DEBUG(4, (CE_NOTE, NULL,
1882 			    "!lba %llx, no pip !!\n",
1883 				bp->b_lblkno));
1884 		} else {
1885 			MDI_DEBUG(4, (CE_NOTE, NULL,
1886 			    "!lba %llx, no pip for path_index, "
1887 			    "pip %p\n", bp->b_lblkno, (void *)pip));
1888 		}
1889 	}
1890 	return (MDI_FAILURE);
1891 }
1892 
1893 /*
1894  * mdi_select_path():
1895  *		select a path to access a client device.
1896  *
1897  *		mdi_select_path() function is called by the vHCI drivers to
1898  *		select a path to route the I/O request to.  The caller passes
1899  *		the block I/O data transfer structure ("buf") as one of the
1900  *		parameters.  The mpxio framework uses the buf structure
1901  *		contents to maintain per path statistics (total I/O size /
1902  *		count pending).  If more than one online paths are available to
1903  *		select, the framework automatically selects a suitable path
1904  *		for routing I/O request. If a failover operation is active for
1905  *		this client device the call shall be failed with MDI_BUSY error
1906  *		code.
1907  *
1908  *		By default this function returns a suitable path in online
1909  *		state based on the current load balancing policy.  Currently
1910  *		we support LOAD_BALANCE_NONE (Previously selected online path
1911  *		will continue to be used till the path is usable) and
1912  *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is
 *		configured through the vHCI driver's configuration file
 *		(driver.conf).
1916  *
1917  *		vHCI drivers may override this default behavior by specifying
1918  *		appropriate flags.  If start_pip is specified (non NULL) is
1919  *		used as start point to walk and find the next appropriate path.
1920  *		The following values are currently defined:
1921  *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
1922  *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
1923  *
1924  *		The non-standard behavior is used by the scsi_vhci driver,
1925  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1926  *		attach of client devices (to avoid an unnecessary failover
1927  *		when the STANDBY path comes up first), during failover
1928  *		(to activate a STANDBY path as ONLINE).
1929  *
 *		The selected path is returned in a mdi_hold_path() state
1931  *		(pi_ref_cnt). Caller should release the hold by calling
1932  *		mdi_rele_path().
1933  *
1934  * Return Values:
1935  *		MDI_SUCCESS	- Completed successfully
1936  *		MDI_BUSY 	- Client device is busy failing over
1937  *		MDI_NOPATH	- Client device is online, but no valid path are
1938  *				  available to access this client device
1939  *		MDI_FAILURE	- Invalid client device or state
1940  *		MDI_DEVI_ONLINING
1941  *				- Client device (struct dev_info state) is in
1942  *				  onlining state.
1943  */
1944 
1945 /*ARGSUSED*/
1946 int
1947 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1948     mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
1949 {
1950 	mdi_client_t	*ct;
1951 	mdi_pathinfo_t	*pip;
1952 	mdi_pathinfo_t	*next;
1953 	mdi_pathinfo_t	*head;
1954 	mdi_pathinfo_t	*start;
1955 	client_lb_t	lbp;	/* load balancing policy */
1956 	int		sb = 1;	/* standard behavior */
1957 	int		preferred = 1;	/* preferred path */
1958 	int		cond, cont = 1;
1959 	int		retry = 0;
1960 
1961 	if (flags != 0) {
1962 		/*
1963 		 * disable default behavior
1964 		 */
1965 		sb = 0;
1966 	}
1967 
1968 	*ret_pip = NULL;
1969 	ct = i_devi_get_client(cdip);
1970 	if (ct == NULL) {
1971 		/* mdi extensions are NULL, Nothing more to do */
1972 		return (MDI_FAILURE);
1973 	}
1974 
1975 	MDI_CLIENT_LOCK(ct);
1976 
1977 	if (sb) {
1978 		if (MDI_CLIENT_IS_FAILED(ct)) {
1979 			/*
1980 			 * Client is not ready to accept any I/O requests.
1981 			 * Fail this request.
1982 			 */
1983 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1984 			    "client state offline ct = %p\n", (void *)ct));
1985 			MDI_CLIENT_UNLOCK(ct);
1986 			return (MDI_FAILURE);
1987 		}
1988 
1989 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1990 			/*
1991 			 * Check for Failover is in progress. If so tell the
1992 			 * caller that this device is busy.
1993 			 */
1994 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1995 			    "client failover in progress ct = %p\n",
1996 			    (void *)ct));
1997 			MDI_CLIENT_UNLOCK(ct);
1998 			return (MDI_BUSY);
1999 		}
2000 
2001 		/*
2002 		 * Check to see whether the client device is attached.
2003 		 * If not so, let the vHCI driver manually select a path
2004 		 * (standby) and let the probe/attach process to continue.
2005 		 */
2006 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2007 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
2008 			    "ct = %p\n", (void *)ct));
2009 			MDI_CLIENT_UNLOCK(ct);
2010 			return (MDI_DEVI_ONLINING);
2011 		}
2012 	}
2013 
2014 	/*
2015 	 * Cache in the client list head.  If head of the list is NULL
2016 	 * return MDI_NOPATH
2017 	 */
2018 	head = ct->ct_path_head;
2019 	if (head == NULL) {
2020 		MDI_CLIENT_UNLOCK(ct);
2021 		return (MDI_NOPATH);
2022 	}
2023 
2024 	/*
2025 	 * for non default behavior, bypass current
2026 	 * load balancing policy and always use LOAD_BALANCE_RR
2027 	 * except that the start point will be adjusted based
2028 	 * on the provided start_pip
2029 	 */
2030 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2031 
2032 	switch (lbp) {
2033 	case LOAD_BALANCE_NONE:
2034 		/*
2035 		 * Load balancing is None  or Alternate path mode
2036 		 * Start looking for a online mdi_pathinfo node starting from
2037 		 * last known selected path
2038 		 */
2039 		preferred = 1;
2040 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
2041 		if (pip == NULL) {
2042 			pip = head;
2043 		}
2044 		start = pip;
2045 		do {
2046 			MDI_PI_LOCK(pip);
2047 			/*
2048 			 * No need to explicitly check if the path is disabled.
2049 			 * Since we are checking for state == ONLINE and the
2050 			 * same veriable is used for DISABLE/ENABLE information.
2051 			 */
2052 			if ((MDI_PI(pip)->pi_state  ==
2053 				MDI_PATHINFO_STATE_ONLINE) &&
2054 				preferred == MDI_PI(pip)->pi_preferred) {
2055 				/*
2056 				 * Return the path in hold state. Caller should
2057 				 * release the lock by calling mdi_rele_path()
2058 				 */
2059 				MDI_PI_HOLD(pip);
2060 				MDI_PI_UNLOCK(pip);
2061 				ct->ct_path_last = pip;
2062 				*ret_pip = pip;
2063 				MDI_CLIENT_UNLOCK(ct);
2064 				return (MDI_SUCCESS);
2065 			}
2066 
2067 			/*
2068 			 * Path is busy.
2069 			 */
2070 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2071 			    MDI_PI_IS_TRANSIENT(pip))
2072 				retry = 1;
2073 			/*
2074 			 * Keep looking for a next available online path
2075 			 */
2076 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2077 			if (next == NULL) {
2078 				next = head;
2079 			}
2080 			MDI_PI_UNLOCK(pip);
2081 			pip = next;
2082 			if (start == pip && preferred) {
2083 				preferred = 0;
2084 			} else if (start == pip && !preferred) {
2085 				cont = 0;
2086 			}
2087 		} while (cont);
2088 		break;
2089 
2090 	case LOAD_BALANCE_LBA:
2091 		/*
2092 		 * Make sure we are looking
2093 		 * for an online path. Otherwise, if it is for a STANDBY
2094 		 * path request, it will go through and fetch an ONLINE
2095 		 * path which is not desirable.
2096 		 */
2097 		if ((ct->ct_lb_args != NULL) &&
2098 			    (ct->ct_lb_args->region_size) && bp &&
2099 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2100 			if (i_mdi_lba_lb(ct, ret_pip, bp)
2101 				    == MDI_SUCCESS) {
2102 				MDI_CLIENT_UNLOCK(ct);
2103 				return (MDI_SUCCESS);
2104 			}
2105 		}
2106 		/*  FALLTHROUGH */
2107 	case LOAD_BALANCE_RR:
2108 		/*
2109 		 * Load balancing is Round Robin. Start looking for a online
2110 		 * mdi_pathinfo node starting from last known selected path
2111 		 * as the start point.  If override flags are specified,
2112 		 * process accordingly.
2113 		 * If the search is already in effect(start_pip not null),
2114 		 * then lets just use the same path preference to continue the
2115 		 * traversal.
2116 		 */
2117 
2118 		if (start_pip != NULL) {
2119 			preferred = MDI_PI(start_pip)->pi_preferred;
2120 		} else {
2121 			preferred = 1;
2122 		}
2123 
2124 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2125 		if (start == NULL) {
2126 			pip = head;
2127 		} else {
2128 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2129 			if (pip == NULL) {
2130 				if (!sb) {
2131 					if (preferred == 0) {
2132 						/*
2133 						 * Looks like we have completed
2134 						 * the traversal as preferred
2135 						 * value is 0. Time to bail out.
2136 						 */
2137 						*ret_pip = NULL;
2138 						MDI_CLIENT_UNLOCK(ct);
2139 						return (MDI_NOPATH);
2140 					} else {
2141 						/*
2142 						 * Looks like we reached the
2143 						 * end of the list. Lets enable
2144 						 * traversal of non preferred
2145 						 * paths.
2146 						 */
2147 						preferred = 0;
2148 					}
2149 				}
2150 				pip = head;
2151 			}
2152 		}
2153 		start = pip;
2154 		do {
2155 			MDI_PI_LOCK(pip);
2156 			if (sb) {
2157 				cond = ((MDI_PI(pip)->pi_state ==
2158 				    MDI_PATHINFO_STATE_ONLINE &&
2159 					MDI_PI(pip)->pi_preferred ==
2160 						preferred) ? 1 : 0);
2161 			} else {
2162 				if (flags == MDI_SELECT_ONLINE_PATH) {
2163 					cond = ((MDI_PI(pip)->pi_state ==
2164 					    MDI_PATHINFO_STATE_ONLINE &&
2165 						MDI_PI(pip)->pi_preferred ==
2166 						preferred) ? 1 : 0);
2167 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2168 					cond = ((MDI_PI(pip)->pi_state ==
2169 					    MDI_PATHINFO_STATE_STANDBY &&
2170 						MDI_PI(pip)->pi_preferred ==
2171 						preferred) ? 1 : 0);
2172 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2173 				    MDI_SELECT_STANDBY_PATH)) {
2174 					cond = (((MDI_PI(pip)->pi_state ==
2175 					    MDI_PATHINFO_STATE_ONLINE ||
2176 					    (MDI_PI(pip)->pi_state ==
2177 					    MDI_PATHINFO_STATE_STANDBY)) &&
2178 						MDI_PI(pip)->pi_preferred ==
2179 						preferred) ? 1 : 0);
2180 				} else if (flags ==
2181 					(MDI_SELECT_STANDBY_PATH |
2182 					MDI_SELECT_ONLINE_PATH |
2183 					MDI_SELECT_USER_DISABLE_PATH)) {
2184 					cond = (((MDI_PI(pip)->pi_state ==
2185 					    MDI_PATHINFO_STATE_ONLINE ||
2186 					    (MDI_PI(pip)->pi_state ==
2187 					    MDI_PATHINFO_STATE_STANDBY) ||
2188 						(MDI_PI(pip)->pi_state ==
2189 					    (MDI_PATHINFO_STATE_ONLINE|
2190 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2191 						(MDI_PI(pip)->pi_state ==
2192 					    (MDI_PATHINFO_STATE_STANDBY |
2193 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2194 						MDI_PI(pip)->pi_preferred ==
2195 						preferred) ? 1 : 0);
2196 				} else {
2197 					cond = 0;
2198 				}
2199 			}
2200 			/*
2201 			 * No need to explicitly check if the path is disabled.
2202 			 * Since we are checking for state == ONLINE and the
2203 			 * same veriable is used for DISABLE/ENABLE information.
2204 			 */
2205 			if (cond) {
2206 				/*
2207 				 * Return the path in hold state. Caller should
2208 				 * release the lock by calling mdi_rele_path()
2209 				 */
2210 				MDI_PI_HOLD(pip);
2211 				MDI_PI_UNLOCK(pip);
2212 				if (sb)
2213 					ct->ct_path_last = pip;
2214 				*ret_pip = pip;
2215 				MDI_CLIENT_UNLOCK(ct);
2216 				return (MDI_SUCCESS);
2217 			}
2218 			/*
2219 			 * Path is busy.
2220 			 */
2221 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2222 			    MDI_PI_IS_TRANSIENT(pip))
2223 				retry = 1;
2224 
2225 			/*
2226 			 * Keep looking for a next available online path
2227 			 */
2228 do_again:
2229 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2230 			if (next == NULL) {
2231 				if (!sb) {
2232 					if (preferred == 1) {
2233 						/*
2234 						 * Looks like we reached the
2235 						 * end of the list. Lets enable
2236 						 * traversal of non preferred
2237 						 * paths.
2238 						 */
2239 						preferred = 0;
2240 						next = head;
2241 					} else {
2242 						/*
2243 						 * We have done both the passes
2244 						 * Preferred as well as for
2245 						 * Non-preferred. Bail out now.
2246 						 */
2247 						cont = 0;
2248 					}
2249 				} else {
2250 					/*
2251 					 * Standard behavior case.
2252 					 */
2253 					next = head;
2254 				}
2255 			}
2256 			MDI_PI_UNLOCK(pip);
2257 			if (cont == 0) {
2258 				break;
2259 			}
2260 			pip = next;
2261 
2262 			if (!sb) {
2263 				/*
2264 				 * We need to handle the selection of
2265 				 * non-preferred path in the following
2266 				 * case:
2267 				 *
2268 				 * +------+   +------+   +------+   +-----+
2269 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2270 				 * +------+   +------+   +------+   +-----+
2271 				 *
2272 				 * If we start the search with B, we need to
2273 				 * skip beyond B to pick C which is non -
2274 				 * preferred in the second pass. The following
2275 				 * test, if true, will allow us to skip over
2276 				 * the 'start'(B in the example) to select
2277 				 * other non preferred elements.
2278 				 */
2279 				if ((start_pip != NULL) && (start_pip == pip) &&
2280 				    (MDI_PI(start_pip)->pi_preferred
2281 				    != preferred)) {
2282 					/*
2283 					 * try again after going past the start
2284 					 * pip
2285 					 */
2286 					MDI_PI_LOCK(pip);
2287 					goto do_again;
2288 				}
2289 			} else {
2290 				/*
2291 				 * Standard behavior case
2292 				 */
2293 				if (start == pip && preferred) {
2294 					/* look for nonpreferred paths */
2295 					preferred = 0;
2296 				} else if (start == pip && !preferred) {
2297 					/*
2298 					 * Exit condition
2299 					 */
2300 					cont = 0;
2301 				}
2302 			}
2303 		} while (cont);
2304 		break;
2305 	}
2306 
2307 	MDI_CLIENT_UNLOCK(ct);
2308 	if (retry == 1) {
2309 		return (MDI_BUSY);
2310 	} else {
2311 		return (MDI_NOPATH);
2312 	}
2313 }
2314 
2315 /*
2316  * For a client, return the next available path to any phci
2317  *
2318  * Note:
2319  *		Caller should hold the branch's devinfo node to get a consistent
2320  *		snap shot of the mdi_pathinfo nodes.
2321  *
2322  *		Please note that even the list is stable the mdi_pathinfo
2323  *		node state and properties are volatile.  The caller should lock
2324  *		and unlock the nodes by calling mdi_pi_lock() and
2325  *		mdi_pi_unlock() functions to get a stable properties.
2326  *
2327  *		If there is a need to use the nodes beyond the hold of the
2328  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2329  *		need to be held against unexpected removal by calling
2330  *		mdi_hold_path() and should be released by calling
2331  *		mdi_rele_path() on completion.
2332  */
2333 mdi_pathinfo_t *
2334 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2335 {
2336 	mdi_client_t *ct;
2337 
2338 	if (!MDI_CLIENT(ct_dip))
2339 		return (NULL);
2340 
2341 	/*
2342 	 * Walk through client link
2343 	 */
2344 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2345 	ASSERT(ct != NULL);
2346 
2347 	if (pip == NULL)
2348 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2349 
2350 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2351 }
2352 
2353 /*
2354  * For a phci, return the next available path to any client
2355  * Note: ditto mdi_get_next_phci_path()
2356  */
2357 mdi_pathinfo_t *
2358 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2359 {
2360 	mdi_phci_t *ph;
2361 
2362 	if (!MDI_PHCI(ph_dip))
2363 		return (NULL);
2364 
2365 	/*
2366 	 * Walk through pHCI link
2367 	 */
2368 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2369 	ASSERT(ph != NULL);
2370 
2371 	if (pip == NULL)
2372 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2373 
2374 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2375 }
2376 
2377 /*
2378  * mdi_hold_path():
2379  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2380  * Return Values:
2381  *		None
2382  */
2383 void
2384 mdi_hold_path(mdi_pathinfo_t *pip)
2385 {
2386 	if (pip) {
2387 		MDI_PI_LOCK(pip);
2388 		MDI_PI_HOLD(pip);
2389 		MDI_PI_UNLOCK(pip);
2390 	}
2391 }
2392 
2393 
2394 /*
2395  * mdi_rele_path():
2396  *		Release the mdi_pathinfo node which was selected
2397  *		through mdi_select_path() mechanism or manually held by
2398  *		calling mdi_hold_path().
2399  * Return Values:
2400  *		None
2401  */
2402 void
2403 mdi_rele_path(mdi_pathinfo_t *pip)
2404 {
2405 	if (pip) {
2406 		MDI_PI_LOCK(pip);
2407 		MDI_PI_RELE(pip);
2408 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2409 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2410 		}
2411 		MDI_PI_UNLOCK(pip);
2412 	}
2413 }
2414 
2415 /*
2416  * mdi_pi_lock():
2417  * 		Lock the mdi_pathinfo node.
2418  * Note:
2419  *		The caller should release the lock by calling mdi_pi_unlock()
2420  */
2421 void
2422 mdi_pi_lock(mdi_pathinfo_t *pip)
2423 {
2424 	ASSERT(pip != NULL);
2425 	if (pip) {
2426 		MDI_PI_LOCK(pip);
2427 	}
2428 }
2429 
2430 
2431 /*
2432  * mdi_pi_unlock():
2433  * 		Unlock the mdi_pathinfo node.
2434  * Note:
2435  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2436  */
2437 void
2438 mdi_pi_unlock(mdi_pathinfo_t *pip)
2439 {
2440 	ASSERT(pip != NULL);
2441 	if (pip) {
2442 		MDI_PI_UNLOCK(pip);
2443 	}
2444 }
2445 
2446 /*
2447  * mdi_pi_find():
2448  *		Search the list of mdi_pathinfo nodes attached to the
2449  *		pHCI/Client device node whose path address matches "paddr".
2450  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2451  *		found.
2452  * Return Values:
2453  *		mdi_pathinfo node handle
2454  *		NULL
2455  * Notes:
2456  *		Caller need not hold any locks to call this function.
2457  */
2458 mdi_pathinfo_t *
2459 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2460 {
2461 	mdi_phci_t		*ph;
2462 	mdi_vhci_t		*vh;
2463 	mdi_client_t		*ct;
2464 	mdi_pathinfo_t		*pip = NULL;
2465 
2466 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
2467 	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
2468 	if ((pdip == NULL) || (paddr == NULL)) {
2469 		return (NULL);
2470 	}
2471 	ph = i_devi_get_phci(pdip);
2472 	if (ph == NULL) {
2473 		/*
2474 		 * Invalid pHCI device, Nothing more to do.
2475 		 */
2476 		MDI_DEBUG(2, (CE_WARN, pdip,
2477 		    "!mdi_pi_find: invalid phci"));
2478 		return (NULL);
2479 	}
2480 
2481 	vh = ph->ph_vhci;
2482 	if (vh == NULL) {
2483 		/*
2484 		 * Invalid vHCI device, Nothing more to do.
2485 		 */
2486 		MDI_DEBUG(2, (CE_WARN, pdip,
2487 		    "!mdi_pi_find: invalid vhci"));
2488 		return (NULL);
2489 	}
2490 
2491 	/*
2492 	 * Look for pathinfo node identified by paddr.
2493 	 */
2494 	if (caddr == NULL) {
2495 		/*
2496 		 * Find a mdi_pathinfo node under pHCI list for a matching
2497 		 * unit address.
2498 		 */
2499 		MDI_PHCI_LOCK(ph);
2500 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2501 			MDI_DEBUG(2, (CE_WARN, pdip,
2502 			    "!mdi_pi_find: offline phci %p", (void *)ph));
2503 			MDI_PHCI_UNLOCK(ph);
2504 			return (NULL);
2505 		}
2506 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2507 
2508 		while (pip != NULL) {
2509 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2510 				break;
2511 			}
2512 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2513 		}
2514 		MDI_PHCI_UNLOCK(ph);
2515 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
2516 		    (void *)pip));
2517 		return (pip);
2518 	}
2519 
2520 	/*
2521 	 * XXX - Is the rest of the code in this function really necessary?
2522 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2523 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2524 	 * whether the search is based on the pathinfo nodes attached to
2525 	 * the pHCI or the client node, the result will be the same.
2526 	 */
2527 
2528 	/*
2529 	 * Find the client device corresponding to 'caddr'
2530 	 */
2531 	MDI_VHCI_CLIENT_LOCK(vh);
2532 
2533 	/*
2534 	 * XXX - Passing NULL to the following function works as long as the
2535 	 * the client addresses (caddr) are unique per vhci basis.
2536 	 */
2537 	ct = i_mdi_client_find(vh, NULL, caddr);
2538 	if (ct == NULL) {
2539 		/*
2540 		 * Client not found, Obviously mdi_pathinfo node has not been
2541 		 * created yet.
2542 		 */
2543 		MDI_VHCI_CLIENT_UNLOCK(vh);
2544 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
2545 		    "found for caddr %s", caddr ? caddr : "NULL"));
2546 		return (NULL);
2547 	}
2548 
2549 	/*
2550 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2551 	 * pHCI and paddr
2552 	 */
2553 	MDI_CLIENT_LOCK(ct);
2554 
2555 	/*
2556 	 * Release the global mutex as it is no more needed. Note: We always
2557 	 * respect the locking order while acquiring.
2558 	 */
2559 	MDI_VHCI_CLIENT_UNLOCK(vh);
2560 
2561 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2562 	while (pip != NULL) {
2563 		/*
2564 		 * Compare the unit address
2565 		 */
2566 		if ((MDI_PI(pip)->pi_phci == ph) &&
2567 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2568 			break;
2569 		}
2570 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2571 	}
2572 	MDI_CLIENT_UNLOCK(ct);
2573 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
2574 	return (pip);
2575 }
2576 
2577 /*
2578  * mdi_pi_alloc():
2579  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2580  *		The mdi_pathinfo node returned by this function identifies a
2581  *		unique device path is capable of having properties attached
2582  *		and passed to mdi_pi_online() to fully attach and online the
2583  *		path and client device node.
2584  *		The mdi_pathinfo node returned by this function must be
2585  *		destroyed using mdi_pi_free() if the path is no longer
2586  *		operational or if the caller fails to attach a client device
2587  *		node when calling mdi_pi_online(). The framework will not free
2588  *		the resources allocated.
2589  *		This function can be called from both interrupt and kernel
2590  *		contexts.  DDI_NOSLEEP flag should be used while calling
2591  *		from interrupt contexts.
2592  * Return Values:
2593  *		MDI_SUCCESS
2594  *		MDI_FAILURE
2595  *		MDI_NOMEM
2596  */
2597 /*ARGSUSED*/
2598 int
2599 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2600     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2601 {
2602 	mdi_vhci_t	*vh;
2603 	mdi_phci_t	*ph;
2604 	mdi_client_t	*ct;
2605 	mdi_pathinfo_t	*pip = NULL;
2606 	dev_info_t	*cdip;
2607 	int		rv = MDI_NOMEM;
2608 	int		path_allocated = 0;
2609 
2610 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
2611 	    cname ? cname : "NULL", caddr ? caddr : "NULL",
2612 	    paddr ? paddr : "NULL"));
2613 
2614 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2615 	    ret_pip == NULL) {
2616 		/* Nothing more to do */
2617 		return (MDI_FAILURE);
2618 	}
2619 
2620 	*ret_pip = NULL;
2621 
2622 	/* No allocations on detaching pHCI */
2623 	if (DEVI_IS_DETACHING(pdip)) {
2624 		/* Invalid pHCI device, return failure */
2625 		MDI_DEBUG(1, (CE_WARN, pdip,
2626 		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
2627 		return (MDI_FAILURE);
2628 	}
2629 
2630 	ph = i_devi_get_phci(pdip);
2631 	ASSERT(ph != NULL);
2632 	if (ph == NULL) {
2633 		/* Invalid pHCI device, return failure */
2634 		MDI_DEBUG(1, (CE_WARN, pdip,
2635 		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
2636 		return (MDI_FAILURE);
2637 	}
2638 
2639 	MDI_PHCI_LOCK(ph);
2640 	vh = ph->ph_vhci;
2641 	if (vh == NULL) {
2642 		/* Invalid vHCI device, return failure */
2643 		MDI_DEBUG(1, (CE_WARN, pdip,
2644 		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
2645 		MDI_PHCI_UNLOCK(ph);
2646 		return (MDI_FAILURE);
2647 	}
2648 
2649 	if (MDI_PHCI_IS_READY(ph) == 0) {
2650 		/*
2651 		 * Do not allow new node creation when pHCI is in
2652 		 * offline/suspended states
2653 		 */
2654 		MDI_DEBUG(1, (CE_WARN, pdip,
2655 		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
2656 		MDI_PHCI_UNLOCK(ph);
2657 		return (MDI_BUSY);
2658 	}
2659 	MDI_PHCI_UNSTABLE(ph);
2660 	MDI_PHCI_UNLOCK(ph);
2661 
2662 	/* look for a matching client, create one if not found */
2663 	MDI_VHCI_CLIENT_LOCK(vh);
2664 	ct = i_mdi_client_find(vh, cname, caddr);
2665 	if (ct == NULL) {
2666 		ct = i_mdi_client_alloc(vh, cname, caddr);
2667 		ASSERT(ct != NULL);
2668 	}
2669 
2670 	if (ct->ct_dip == NULL) {
2671 		/*
2672 		 * Allocate a devinfo node
2673 		 */
2674 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2675 		    compatible, ncompatible);
2676 		if (ct->ct_dip == NULL) {
2677 			(void) i_mdi_client_free(vh, ct);
2678 			goto fail;
2679 		}
2680 	}
2681 	cdip = ct->ct_dip;
2682 
2683 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2684 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2685 
2686 	MDI_CLIENT_LOCK(ct);
2687 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2688 	while (pip != NULL) {
2689 		/*
2690 		 * Compare the unit address
2691 		 */
2692 		if ((MDI_PI(pip)->pi_phci == ph) &&
2693 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2694 			break;
2695 		}
2696 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2697 	}
2698 	MDI_CLIENT_UNLOCK(ct);
2699 
2700 	if (pip == NULL) {
2701 		/*
2702 		 * This is a new path for this client device.  Allocate and
2703 		 * initialize a new pathinfo node
2704 		 */
2705 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2706 		ASSERT(pip != NULL);
2707 		path_allocated = 1;
2708 	}
2709 	rv = MDI_SUCCESS;
2710 
2711 fail:
2712 	/*
2713 	 * Release the global mutex.
2714 	 */
2715 	MDI_VHCI_CLIENT_UNLOCK(vh);
2716 
2717 	/*
2718 	 * Mark the pHCI as stable
2719 	 */
2720 	MDI_PHCI_LOCK(ph);
2721 	MDI_PHCI_STABLE(ph);
2722 	MDI_PHCI_UNLOCK(ph);
2723 	*ret_pip = pip;
2724 
2725 	MDI_DEBUG(2, (CE_NOTE, pdip,
2726 	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));
2727 
2728 	if (path_allocated)
2729 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2730 
2731 	return (rv);
2732 }
2733 
2734 /*ARGSUSED*/
2735 int
2736 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2737     int flags, mdi_pathinfo_t **ret_pip)
2738 {
2739 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2740 	    flags, ret_pip));
2741 }
2742 
2743 /*
2744  * i_mdi_pi_alloc():
2745  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2746  * Return Values:
2747  *		mdi_pathinfo
2748  */
2749 /*ARGSUSED*/
2750 static mdi_pathinfo_t *
2751 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2752 {
2753 	mdi_pathinfo_t	*pip;
2754 	int		ct_circular;
2755 	int		ph_circular;
2756 	int		se_flag;
2757 	int		kmem_flag;
2758 
2759 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2760 
2761 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2762 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2763 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2764 	    MDI_PATHINFO_STATE_TRANSIENT;
2765 
2766 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2767 		MDI_PI_SET_USER_DISABLE(pip);
2768 
2769 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2770 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2771 
2772 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2773 		MDI_PI_SET_DRV_DISABLE(pip);
2774 
2775 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2776 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2777 	MDI_PI(pip)->pi_client = ct;
2778 	MDI_PI(pip)->pi_phci = ph;
2779 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2780 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2781 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2782 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2783 	MDI_PI(pip)->pi_pprivate = NULL;
2784 	MDI_PI(pip)->pi_cprivate = NULL;
2785 	MDI_PI(pip)->pi_vprivate = NULL;
2786 	MDI_PI(pip)->pi_client_link = NULL;
2787 	MDI_PI(pip)->pi_phci_link = NULL;
2788 	MDI_PI(pip)->pi_ref_cnt = 0;
2789 	MDI_PI(pip)->pi_kstats = NULL;
2790 	MDI_PI(pip)->pi_preferred = 1;
2791 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2792 
2793 	/*
2794 	 * Lock both dev_info nodes against changes in parallel.
2795 	 *
2796 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
2797 	 * This atypical operation is done to synchronize pathinfo nodes
2798 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
2799 	 * the pathinfo nodes are children of the Client.
2800 	 */
2801 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2802 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2803 
2804 	i_mdi_phci_add_path(ph, pip);
2805 	i_mdi_client_add_path(ct, pip);
2806 
2807 	ndi_devi_exit(ph->ph_dip, ph_circular);
2808 	ndi_devi_exit(ct->ct_dip, ct_circular);
2809 
2810 	/* determine interrupt context */
2811 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2812 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2813 
2814 	i_ddi_di_cache_invalidate(kmem_flag);
2815 
2816 	return (pip);
2817 }
2818 
2819 /*
2820  * i_mdi_phci_add_path():
2821  * 		Add a mdi_pathinfo node to pHCI list.
2822  * Notes:
2823  *		Caller should per-pHCI mutex
2824  */
2825 static void
2826 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2827 {
2828 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2829 
2830 	MDI_PHCI_LOCK(ph);
2831 	if (ph->ph_path_head == NULL) {
2832 		ph->ph_path_head = pip;
2833 	} else {
2834 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2835 	}
2836 	ph->ph_path_tail = pip;
2837 	ph->ph_path_count++;
2838 	MDI_PHCI_UNLOCK(ph);
2839 }
2840 
2841 /*
2842  * i_mdi_client_add_path():
2843  *		Add mdi_pathinfo node to client list
2844  */
2845 static void
2846 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2847 {
2848 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2849 
2850 	MDI_CLIENT_LOCK(ct);
2851 	if (ct->ct_path_head == NULL) {
2852 		ct->ct_path_head = pip;
2853 	} else {
2854 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2855 	}
2856 	ct->ct_path_tail = pip;
2857 	ct->ct_path_count++;
2858 	MDI_CLIENT_UNLOCK(ct);
2859 }
2860 
2861 /*
2862  * mdi_pi_free():
2863  *		Free the mdi_pathinfo node and also client device node if this
2864  *		is the last path to the device
2865  * Return Values:
2866  *		MDI_SUCCESS
2867  *		MDI_FAILURE
2868  *		MDI_BUSY
2869  */
2870 /*ARGSUSED*/
2871 int
2872 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
2873 {
2874 	int		rv = MDI_FAILURE;
2875 	mdi_vhci_t	*vh;
2876 	mdi_phci_t	*ph;
2877 	mdi_client_t	*ct;
2878 	int		(*f)();
2879 	int		client_held = 0;
2880 
2881 	MDI_PI_LOCK(pip);
2882 	ph = MDI_PI(pip)->pi_phci;
2883 	ASSERT(ph != NULL);
2884 	if (ph == NULL) {
2885 		/*
2886 		 * Invalid pHCI device, return failure
2887 		 */
2888 		MDI_DEBUG(1, (CE_WARN, NULL,
2889 		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
2890 		MDI_PI_UNLOCK(pip);
2891 		return (MDI_FAILURE);
2892 	}
2893 
2894 	vh = ph->ph_vhci;
2895 	ASSERT(vh != NULL);
2896 	if (vh == NULL) {
2897 		/* Invalid pHCI device, return failure */
2898 		MDI_DEBUG(1, (CE_WARN, NULL,
2899 		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
2900 		MDI_PI_UNLOCK(pip);
2901 		return (MDI_FAILURE);
2902 	}
2903 
2904 	ct = MDI_PI(pip)->pi_client;
2905 	ASSERT(ct != NULL);
2906 	if (ct == NULL) {
2907 		/*
2908 		 * Invalid Client device, return failure
2909 		 */
2910 		MDI_DEBUG(1, (CE_WARN, NULL,
2911 		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
2912 		MDI_PI_UNLOCK(pip);
2913 		return (MDI_FAILURE);
2914 	}
2915 
2916 	/*
2917 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
2918 	 * if the node state is either offline or init and the reference count
2919 	 * is zero.
2920 	 */
2921 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
2922 	    MDI_PI_IS_INITING(pip))) {
2923 		/*
2924 		 * Node is busy
2925 		 */
2926 		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
2927 		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
2928 		MDI_PI_UNLOCK(pip);
2929 		return (MDI_BUSY);
2930 	}
2931 
2932 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
2933 		/*
2934 		 * Give a chance for pending I/Os to complete.
2935 		 */
2936 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
2937 		    "%d cmds still pending on path: %p\n",
2938 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
2939 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
2940 		    &MDI_PI(pip)->pi_mutex,
2941 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
2942 			/*
2943 			 * The timeout time reached without ref_cnt being zero
2944 			 * being signaled.
2945 			 */
2946 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
2947 			    "!mdi_pi_free: "
2948 			    "Timeout reached on path %p without the cond\n",
2949 			    (void *)pip));
2950 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
2951 			    "!mdi_pi_free: "
2952 			    "%d cmds still pending on path: %p\n",
2953 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
2954 			MDI_PI_UNLOCK(pip);
2955 			return (MDI_BUSY);
2956 		}
2957 	}
2958 	if (MDI_PI(pip)->pi_pm_held) {
2959 		client_held = 1;
2960 	}
2961 	MDI_PI_UNLOCK(pip);
2962 
2963 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
2964 
2965 	MDI_CLIENT_LOCK(ct);
2966 
2967 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
2968 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
2969 
2970 	/*
2971 	 * Wait till failover is complete before removing this node.
2972 	 */
2973 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
2974 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
2975 
2976 	MDI_CLIENT_UNLOCK(ct);
2977 	MDI_VHCI_CLIENT_LOCK(vh);
2978 	MDI_CLIENT_LOCK(ct);
2979 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
2980 
2981 	if (!MDI_PI_IS_INITING(pip)) {
2982 		f = vh->vh_ops->vo_pi_uninit;
2983 		if (f != NULL) {
2984 			rv = (*f)(vh->vh_dip, pip, 0);
2985 		}
2986 	}
2987 	/*
2988 	 * If vo_pi_uninit() completed successfully.
2989 	 */
2990 	if (rv == MDI_SUCCESS) {
2991 		if (client_held) {
2992 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
2993 			    "i_mdi_pm_rele_client\n"));
2994 			i_mdi_pm_rele_client(ct, 1);
2995 		}
2996 		i_mdi_pi_free(ph, pip, ct);
2997 		if (ct->ct_path_count == 0) {
2998 			/*
2999 			 * Client lost its last path.
3000 			 * Clean up the client device
3001 			 */
3002 			MDI_CLIENT_UNLOCK(ct);
3003 			(void) i_mdi_client_free(ct->ct_vhci, ct);
3004 			MDI_VHCI_CLIENT_UNLOCK(vh);
3005 			return (rv);
3006 		}
3007 	}
3008 	MDI_CLIENT_UNLOCK(ct);
3009 	MDI_VHCI_CLIENT_UNLOCK(vh);
3010 
3011 	if (rv == MDI_FAILURE)
3012 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3013 
3014 	return (rv);
3015 }
3016 
3017 /*
3018  * i_mdi_pi_free():
3019  *		Free the mdi_pathinfo node
3020  */
3021 static void
3022 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3023 {
3024 	int	ct_circular;
3025 	int	ph_circular;
3026 	int	se_flag;
3027 	int	kmem_flag;
3028 
3029 	ASSERT(MDI_CLIENT_LOCKED(ct));
3030 
3031 	/*
3032 	 * remove any per-path kstats
3033 	 */
3034 	i_mdi_pi_kstat_destroy(pip);
3035 
3036 	/* See comments in i_mdi_pi_alloc() */
3037 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3038 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3039 
3040 	i_mdi_client_remove_path(ct, pip);
3041 	i_mdi_phci_remove_path(ph, pip);
3042 
3043 	ndi_devi_exit(ph->ph_dip, ph_circular);
3044 	ndi_devi_exit(ct->ct_dip, ct_circular);
3045 
3046 	/* determine interrupt context */
3047 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3048 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3049 
3050 	i_ddi_di_cache_invalidate(kmem_flag);
3051 
3052 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3053 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3054 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3055 	if (MDI_PI(pip)->pi_addr) {
3056 		kmem_free(MDI_PI(pip)->pi_addr,
3057 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3058 		MDI_PI(pip)->pi_addr = NULL;
3059 	}
3060 
3061 	if (MDI_PI(pip)->pi_prop) {
3062 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3063 		MDI_PI(pip)->pi_prop = NULL;
3064 	}
3065 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3066 }
3067 
3068 
3069 /*
3070  * i_mdi_phci_remove_path():
3071  * 		Remove a mdi_pathinfo node from pHCI list.
3072  * Notes:
3073  *		Caller should hold per-pHCI mutex
3074  */
3075 static void
3076 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3077 {
3078 	mdi_pathinfo_t	*prev = NULL;
3079 	mdi_pathinfo_t	*path = NULL;
3080 
3081 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3082 
3083 	MDI_PHCI_LOCK(ph);
3084 	path = ph->ph_path_head;
3085 	while (path != NULL) {
3086 		if (path == pip) {
3087 			break;
3088 		}
3089 		prev = path;
3090 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3091 	}
3092 
3093 	if (path) {
3094 		ph->ph_path_count--;
3095 		if (prev) {
3096 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3097 		} else {
3098 			ph->ph_path_head =
3099 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3100 		}
3101 		if (ph->ph_path_tail == path) {
3102 			ph->ph_path_tail = prev;
3103 		}
3104 	}
3105 
3106 	/*
3107 	 * Clear the pHCI link
3108 	 */
3109 	MDI_PI(pip)->pi_phci_link = NULL;
3110 	MDI_PI(pip)->pi_phci = NULL;
3111 	MDI_PHCI_UNLOCK(ph);
3112 }
3113 
3114 /*
3115  * i_mdi_client_remove_path():
3116  * 		Remove a mdi_pathinfo node from client path list.
3117  */
3118 static void
3119 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3120 {
3121 	mdi_pathinfo_t	*prev = NULL;
3122 	mdi_pathinfo_t	*path;
3123 
3124 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3125 
3126 	ASSERT(MDI_CLIENT_LOCKED(ct));
3127 	path = ct->ct_path_head;
3128 	while (path != NULL) {
3129 		if (path == pip) {
3130 			break;
3131 		}
3132 		prev = path;
3133 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3134 	}
3135 
3136 	if (path) {
3137 		ct->ct_path_count--;
3138 		if (prev) {
3139 			MDI_PI(prev)->pi_client_link =
3140 			    MDI_PI(path)->pi_client_link;
3141 		} else {
3142 			ct->ct_path_head =
3143 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3144 		}
3145 		if (ct->ct_path_tail == path) {
3146 			ct->ct_path_tail = prev;
3147 		}
3148 		if (ct->ct_path_last == path) {
3149 			ct->ct_path_last = ct->ct_path_head;
3150 		}
3151 	}
3152 	MDI_PI(pip)->pi_client_link = NULL;
3153 	MDI_PI(pip)->pi_client = NULL;
3154 }
3155 
3156 /*
3157  * i_mdi_pi_state_change():
3158  *		online a mdi_pathinfo node
3159  *
3160  * Return Values:
3161  *		MDI_SUCCESS
3162  *		MDI_FAILURE
3163  */
3164 /*ARGSUSED*/
3165 static int
3166 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3167 {
3168 	int		rv = MDI_SUCCESS;
3169 	mdi_vhci_t	*vh;
3170 	mdi_phci_t	*ph;
3171 	mdi_client_t	*ct;
3172 	int		(*f)();
3173 	dev_info_t	*cdip;
3174 
3175 	MDI_PI_LOCK(pip);
3176 
3177 	ph = MDI_PI(pip)->pi_phci;
3178 	ASSERT(ph);
3179 	if (ph == NULL) {
3180 		/*
3181 		 * Invalid pHCI device, fail the request
3182 		 */
3183 		MDI_PI_UNLOCK(pip);
3184 		MDI_DEBUG(1, (CE_WARN, NULL,
3185 		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
3186 		return (MDI_FAILURE);
3187 	}
3188 
3189 	vh = ph->ph_vhci;
3190 	ASSERT(vh);
3191 	if (vh == NULL) {
3192 		/*
3193 		 * Invalid vHCI device, fail the request
3194 		 */
3195 		MDI_PI_UNLOCK(pip);
3196 		MDI_DEBUG(1, (CE_WARN, NULL,
3197 		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
3198 		return (MDI_FAILURE);
3199 	}
3200 
3201 	ct = MDI_PI(pip)->pi_client;
3202 	ASSERT(ct != NULL);
3203 	if (ct == NULL) {
3204 		/*
3205 		 * Invalid client device, fail the request
3206 		 */
3207 		MDI_PI_UNLOCK(pip);
3208 		MDI_DEBUG(1, (CE_WARN, NULL,
3209 		    "!mdi_pi_state_change: invalid client pip=%p",
3210 		    (void *)pip));
3211 		return (MDI_FAILURE);
3212 	}
3213 
3214 	/*
3215 	 * If this path has not been initialized yet, Callback vHCI driver's
3216 	 * pathinfo node initialize entry point
3217 	 */
3218 
3219 	if (MDI_PI_IS_INITING(pip)) {
3220 		MDI_PI_UNLOCK(pip);
3221 		f = vh->vh_ops->vo_pi_init;
3222 		if (f != NULL) {
3223 			rv = (*f)(vh->vh_dip, pip, 0);
3224 			if (rv != MDI_SUCCESS) {
3225 				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3226 				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
3227 				    (void *)vh, (void *)pip));
3228 				return (MDI_FAILURE);
3229 			}
3230 		}
3231 		MDI_PI_LOCK(pip);
3232 		MDI_PI_CLEAR_TRANSIENT(pip);
3233 	}
3234 
3235 	/*
3236 	 * Do not allow state transition when pHCI is in offline/suspended
3237 	 * states
3238 	 */
3239 	i_mdi_phci_lock(ph, pip);
3240 	if (MDI_PHCI_IS_READY(ph) == 0) {
3241 		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3242 		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
3243 		    (void *)ph));
3244 		MDI_PI_UNLOCK(pip);
3245 		i_mdi_phci_unlock(ph);
3246 		return (MDI_BUSY);
3247 	}
3248 	MDI_PHCI_UNSTABLE(ph);
3249 	i_mdi_phci_unlock(ph);
3250 
3251 	/*
3252 	 * Check if mdi_pathinfo state is in transient state.
3253 	 * If yes, offlining is in progress and wait till transient state is
3254 	 * cleared.
3255 	 */
3256 	if (MDI_PI_IS_TRANSIENT(pip)) {
3257 		while (MDI_PI_IS_TRANSIENT(pip)) {
3258 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3259 			    &MDI_PI(pip)->pi_mutex);
3260 		}
3261 	}
3262 
3263 	/*
3264 	 * Grab the client lock in reverse order sequence and release the
3265 	 * mdi_pathinfo mutex.
3266 	 */
3267 	i_mdi_client_lock(ct, pip);
3268 	MDI_PI_UNLOCK(pip);
3269 
3270 	/*
3271 	 * Wait till failover state is cleared
3272 	 */
3273 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3274 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3275 
3276 	/*
3277 	 * Mark the mdi_pathinfo node state as transient
3278 	 */
3279 	MDI_PI_LOCK(pip);
3280 	switch (state) {
3281 	case MDI_PATHINFO_STATE_ONLINE:
3282 		MDI_PI_SET_ONLINING(pip);
3283 		break;
3284 
3285 	case MDI_PATHINFO_STATE_STANDBY:
3286 		MDI_PI_SET_STANDBYING(pip);
3287 		break;
3288 
3289 	case MDI_PATHINFO_STATE_FAULT:
3290 		/*
3291 		 * Mark the pathinfo state as FAULTED
3292 		 */
3293 		MDI_PI_SET_FAULTING(pip);
3294 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3295 		break;
3296 
3297 	case MDI_PATHINFO_STATE_OFFLINE:
3298 		/*
3299 		 * ndi_devi_offline() cannot hold pip or ct locks.
3300 		 */
3301 		MDI_PI_UNLOCK(pip);
3302 		/*
3303 		 * Don't offline the client dev_info node unless we have
3304 		 * no available paths left at all.
3305 		 */
3306 		cdip = ct->ct_dip;
3307 		if ((flag & NDI_DEVI_REMOVE) &&
3308 		    (ct->ct_path_count == 1)) {
3309 			i_mdi_client_unlock(ct);
3310 			rv = ndi_devi_offline(cdip, 0);
3311 			if (rv != NDI_SUCCESS) {
3312 				/*
3313 				 * Convert to MDI error code
3314 				 */
3315 				switch (rv) {
3316 				case NDI_BUSY:
3317 					rv = MDI_BUSY;
3318 					break;
3319 				default:
3320 					rv = MDI_FAILURE;
3321 					break;
3322 				}
3323 				goto state_change_exit;
3324 			} else {
3325 				i_mdi_client_lock(ct, NULL);
3326 			}
3327 		}
3328 		/*
3329 		 * Mark the mdi_pathinfo node state as transient
3330 		 */
3331 		MDI_PI_LOCK(pip);
3332 		MDI_PI_SET_OFFLINING(pip);
3333 		break;
3334 	}
3335 	MDI_PI_UNLOCK(pip);
3336 	MDI_CLIENT_UNSTABLE(ct);
3337 	i_mdi_client_unlock(ct);
3338 
3339 	f = vh->vh_ops->vo_pi_state_change;
3340 	if (f != NULL)
3341 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3342 
3343 	MDI_CLIENT_LOCK(ct);
3344 	MDI_PI_LOCK(pip);
3345 	if (rv == MDI_NOT_SUPPORTED) {
3346 		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3347 	}
3348 	if (rv != MDI_SUCCESS) {
3349 		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
3350 		    "!vo_pi_state_change: failed rv = %x", rv));
3351 	}
3352 	if (MDI_PI_IS_TRANSIENT(pip)) {
3353 		if (rv == MDI_SUCCESS) {
3354 			MDI_PI_CLEAR_TRANSIENT(pip);
3355 		} else {
3356 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3357 		}
3358 	}
3359 
3360 	/*
3361 	 * Wake anyone waiting for this mdi_pathinfo node
3362 	 */
3363 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3364 	MDI_PI_UNLOCK(pip);
3365 
3366 	/*
3367 	 * Mark the client device as stable
3368 	 */
3369 	MDI_CLIENT_STABLE(ct);
3370 	if (rv == MDI_SUCCESS) {
3371 		if (ct->ct_unstable == 0) {
3372 			cdip = ct->ct_dip;
3373 
3374 			/*
3375 			 * Onlining the mdi_pathinfo node will impact the
3376 			 * client state Update the client and dev_info node
3377 			 * state accordingly
3378 			 */
3379 			rv = NDI_SUCCESS;
3380 			i_mdi_client_update_state(ct);
3381 			switch (MDI_CLIENT_STATE(ct)) {
3382 			case MDI_CLIENT_STATE_OPTIMAL:
3383 			case MDI_CLIENT_STATE_DEGRADED:
3384 				if (cdip && !i_ddi_devi_attached(cdip) &&
3385 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3386 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3387 
3388 					/*
3389 					 * Must do ndi_devi_online() through
3390 					 * hotplug thread for deferred
3391 					 * attach mechanism to work
3392 					 */
3393 					MDI_CLIENT_UNLOCK(ct);
3394 					rv = ndi_devi_online(cdip, 0);
3395 					MDI_CLIENT_LOCK(ct);
3396 					if ((rv != NDI_SUCCESS) &&
3397 					    (MDI_CLIENT_STATE(ct) ==
3398 					    MDI_CLIENT_STATE_DEGRADED)) {
3399 						/*
3400 						 * ndi_devi_online failed.
3401 						 * Reset client flags to
3402 						 * offline.
3403 						 */
3404 						MDI_DEBUG(1, (CE_WARN, cdip,
3405 						    "!ndi_devi_online: failed "
3406 						    " Error: %x", rv));
3407 						MDI_CLIENT_SET_OFFLINE(ct);
3408 					}
3409 					if (rv != NDI_SUCCESS) {
3410 						/* Reset the path state */
3411 						MDI_PI_LOCK(pip);
3412 						MDI_PI(pip)->pi_state =
3413 						    MDI_PI_OLD_STATE(pip);
3414 						MDI_PI_UNLOCK(pip);
3415 					}
3416 				}
3417 				break;
3418 
3419 			case MDI_CLIENT_STATE_FAILED:
3420 				/*
3421 				 * This is the last path case for
3422 				 * non-user initiated events.
3423 				 */
3424 				if (((flag & NDI_DEVI_REMOVE) == 0) &&
3425 				    cdip && (i_ddi_node_state(cdip) >=
3426 				    DS_INITIALIZED)) {
3427 					MDI_CLIENT_UNLOCK(ct);
3428 					rv = ndi_devi_offline(cdip, 0);
3429 					MDI_CLIENT_LOCK(ct);
3430 
3431 					if (rv != NDI_SUCCESS) {
3432 						/*
3433 						 * ndi_devi_offline failed.
3434 						 * Reset client flags to
3435 						 * online as the path could not
3436 						 * be offlined.
3437 						 */
3438 						MDI_DEBUG(1, (CE_WARN, cdip,
3439 						    "!ndi_devi_offline: failed "
3440 						    " Error: %x", rv));
3441 						MDI_CLIENT_SET_ONLINE(ct);
3442 					}
3443 				}
3444 				break;
3445 			}
3446 			/*
3447 			 * Convert to MDI error code
3448 			 */
3449 			switch (rv) {
3450 			case NDI_SUCCESS:
3451 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3452 				i_mdi_report_path_state(ct, pip);
3453 				rv = MDI_SUCCESS;
3454 				break;
3455 			case NDI_BUSY:
3456 				rv = MDI_BUSY;
3457 				break;
3458 			default:
3459 				rv = MDI_FAILURE;
3460 				break;
3461 			}
3462 		}
3463 	}
3464 	MDI_CLIENT_UNLOCK(ct);
3465 
3466 state_change_exit:
3467 	/*
3468 	 * Mark the pHCI as stable again.
3469 	 */
3470 	MDI_PHCI_LOCK(ph);
3471 	MDI_PHCI_STABLE(ph);
3472 	MDI_PHCI_UNLOCK(ph);
3473 	return (rv);
3474 }
3475 
3476 /*
3477  * mdi_pi_online():
3478  *		Place the path_info node in the online state.  The path is
3479  *		now available to be selected by mdi_select_path() for
3480  *		transporting I/O requests to client devices.
3481  * Return Values:
3482  *		MDI_SUCCESS
3483  *		MDI_FAILURE
3484  */
3485 int
3486 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3487 {
3488 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3489 	int		client_held = 0;
3490 	int		rv;
3491 
3492 	ASSERT(ct != NULL);
3493 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3494 	if (rv != MDI_SUCCESS)
3495 		return (rv);
3496 
3497 	MDI_PI_LOCK(pip);
3498 	if (MDI_PI(pip)->pi_pm_held == 0) {
3499 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3500 		    "i_mdi_pm_hold_pip %p\n", (void *)pip));
3501 		i_mdi_pm_hold_pip(pip);
3502 		client_held = 1;
3503 	}
3504 	MDI_PI_UNLOCK(pip);
3505 
3506 	if (client_held) {
3507 		MDI_CLIENT_LOCK(ct);
3508 		if (ct->ct_power_cnt == 0) {
3509 			rv = i_mdi_power_all_phci(ct);
3510 		}
3511 
3512 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3513 		    "i_mdi_pm_hold_client %p\n", (void *)ct));
3514 		i_mdi_pm_hold_client(ct, 1);
3515 		MDI_CLIENT_UNLOCK(ct);
3516 	}
3517 
3518 	return (rv);
3519 }
3520 
3521 /*
3522  * mdi_pi_standby():
3523  *		Place the mdi_pathinfo node in standby state
3524  *
3525  * Return Values:
3526  *		MDI_SUCCESS
3527  *		MDI_FAILURE
3528  */
3529 int
3530 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3531 {
3532 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3533 }
3534 
3535 /*
3536  * mdi_pi_fault():
3537  *		Place the mdi_pathinfo node in fault'ed state
3538  * Return Values:
3539  *		MDI_SUCCESS
3540  *		MDI_FAILURE
3541  */
3542 int
3543 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3544 {
3545 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3546 }
3547 
3548 /*
3549  * mdi_pi_offline():
3550  *		Offline a mdi_pathinfo node.
3551  * Return Values:
3552  *		MDI_SUCCESS
3553  *		MDI_FAILURE
3554  */
3555 int
3556 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3557 {
3558 	int	ret, client_held = 0;
3559 	mdi_client_t	*ct;
3560 
3561 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3562 
3563 	if (ret == MDI_SUCCESS) {
3564 		MDI_PI_LOCK(pip);
3565 		if (MDI_PI(pip)->pi_pm_held) {
3566 			client_held = 1;
3567 		}
3568 		MDI_PI_UNLOCK(pip);
3569 
3570 		if (client_held) {
3571 			ct = MDI_PI(pip)->pi_client;
3572 			MDI_CLIENT_LOCK(ct);
3573 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3574 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3575 			i_mdi_pm_rele_client(ct, 1);
3576 			MDI_CLIENT_UNLOCK(ct);
3577 		}
3578 	}
3579 
3580 	return (ret);
3581 }
3582 
3583 /*
3584  * i_mdi_pi_offline():
3585  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3586  */
3587 static int
3588 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3589 {
3590 	dev_info_t	*vdip = NULL;
3591 	mdi_vhci_t	*vh = NULL;
3592 	mdi_client_t	*ct = NULL;
3593 	int		(*f)();
3594 	int		rv;
3595 
3596 	MDI_PI_LOCK(pip);
3597 	ct = MDI_PI(pip)->pi_client;
3598 	ASSERT(ct != NULL);
3599 
3600 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3601 		/*
3602 		 * Give a chance for pending I/Os to complete.
3603 		 */
3604 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3605 		    "%d cmds still pending on path: %p\n",
3606 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3607 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3608 		    &MDI_PI(pip)->pi_mutex,
3609 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3610 			/*
3611 			 * The timeout time reached without ref_cnt being zero
3612 			 * being signaled.
3613 			 */
3614 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3615 			    "Timeout reached on path %p without the cond\n",
3616 			    (void *)pip));
3617 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3618 			    "%d cmds still pending on path: %p\n",
3619 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3620 		}
3621 	}
3622 	vh = ct->ct_vhci;
3623 	vdip = vh->vh_dip;
3624 
3625 	/*
3626 	 * Notify vHCI that has registered this event
3627 	 */
3628 	ASSERT(vh->vh_ops);
3629 	f = vh->vh_ops->vo_pi_state_change;
3630 
3631 	if (f != NULL) {
3632 		MDI_PI_UNLOCK(pip);
3633 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3634 		    flags)) != MDI_SUCCESS) {
3635 			MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3636 			    "!vo_path_offline failed "
3637 			    "vdip %p, pip %p", (void *)vdip, (void *)pip));
3638 		}
3639 		MDI_PI_LOCK(pip);
3640 	}
3641 
3642 	/*
3643 	 * Set the mdi_pathinfo node state and clear the transient condition
3644 	 */
3645 	MDI_PI_SET_OFFLINE(pip);
3646 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3647 	MDI_PI_UNLOCK(pip);
3648 
3649 	MDI_CLIENT_LOCK(ct);
3650 	if (rv == MDI_SUCCESS) {
3651 		if (ct->ct_unstable == 0) {
3652 			dev_info_t	*cdip = ct->ct_dip;
3653 
3654 			/*
3655 			 * Onlining the mdi_pathinfo node will impact the
3656 			 * client state Update the client and dev_info node
3657 			 * state accordingly
3658 			 */
3659 			i_mdi_client_update_state(ct);
3660 			rv = NDI_SUCCESS;
3661 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3662 				if (cdip &&
3663 				    (i_ddi_node_state(cdip) >=
3664 				    DS_INITIALIZED)) {
3665 					MDI_CLIENT_UNLOCK(ct);
3666 					rv = ndi_devi_offline(cdip, 0);
3667 					MDI_CLIENT_LOCK(ct);
3668 					if (rv != NDI_SUCCESS) {
3669 						/*
3670 						 * ndi_devi_offline failed.
3671 						 * Reset client flags to
3672 						 * online.
3673 						 */
3674 						MDI_DEBUG(4, (CE_WARN, cdip,
3675 						    "!ndi_devi_offline: failed "
3676 						    " Error: %x", rv));
3677 						MDI_CLIENT_SET_ONLINE(ct);
3678 					}
3679 				}
3680 			}
3681 			/*
3682 			 * Convert to MDI error code
3683 			 */
3684 			switch (rv) {
3685 			case NDI_SUCCESS:
3686 				rv = MDI_SUCCESS;
3687 				break;
3688 			case NDI_BUSY:
3689 				rv = MDI_BUSY;
3690 				break;
3691 			default:
3692 				rv = MDI_FAILURE;
3693 				break;
3694 			}
3695 		}
3696 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3697 		i_mdi_report_path_state(ct, pip);
3698 	}
3699 
3700 	MDI_CLIENT_UNLOCK(ct);
3701 
3702 	/*
3703 	 * Change in the mdi_pathinfo node state will impact the client state
3704 	 */
3705 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3706 	    (void *)ct, (void *)pip));
3707 	return (rv);
3708 }
3709 
3710 
3711 /*
3712  * mdi_pi_get_addr():
3713  *		Get the unit address associated with a mdi_pathinfo node
3714  *
3715  * Return Values:
3716  *		char *
3717  */
3718 char *
3719 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3720 {
3721 	if (pip == NULL)
3722 		return (NULL);
3723 
3724 	return (MDI_PI(pip)->pi_addr);
3725 }
3726 
3727 /*
3728  * mdi_pi_get_client():
3729  *		Get the client devinfo associated with a mdi_pathinfo node
3730  *
3731  * Return Values:
3732  *		Handle to client device dev_info node
3733  */
3734 dev_info_t *
3735 mdi_pi_get_client(mdi_pathinfo_t *pip)
3736 {
3737 	dev_info_t	*dip = NULL;
3738 	if (pip) {
3739 		dip = MDI_PI(pip)->pi_client->ct_dip;
3740 	}
3741 	return (dip);
3742 }
3743 
3744 /*
3745  * mdi_pi_get_phci():
3746  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3747  * Return Values:
3748  *		Handle to dev_info node
3749  */
3750 dev_info_t *
3751 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3752 {
3753 	dev_info_t	*dip = NULL;
3754 	if (pip) {
3755 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3756 	}
3757 	return (dip);
3758 }
3759 
3760 /*
3761  * mdi_pi_get_client_private():
3762  *		Get the client private information associated with the
3763  *		mdi_pathinfo node
3764  */
3765 void *
3766 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3767 {
3768 	void *cprivate = NULL;
3769 	if (pip) {
3770 		cprivate = MDI_PI(pip)->pi_cprivate;
3771 	}
3772 	return (cprivate);
3773 }
3774 
3775 /*
3776  * mdi_pi_set_client_private():
3777  *		Set the client private information in the mdi_pathinfo node
3778  */
3779 void
3780 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3781 {
3782 	if (pip) {
3783 		MDI_PI(pip)->pi_cprivate = priv;
3784 	}
3785 }
3786 
3787 /*
3788  * mdi_pi_get_phci_private():
3789  *		Get the pHCI private information associated with the
3790  *		mdi_pathinfo node
3791  */
3792 caddr_t
3793 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3794 {
3795 	caddr_t	pprivate = NULL;
3796 	if (pip) {
3797 		pprivate = MDI_PI(pip)->pi_pprivate;
3798 	}
3799 	return (pprivate);
3800 }
3801 
3802 /*
3803  * mdi_pi_set_phci_private():
3804  *		Set the pHCI private information in the mdi_pathinfo node
3805  */
3806 void
3807 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3808 {
3809 	if (pip) {
3810 		MDI_PI(pip)->pi_pprivate = priv;
3811 	}
3812 }
3813 
3814 /*
3815  * mdi_pi_get_state():
3816  *		Get the mdi_pathinfo node state. Transient states are internal
3817  *		and not provided to the users
3818  */
3819 mdi_pathinfo_state_t
3820 mdi_pi_get_state(mdi_pathinfo_t *pip)
3821 {
3822 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3823 
3824 	if (pip) {
3825 		if (MDI_PI_IS_TRANSIENT(pip)) {
3826 			/*
3827 			 * mdi_pathinfo is in state transition.  Return the
3828 			 * last good state.
3829 			 */
3830 			state = MDI_PI_OLD_STATE(pip);
3831 		} else {
3832 			state = MDI_PI_STATE(pip);
3833 		}
3834 	}
3835 	return (state);
3836 }
3837 
3838 /*
3839  * Note that the following function needs to be the new interface for
3840  * mdi_pi_get_state when mpxio gets integrated to ON.
3841  */
3842 int
3843 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
3844 		uint32_t *ext_state)
3845 {
3846 	*state = MDI_PATHINFO_STATE_INIT;
3847 
3848 	if (pip) {
3849 		if (MDI_PI_IS_TRANSIENT(pip)) {
3850 			/*
3851 			 * mdi_pathinfo is in state transition.  Return the
3852 			 * last good state.
3853 			 */
3854 			*state = MDI_PI_OLD_STATE(pip);
3855 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
3856 		} else {
3857 			*state = MDI_PI_STATE(pip);
3858 			*ext_state = MDI_PI_EXT_STATE(pip);
3859 		}
3860 	}
3861 	return (MDI_SUCCESS);
3862 }
3863 
3864 /*
3865  * mdi_pi_get_preferred:
3866  *	Get the preferred path flag
3867  */
3868 int
3869 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
3870 {
3871 	if (pip) {
3872 		return (MDI_PI(pip)->pi_preferred);
3873 	}
3874 	return (0);
3875 }
3876 
3877 /*
3878  * mdi_pi_set_preferred:
3879  *	Set the preferred path flag
3880  */
3881 void
3882 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
3883 {
3884 	if (pip) {
3885 		MDI_PI(pip)->pi_preferred = preferred;
3886 	}
3887 }
3888 
3889 /*
3890  * mdi_pi_set_state():
3891  *		Set the mdi_pathinfo node state
3892  */
3893 void
3894 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
3895 {
3896 	uint32_t	ext_state;
3897 
3898 	if (pip) {
3899 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
3900 		MDI_PI(pip)->pi_state = state;
3901 		MDI_PI(pip)->pi_state |= ext_state;
3902 	}
3903 }
3904 
3905 /*
3906  * Property functions:
3907  */
3908 int
3909 i_map_nvlist_error_to_mdi(int val)
3910 {
3911 	int rv;
3912 
3913 	switch (val) {
3914 	case 0:
3915 		rv = DDI_PROP_SUCCESS;
3916 		break;
3917 	case EINVAL:
3918 	case ENOTSUP:
3919 		rv = DDI_PROP_INVAL_ARG;
3920 		break;
3921 	case ENOMEM:
3922 		rv = DDI_PROP_NO_MEMORY;
3923 		break;
3924 	default:
3925 		rv = DDI_PROP_NOT_FOUND;
3926 		break;
3927 	}
3928 	return (rv);
3929 }
3930 
3931 /*
3932  * mdi_pi_get_next_prop():
3933  * 		Property walk function.  The caller should hold mdi_pi_lock()
3934  *		and release by calling mdi_pi_unlock() at the end of walk to
3935  *		get a consistent value.
3936  */
3937 nvpair_t *
3938 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
3939 {
3940 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3941 		return (NULL);
3942 	}
3943 	ASSERT(MDI_PI_LOCKED(pip));
3944 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
3945 }
3946 
3947 /*
3948  * mdi_prop_remove():
3949  * 		Remove the named property from the named list.
3950  */
3951 int
3952 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
3953 {
3954 	if (pip == NULL) {
3955 		return (DDI_PROP_NOT_FOUND);
3956 	}
3957 	ASSERT(!MDI_PI_LOCKED(pip));
3958 	MDI_PI_LOCK(pip);
3959 	if (MDI_PI(pip)->pi_prop == NULL) {
3960 		MDI_PI_UNLOCK(pip);
3961 		return (DDI_PROP_NOT_FOUND);
3962 	}
3963 	if (name) {
3964 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
3965 	} else {
3966 		char		nvp_name[MAXNAMELEN];
3967 		nvpair_t	*nvp;
3968 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
3969 		while (nvp) {
3970 			nvpair_t	*next;
3971 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
3972 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
3973 			    nvpair_name(nvp));
3974 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
3975 			    nvp_name);
3976 			nvp = next;
3977 		}
3978 	}
3979 	MDI_PI_UNLOCK(pip);
3980 	return (DDI_PROP_SUCCESS);
3981 }
3982 
3983 /*
3984  * mdi_prop_size():
3985  * 		Get buffer size needed to pack the property data.
3986  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
3987  *		buffer size.
3988  */
3989 int
3990 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
3991 {
3992 	int	rv;
3993 	size_t	bufsize;
3994 
3995 	*buflenp = 0;
3996 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3997 		return (DDI_PROP_NOT_FOUND);
3998 	}
3999 	ASSERT(MDI_PI_LOCKED(pip));
4000 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4001 	    &bufsize, NV_ENCODE_NATIVE);
4002 	*buflenp = bufsize;
4003 	return (i_map_nvlist_error_to_mdi(rv));
4004 }
4005 
4006 /*
4007  * mdi_prop_pack():
4008  * 		pack the property list.  The caller should hold the
4009  *		mdi_pathinfo_t node to get a consistent data
4010  */
4011 int
4012 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4013 {
4014 	int	rv;
4015 	size_t	bufsize;
4016 
4017 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4018 		return (DDI_PROP_NOT_FOUND);
4019 	}
4020 
4021 	ASSERT(MDI_PI_LOCKED(pip));
4022 
4023 	bufsize = buflen;
4024 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4025 	    NV_ENCODE_NATIVE, KM_SLEEP);
4026 
4027 	return (i_map_nvlist_error_to_mdi(rv));
4028 }
4029 
4030 /*
4031  * mdi_prop_update_byte():
4032  *		Create/Update a byte property
4033  */
4034 int
4035 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4036 {
4037 	int rv;
4038 
4039 	if (pip == NULL) {
4040 		return (DDI_PROP_INVAL_ARG);
4041 	}
4042 	ASSERT(!MDI_PI_LOCKED(pip));
4043 	MDI_PI_LOCK(pip);
4044 	if (MDI_PI(pip)->pi_prop == NULL) {
4045 		MDI_PI_UNLOCK(pip);
4046 		return (DDI_PROP_NOT_FOUND);
4047 	}
4048 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4049 	MDI_PI_UNLOCK(pip);
4050 	return (i_map_nvlist_error_to_mdi(rv));
4051 }
4052 
4053 /*
4054  * mdi_prop_update_byte_array():
4055  *		Create/Update a byte array property
4056  */
4057 int
4058 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4059     uint_t nelements)
4060 {
4061 	int rv;
4062 
4063 	if (pip == NULL) {
4064 		return (DDI_PROP_INVAL_ARG);
4065 	}
4066 	ASSERT(!MDI_PI_LOCKED(pip));
4067 	MDI_PI_LOCK(pip);
4068 	if (MDI_PI(pip)->pi_prop == NULL) {
4069 		MDI_PI_UNLOCK(pip);
4070 		return (DDI_PROP_NOT_FOUND);
4071 	}
4072 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4073 	MDI_PI_UNLOCK(pip);
4074 	return (i_map_nvlist_error_to_mdi(rv));
4075 }
4076 
4077 /*
4078  * mdi_prop_update_int():
4079  *		Create/Update a 32 bit integer property
4080  */
4081 int
4082 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4083 {
4084 	int rv;
4085 
4086 	if (pip == NULL) {
4087 		return (DDI_PROP_INVAL_ARG);
4088 	}
4089 	ASSERT(!MDI_PI_LOCKED(pip));
4090 	MDI_PI_LOCK(pip);
4091 	if (MDI_PI(pip)->pi_prop == NULL) {
4092 		MDI_PI_UNLOCK(pip);
4093 		return (DDI_PROP_NOT_FOUND);
4094 	}
4095 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4096 	MDI_PI_UNLOCK(pip);
4097 	return (i_map_nvlist_error_to_mdi(rv));
4098 }
4099 
4100 /*
4101  * mdi_prop_update_int64():
4102  *		Create/Update a 64 bit integer property
4103  */
4104 int
4105 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4106 {
4107 	int rv;
4108 
4109 	if (pip == NULL) {
4110 		return (DDI_PROP_INVAL_ARG);
4111 	}
4112 	ASSERT(!MDI_PI_LOCKED(pip));
4113 	MDI_PI_LOCK(pip);
4114 	if (MDI_PI(pip)->pi_prop == NULL) {
4115 		MDI_PI_UNLOCK(pip);
4116 		return (DDI_PROP_NOT_FOUND);
4117 	}
4118 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4119 	MDI_PI_UNLOCK(pip);
4120 	return (i_map_nvlist_error_to_mdi(rv));
4121 }
4122 
4123 /*
4124  * mdi_prop_update_int_array():
4125  *		Create/Update a int array property
4126  */
4127 int
4128 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4129 	    uint_t nelements)
4130 {
4131 	int rv;
4132 
4133 	if (pip == NULL) {
4134 		return (DDI_PROP_INVAL_ARG);
4135 	}
4136 	ASSERT(!MDI_PI_LOCKED(pip));
4137 	MDI_PI_LOCK(pip);
4138 	if (MDI_PI(pip)->pi_prop == NULL) {
4139 		MDI_PI_UNLOCK(pip);
4140 		return (DDI_PROP_NOT_FOUND);
4141 	}
4142 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4143 	    nelements);
4144 	MDI_PI_UNLOCK(pip);
4145 	return (i_map_nvlist_error_to_mdi(rv));
4146 }
4147 
4148 /*
4149  * mdi_prop_update_string():
4150  *		Create/Update a string property
4151  */
4152 int
4153 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4154 {
4155 	int rv;
4156 
4157 	if (pip == NULL) {
4158 		return (DDI_PROP_INVAL_ARG);
4159 	}
4160 	ASSERT(!MDI_PI_LOCKED(pip));
4161 	MDI_PI_LOCK(pip);
4162 	if (MDI_PI(pip)->pi_prop == NULL) {
4163 		MDI_PI_UNLOCK(pip);
4164 		return (DDI_PROP_NOT_FOUND);
4165 	}
4166 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4167 	MDI_PI_UNLOCK(pip);
4168 	return (i_map_nvlist_error_to_mdi(rv));
4169 }
4170 
4171 /*
4172  * mdi_prop_update_string_array():
4173  *		Create/Update a string array property
4174  */
4175 int
4176 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4177     uint_t nelements)
4178 {
4179 	int rv;
4180 
4181 	if (pip == NULL) {
4182 		return (DDI_PROP_INVAL_ARG);
4183 	}
4184 	ASSERT(!MDI_PI_LOCKED(pip));
4185 	MDI_PI_LOCK(pip);
4186 	if (MDI_PI(pip)->pi_prop == NULL) {
4187 		MDI_PI_UNLOCK(pip);
4188 		return (DDI_PROP_NOT_FOUND);
4189 	}
4190 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4191 	    nelements);
4192 	MDI_PI_UNLOCK(pip);
4193 	return (i_map_nvlist_error_to_mdi(rv));
4194 }
4195 
4196 /*
4197  * mdi_prop_lookup_byte():
4198  * 		Look for byte property identified by name.  The data returned
4199  *		is the actual property and valid as long as mdi_pathinfo_t node
4200  *		is alive.
4201  */
4202 int
4203 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4204 {
4205 	int rv;
4206 
4207 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4208 		return (DDI_PROP_NOT_FOUND);
4209 	}
4210 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4211 	return (i_map_nvlist_error_to_mdi(rv));
4212 }
4213 
4214 
4215 /*
4216  * mdi_prop_lookup_byte_array():
4217  * 		Look for byte array property identified by name.  The data
4218  *		returned is the actual property and valid as long as
4219  *		mdi_pathinfo_t node is alive.
4220  */
4221 int
4222 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4223     uint_t *nelements)
4224 {
4225 	int rv;
4226 
4227 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4228 		return (DDI_PROP_NOT_FOUND);
4229 	}
4230 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4231 	    nelements);
4232 	return (i_map_nvlist_error_to_mdi(rv));
4233 }
4234 
4235 /*
4236  * mdi_prop_lookup_int():
4237  * 		Look for int property identified by name.  The data returned
4238  *		is the actual property and valid as long as mdi_pathinfo_t
4239  *		node is alive.
4240  */
4241 int
4242 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4243 {
4244 	int rv;
4245 
4246 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4247 		return (DDI_PROP_NOT_FOUND);
4248 	}
4249 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4250 	return (i_map_nvlist_error_to_mdi(rv));
4251 }
4252 
4253 /*
4254  * mdi_prop_lookup_int64():
4255  * 		Look for int64 property identified by name.  The data returned
4256  *		is the actual property and valid as long as mdi_pathinfo_t node
4257  *		is alive.
4258  */
4259 int
4260 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4261 {
4262 	int rv;
4263 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4264 		return (DDI_PROP_NOT_FOUND);
4265 	}
4266 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4267 	return (i_map_nvlist_error_to_mdi(rv));
4268 }
4269 
4270 /*
4271  * mdi_prop_lookup_int_array():
4272  * 		Look for int array property identified by name.  The data
4273  *		returned is the actual property and valid as long as
4274  *		mdi_pathinfo_t node is alive.
4275  */
4276 int
4277 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4278     uint_t *nelements)
4279 {
4280 	int rv;
4281 
4282 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4283 		return (DDI_PROP_NOT_FOUND);
4284 	}
4285 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4286 	    (int32_t **)data, nelements);
4287 	return (i_map_nvlist_error_to_mdi(rv));
4288 }
4289 
4290 /*
4291  * mdi_prop_lookup_string():
4292  * 		Look for string property identified by name.  The data
4293  *		returned is the actual property and valid as long as
4294  *		mdi_pathinfo_t node is alive.
4295  */
4296 int
4297 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4298 {
4299 	int rv;
4300 
4301 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4302 		return (DDI_PROP_NOT_FOUND);
4303 	}
4304 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4305 	return (i_map_nvlist_error_to_mdi(rv));
4306 }
4307 
4308 /*
4309  * mdi_prop_lookup_string_array():
4310  * 		Look for string array property identified by name.  The data
4311  *		returned is the actual property and valid as long as
4312  *		mdi_pathinfo_t node is alive.
4313  */
4314 int
4315 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4316     uint_t *nelements)
4317 {
4318 	int rv;
4319 
4320 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4321 		return (DDI_PROP_NOT_FOUND);
4322 	}
4323 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4324 	    nelements);
4325 	return (i_map_nvlist_error_to_mdi(rv));
4326 }
4327 
4328 /*
4329  * mdi_prop_free():
4330  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4331  *		functions return the pointer to actual property data and not a
4332  *		copy of it.  So the data returned is valid as long as
4333  *		mdi_pathinfo_t node is valid.
4334  */
4335 /*ARGSUSED*/
4336 int
4337 mdi_prop_free(void *data)
4338 {
4339 	return (DDI_PROP_SUCCESS);
4340 }
4341 
4342 /*ARGSUSED*/
4343 static void
4344 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4345 {
4346 	char		*phci_path, *ct_path;
4347 	char		*ct_status;
4348 	char		*status;
4349 	dev_info_t	*dip = ct->ct_dip;
4350 	char		lb_buf[64];
4351 
4352 	ASSERT(MDI_CLIENT_LOCKED(ct));
4353 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4354 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4355 		return;
4356 	}
4357 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4358 		ct_status = "optimal";
4359 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4360 		ct_status = "degraded";
4361 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4362 		ct_status = "failed";
4363 	} else {
4364 		ct_status = "unknown";
4365 	}
4366 
4367 	if (MDI_PI_IS_OFFLINE(pip)) {
4368 		status = "offline";
4369 	} else if (MDI_PI_IS_ONLINE(pip)) {
4370 		status = "online";
4371 	} else if (MDI_PI_IS_STANDBY(pip)) {
4372 		status = "standby";
4373 	} else if (MDI_PI_IS_FAULT(pip)) {
4374 		status = "faulted";
4375 	} else {
4376 		status = "unknown";
4377 	}
4378 
4379 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4380 		(void) snprintf(lb_buf, sizeof (lb_buf),
4381 		    "%s, region-size: %d", mdi_load_balance_lba,
4382 			ct->ct_lb_args->region_size);
4383 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4384 		(void) snprintf(lb_buf, sizeof (lb_buf),
4385 		    "%s", mdi_load_balance_none);
4386 	} else {
4387 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4388 		    mdi_load_balance_rr);
4389 	}
4390 
4391 	if (dip) {
4392 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4393 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4394 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4395 		    "path %s (%s%d) to target address: %s is %s"
4396 		    " Load balancing: %s\n",
4397 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4398 		    ddi_get_instance(dip), ct_status,
4399 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4400 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4401 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4402 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4403 		kmem_free(phci_path, MAXPATHLEN);
4404 		kmem_free(ct_path, MAXPATHLEN);
4405 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4406 	}
4407 }
4408 
4409 #ifdef	DEBUG
4410 /*
4411  * i_mdi_log():
4412  *		Utility function for error message management
4413  *
4414  */
4415 /*PRINTFLIKE3*/
4416 static void
4417 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
4418 {
4419 	char		name[MAXNAMELEN];
4420 	char		buf[MAXNAMELEN];
4421 	char		*bp;
4422 	va_list		ap;
4423 	int		log_only = 0;
4424 	int		boot_only = 0;
4425 	int		console_only = 0;
4426 
4427 	if (dip) {
4428 		(void) snprintf(name, MAXNAMELEN, "%s%d: ",
4429 		    ddi_node_name(dip), ddi_get_instance(dip));
4430 	} else {
4431 		name[0] = 0;
4432 	}
4433 
4434 	va_start(ap, fmt);
4435 	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
4436 	va_end(ap);
4437 
4438 	switch (buf[0]) {
4439 	case '!':
4440 		bp = &buf[1];
4441 		log_only = 1;
4442 		break;
4443 	case '?':
4444 		bp = &buf[1];
4445 		boot_only = 1;
4446 		break;
4447 	case '^':
4448 		bp = &buf[1];
4449 		console_only = 1;
4450 		break;
4451 	default:
4452 		bp = buf;
4453 		break;
4454 	}
4455 	if (mdi_debug_logonly) {
4456 		log_only = 1;
4457 		boot_only = 0;
4458 		console_only = 0;
4459 	}
4460 
4461 	switch (level) {
4462 	case CE_NOTE:
4463 		level = CE_CONT;
4464 		/* FALLTHROUGH */
4465 	case CE_CONT:
4466 	case CE_WARN:
4467 	case CE_PANIC:
4468 		if (boot_only) {
4469 			cmn_err(level, "?mdi: %s%s", name, bp);
4470 		} else if (console_only) {
4471 			cmn_err(level, "^mdi: %s%s", name, bp);
4472 		} else if (log_only) {
4473 			cmn_err(level, "!mdi: %s%s", name, bp);
4474 		} else {
4475 			cmn_err(level, "mdi: %s%s", name, bp);
4476 		}
4477 		break;
4478 	default:
4479 		cmn_err(level, "mdi: %s%s", name, bp);
4480 		break;
4481 	}
4482 }
4483 #endif	/* DEBUG */
4484 
4485 void
4486 i_mdi_client_online(dev_info_t *ct_dip)
4487 {
4488 	mdi_client_t	*ct;
4489 
4490 	/*
4491 	 * Client online notification. Mark client state as online
4492 	 * restore our binding with dev_info node
4493 	 */
4494 	ct = i_devi_get_client(ct_dip);
4495 	ASSERT(ct != NULL);
4496 	MDI_CLIENT_LOCK(ct);
4497 	MDI_CLIENT_SET_ONLINE(ct);
4498 	/* catch for any memory leaks */
4499 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4500 	ct->ct_dip = ct_dip;
4501 
4502 	if (ct->ct_power_cnt == 0)
4503 		(void) i_mdi_power_all_phci(ct);
4504 
4505 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4506 	    "i_mdi_pm_hold_client %p\n", (void *)ct));
4507 	i_mdi_pm_hold_client(ct, 1);
4508 
4509 	MDI_CLIENT_UNLOCK(ct);
4510 }
4511 
4512 void
4513 i_mdi_phci_online(dev_info_t *ph_dip)
4514 {
4515 	mdi_phci_t	*ph;
4516 
4517 	/* pHCI online notification. Mark state accordingly */
4518 	ph = i_devi_get_phci(ph_dip);
4519 	ASSERT(ph != NULL);
4520 	MDI_PHCI_LOCK(ph);
4521 	MDI_PHCI_SET_ONLINE(ph);
4522 	MDI_PHCI_UNLOCK(ph);
4523 }
4524 
4525 /*
4526  * mdi_devi_online():
4527  * 		Online notification from NDI framework on pHCI/client
4528  *		device online.
4529  * Return Values:
4530  *		NDI_SUCCESS
4531  *		MDI_FAILURE
4532  */
4533 /*ARGSUSED*/
4534 int
4535 mdi_devi_online(dev_info_t *dip, uint_t flags)
4536 {
4537 	if (MDI_PHCI(dip)) {
4538 		i_mdi_phci_online(dip);
4539 	}
4540 
4541 	if (MDI_CLIENT(dip)) {
4542 		i_mdi_client_online(dip);
4543 	}
4544 	return (NDI_SUCCESS);
4545 }
4546 
4547 /*
4548  * mdi_devi_offline():
4549  * 		Offline notification from NDI framework on pHCI/Client device
4550  *		offline.
4551  *
4552  * Return Values:
4553  *		NDI_SUCCESS
4554  *		NDI_FAILURE
4555  */
4556 /*ARGSUSED*/
4557 int
4558 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4559 {
4560 	int		rv = NDI_SUCCESS;
4561 
4562 	if (MDI_CLIENT(dip)) {
4563 		rv = i_mdi_client_offline(dip, flags);
4564 		if (rv != NDI_SUCCESS)
4565 			return (rv);
4566 	}
4567 
4568 	if (MDI_PHCI(dip)) {
4569 		rv = i_mdi_phci_offline(dip, flags);
4570 
4571 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4572 			/* set client back online */
4573 			i_mdi_client_online(dip);
4574 		}
4575 	}
4576 
4577 	return (rv);
4578 }
4579 
/*
 * i_mdi_phci_offline():
 *		pHCI component offline handler.
 *
 *		Phase 1 walks every path of the pHCI; for a client whose
 *		computed state without this pHCI would be FAILED (i.e. this is
 *		its last path) the client node itself is offlined through
 *		ndi_devi_offline().  If that fails, phase 2 walks the paths
 *		that were already processed and re-drives each client node
 *		online or offline according to its current state, then fails
 *		the request.  Otherwise the pHCI is marked offline, every path
 *		is marked as offlining and then offlined via
 *		i_mdi_pi_offline().
 *
 * Return Values:
 *		NDI_SUCCESS	dip has no pHCI state, the pHCI was already
 *				offline, or every path went offline
 *		NDI_BUSY	the pHCI or one of its clients is unstable or
 *				failing over, a last-path client could not be
 *				offlined, or a path did not reach the offline
 *				state
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
	    (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
		    (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Phase 1: walk the pHCI's path list.  The successor is captured
	 * under the path lock before any lock is dropped.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Both the client and the pHCI lock are dropped
			 * around ndi_devi_offline(); the pHCI lock is taken
			 * again on either outcome.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				/* remember where to stop the phase 2 walk */
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Phase 2 (failure only): revisit the paths that precede failed_pip
	 * and re-drive each bound client node online (OPTIMAL/DEGRADED) or
	 * offline (FAILED) to match the client's current state.  Results of
	 * these calls are ignored.
	 */
	if (failed_pip) {
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					/* all locks dropped around the NDI call */
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					/* all locks dropped around the NDI call */
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* no dev_info node bound, or some other client state */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	/*
	 * Offline every path.  The return value of i_mdi_pi_offline() is
	 * ignored; success is judged by re-checking the path state under the
	 * path lock.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not used afterwards */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* undo the offline marking made above */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
4779 
4780 void
4781 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
4782 {
4783 	mdi_phci_t	*ph;
4784 	mdi_client_t	*ct;
4785 	mdi_pathinfo_t	*pip;
4786 	mdi_pathinfo_t	*next;
4787 	dev_info_t	*cdip;
4788 
4789 	if (!MDI_PHCI(dip))
4790 		return;
4791 
4792 	ph = i_devi_get_phci(dip);
4793 	if (ph == NULL) {
4794 		return;
4795 	}
4796 
4797 	MDI_PHCI_LOCK(ph);
4798 
4799 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4800 		/* has no last path */
4801 		MDI_PHCI_UNLOCK(ph);
4802 		return;
4803 	}
4804 
4805 	pip = ph->ph_path_head;
4806 	while (pip != NULL) {
4807 		MDI_PI_LOCK(pip);
4808 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4809 
4810 		ct = MDI_PI(pip)->pi_client;
4811 		i_mdi_client_lock(ct, pip);
4812 		MDI_PI_UNLOCK(pip);
4813 
4814 		cdip = ct->ct_dip;
4815 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4816 		    (i_mdi_client_compute_state(ct, ph) ==
4817 		    MDI_CLIENT_STATE_FAILED)) {
4818 			/* Last path. Mark client dip as retiring */
4819 			i_mdi_client_unlock(ct);
4820 			MDI_PHCI_UNLOCK(ph);
4821 			(void) e_ddi_mark_retiring(cdip, cons_array);
4822 			MDI_PHCI_LOCK(ph);
4823 			pip = next;
4824 		} else {
4825 			i_mdi_client_unlock(ct);
4826 			pip = next;
4827 		}
4828 	}
4829 
4830 	MDI_PHCI_UNLOCK(ph);
4831 
4832 	return;
4833 }
4834 
4835 void
4836 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
4837 {
4838 	mdi_phci_t	*ph;
4839 	mdi_client_t	*ct;
4840 	mdi_pathinfo_t	*pip;
4841 	mdi_pathinfo_t	*next;
4842 	dev_info_t	*cdip;
4843 
4844 	if (!MDI_PHCI(dip))
4845 		return;
4846 
4847 	ph = i_devi_get_phci(dip);
4848 	if (ph == NULL)
4849 		return;
4850 
4851 	MDI_PHCI_LOCK(ph);
4852 
4853 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4854 		MDI_PHCI_UNLOCK(ph);
4855 		/* not last path */
4856 		return;
4857 	}
4858 
4859 	if (ph->ph_unstable) {
4860 		MDI_PHCI_UNLOCK(ph);
4861 		/* can't check for constraints */
4862 		*constraint = 0;
4863 		return;
4864 	}
4865 
4866 	pip = ph->ph_path_head;
4867 	while (pip != NULL) {
4868 		MDI_PI_LOCK(pip);
4869 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4870 
4871 		/*
4872 		 * The mdi_pathinfo state is OK. Check the client state.
4873 		 * If failover in progress fail the pHCI from offlining
4874 		 */
4875 		ct = MDI_PI(pip)->pi_client;
4876 		i_mdi_client_lock(ct, pip);
4877 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4878 		    (ct->ct_unstable)) {
4879 			/*
4880 			 * Failover is in progress, can't check for constraints
4881 			 */
4882 			MDI_PI_UNLOCK(pip);
4883 			i_mdi_client_unlock(ct);
4884 			MDI_PHCI_UNLOCK(ph);
4885 			*constraint = 0;
4886 			return;
4887 		}
4888 		MDI_PI_UNLOCK(pip);
4889 
4890 		/*
4891 		 * Check to see of we are retiring the last path of this
4892 		 * client device...
4893 		 */
4894 		cdip = ct->ct_dip;
4895 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4896 		    (i_mdi_client_compute_state(ct, ph) ==
4897 		    MDI_CLIENT_STATE_FAILED)) {
4898 			i_mdi_client_unlock(ct);
4899 			MDI_PHCI_UNLOCK(ph);
4900 			(void) e_ddi_retire_notify(cdip, constraint);
4901 			MDI_PHCI_LOCK(ph);
4902 			pip = next;
4903 		} else {
4904 			i_mdi_client_unlock(ct);
4905 			pip = next;
4906 		}
4907 	}
4908 
4909 	MDI_PHCI_UNLOCK(ph);
4910 
4911 	return;
4912 }
4913 
/*
 * mdi_phci_retire_finalize():
 *		offline the path(s) hanging off the PHCI. If the
 *		last path to any client, check that constraints
 *		have been applied.
 *
 *		dip		pHCI node being retired; nothing is done if it
 *				is not a pHCI, has no pHCI state, or the pHCI
 *				is already offline
 *		phci_only	when non-zero, the per-client
 *				e_ddi_retire_finalize() calls are skipped
 *
 *		If the pHCI or any client is found unstable or failing over,
 *		a warning is logged and no path is offlined.  If a path does
 *		not reach the offline state, a warning is logged and the pHCI
 *		is marked online again.
 */
void
mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		unstable = 0;
	int		constraint;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* no last path and no pips */
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* no last path and no pips */
		return;
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		unstable = 1;
	}

	/*
	 * Walk all paths.  Instability is only recorded here; the walk
	 * continues and the decision is taken after the loop.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * if failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			unstable = 1;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (!phci_only && cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* client and pHCI locks are dropped around the call */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			/*
			 * We don't retire clients we just retire the
			 * path to a client. If it is the last path
			 * to a client, constraints are checked and
			 * if we pass the last path is offlined. MPXIO will
			 * then fail all I/Os to the client. Since we don't
			 * want to retire the client on a path error
			 * set constraint = 0 so that the client dip
			 * is not retired.
			 */
			constraint = 0;
			(void) e_ddi_retire_finalize(cdip, &constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Cannot offline pip(s)
	 */
	if (unstable) {
		cmn_err(CE_WARN, "PHCI in transient state, cannot "
		    "retire, dip = %p", (void *)dip);
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	/*
	 * Offline every path (flags = 0).  The return value of
	 * i_mdi_pi_offline() is ignored; the resulting path state is checked
	 * under the path lock instead.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, 0);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not used afterwards */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
			    "PHCI dip = %p", (void *)dip);
			MDI_PI_UNLOCK(pip);
			/* undo the offline marking made above */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return;
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return;
}
5054 
5055 void
5056 mdi_phci_unretire(dev_info_t *dip)
5057 {
5058 	ASSERT(MDI_PHCI(dip));
5059 
5060 	/*
5061 	 * Online the phci
5062 	 */
5063 	i_mdi_phci_online(dip);
5064 }
5065 
5066 /*ARGSUSED*/
5067 static int
5068 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5069 {
5070 	int		rv = NDI_SUCCESS;
5071 	mdi_client_t	*ct;
5072 
5073 	/*
5074 	 * Client component to go offline.  Make sure that we are
5075 	 * not in failing over state and update client state
5076 	 * accordingly
5077 	 */
5078 	ct = i_devi_get_client(dip);
5079 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n",
5080 	    (void *)dip, (void *)ct));
5081 	if (ct != NULL) {
5082 		MDI_CLIENT_LOCK(ct);
5083 		if (ct->ct_unstable) {
5084 			/*
5085 			 * One or more paths are in transient state,
5086 			 * Dont allow offline of a client device
5087 			 */
5088 			MDI_DEBUG(1, (CE_WARN, dip,
5089 			    "!One or more paths to this device is "
5090 			    "in transient state. This device can not "
5091 			    "be removed at this moment. "
5092 			    "Please try again later."));
5093 			MDI_CLIENT_UNLOCK(ct);
5094 			return (NDI_BUSY);
5095 		}
5096 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5097 			/*
5098 			 * Failover is in progress, Dont allow DR of
5099 			 * a client device
5100 			 */
5101 			MDI_DEBUG(1, (CE_WARN, dip,
5102 			    "!Client device (%s%d) is Busy. %s",
5103 			    ddi_driver_name(dip), ddi_get_instance(dip),
5104 			    "This device can not be removed at "
5105 			    "this moment. Please try again later."));
5106 			MDI_CLIENT_UNLOCK(ct);
5107 			return (NDI_BUSY);
5108 		}
5109 		MDI_CLIENT_SET_OFFLINE(ct);
5110 
5111 		/*
5112 		 * Unbind our relationship with the dev_info node
5113 		 */
5114 		if (flags & NDI_DEVI_REMOVE) {
5115 			ct->ct_dip = NULL;
5116 		}
5117 		MDI_CLIENT_UNLOCK(ct);
5118 	}
5119 	return (rv);
5120 }
5121 
5122 /*
5123  * mdi_pre_attach():
5124  *		Pre attach() notification handler
5125  */
5126 /*ARGSUSED*/
5127 int
5128 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5129 {
5130 	/* don't support old DDI_PM_RESUME */
5131 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5132 	    (cmd == DDI_PM_RESUME))
5133 		return (DDI_FAILURE);
5134 
5135 	return (DDI_SUCCESS);
5136 }
5137 
5138 /*
5139  * mdi_post_attach():
5140  *		Post attach() notification handler
5141  */
5142 /*ARGSUSED*/
5143 void
5144 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5145 {
5146 	mdi_phci_t	*ph;
5147 	mdi_client_t	*ct;
5148 	mdi_vhci_t	*vh;
5149 
5150 	if (MDI_PHCI(dip)) {
5151 		ph = i_devi_get_phci(dip);
5152 		ASSERT(ph != NULL);
5153 
5154 		MDI_PHCI_LOCK(ph);
5155 		switch (cmd) {
5156 		case DDI_ATTACH:
5157 			MDI_DEBUG(2, (CE_NOTE, dip,
5158 			    "!pHCI post_attach: called %p\n", (void *)ph));
5159 			if (error == DDI_SUCCESS) {
5160 				MDI_PHCI_SET_ATTACH(ph);
5161 			} else {
5162 				MDI_DEBUG(1, (CE_NOTE, dip,
5163 				    "!pHCI post_attach: failed error=%d\n",
5164 				    error));
5165 				MDI_PHCI_SET_DETACH(ph);
5166 			}
5167 			break;
5168 
5169 		case DDI_RESUME:
5170 			MDI_DEBUG(2, (CE_NOTE, dip,
5171 			    "!pHCI post_resume: called %p\n", (void *)ph));
5172 			if (error == DDI_SUCCESS) {
5173 				MDI_PHCI_SET_RESUME(ph);
5174 			} else {
5175 				MDI_DEBUG(1, (CE_NOTE, dip,
5176 				    "!pHCI post_resume: failed error=%d\n",
5177 				    error));
5178 				MDI_PHCI_SET_SUSPEND(ph);
5179 			}
5180 			break;
5181 		}
5182 		MDI_PHCI_UNLOCK(ph);
5183 	}
5184 
5185 	if (MDI_CLIENT(dip)) {
5186 		ct = i_devi_get_client(dip);
5187 		ASSERT(ct != NULL);
5188 
5189 		MDI_CLIENT_LOCK(ct);
5190 		switch (cmd) {
5191 		case DDI_ATTACH:
5192 			MDI_DEBUG(2, (CE_NOTE, dip,
5193 			    "!Client post_attach: called %p\n", (void *)ct));
5194 			if (error != DDI_SUCCESS) {
5195 				MDI_DEBUG(1, (CE_NOTE, dip,
5196 				    "!Client post_attach: failed error=%d\n",
5197 				    error));
5198 				MDI_CLIENT_SET_DETACH(ct);
5199 				MDI_DEBUG(4, (CE_WARN, dip,
5200 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
5201 				i_mdi_pm_reset_client(ct);
5202 				break;
5203 			}
5204 
5205 			/*
5206 			 * Client device has successfully attached, inform
5207 			 * the vhci.
5208 			 */
5209 			vh = ct->ct_vhci;
5210 			if (vh->vh_ops->vo_client_attached)
5211 				(*vh->vh_ops->vo_client_attached)(dip);
5212 
5213 			MDI_CLIENT_SET_ATTACH(ct);
5214 			break;
5215 
5216 		case DDI_RESUME:
5217 			MDI_DEBUG(2, (CE_NOTE, dip,
5218 			    "!Client post_attach: called %p\n", (void *)ct));
5219 			if (error == DDI_SUCCESS) {
5220 				MDI_CLIENT_SET_RESUME(ct);
5221 			} else {
5222 				MDI_DEBUG(1, (CE_NOTE, dip,
5223 				    "!Client post_resume: failed error=%d\n",
5224 				    error));
5225 				MDI_CLIENT_SET_SUSPEND(ct);
5226 			}
5227 			break;
5228 		}
5229 		MDI_CLIENT_UNLOCK(ct);
5230 	}
5231 }
5232 
5233 /*
5234  * mdi_pre_detach():
5235  *		Pre detach notification handler
5236  */
5237 /*ARGSUSED*/
5238 int
5239 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5240 {
5241 	int rv = DDI_SUCCESS;
5242 
5243 	if (MDI_CLIENT(dip)) {
5244 		(void) i_mdi_client_pre_detach(dip, cmd);
5245 	}
5246 
5247 	if (MDI_PHCI(dip)) {
5248 		rv = i_mdi_phci_pre_detach(dip, cmd);
5249 	}
5250 
5251 	return (rv);
5252 }
5253 
/*
 * i_mdi_phci_pre_detach():
 *		pHCI pre detach/suspend handler.
 *
 *		DDI_DETACH	allowed only when the pHCI is already offline;
 *				the pHCI is then marked detached.
 *		DDI_SUSPEND	every client reachable through this pHCI that
 *				is neither detached nor suspended is suspended
 *				first.  If one of them refuses, the clients
 *				visited before it that are now suspended are
 *				resumed again and the request fails.
 *
 *		The pHCI lock is held from entry to the switch until the
 *		function returns.
 *
 * Return Values:
 *		DDI_SUCCESS	dip has no pHCI state, or the operation may
 *				proceed
 *		DDI_FAILURE	pHCI not offline (detach) or unknown command;
 *				for suspend, the failing devi_detach() result
 *				is returned
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_detach: called %p\n", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			/* capture the successor before the path lock is dropped */
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				/*
				 * The client lock is released before the
				 * client is asked to suspend.
				 */
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					/* the rollback walk stops here */
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.  Only the paths ahead of failed_pip
			 * are visited, and only clients currently marked
			 * suspended are resumed; the resume result is
			 * ignored.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
5374 
5375 /*ARGSUSED*/
5376 static int
5377 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5378 {
5379 	int		rv = DDI_SUCCESS;
5380 	mdi_client_t	*ct;
5381 
5382 	ct = i_devi_get_client(dip);
5383 	if (ct == NULL) {
5384 		return (rv);
5385 	}
5386 
5387 	MDI_CLIENT_LOCK(ct);
5388 	switch (cmd) {
5389 	case DDI_DETACH:
5390 		MDI_DEBUG(2, (CE_NOTE, dip,
5391 		    "!Client pre_detach: called %p\n", (void *)ct));
5392 		MDI_CLIENT_SET_DETACH(ct);
5393 		break;
5394 
5395 	case DDI_SUSPEND:
5396 		MDI_DEBUG(2, (CE_NOTE, dip,
5397 		    "!Client pre_suspend: called %p\n", (void *)ct));
5398 		MDI_CLIENT_SET_SUSPEND(ct);
5399 		break;
5400 
5401 	default:
5402 		rv = DDI_FAILURE;
5403 		break;
5404 	}
5405 	MDI_CLIENT_UNLOCK(ct);
5406 	return (rv);
5407 }
5408 
5409 /*
5410  * mdi_post_detach():
5411  *		Post detach notification handler
5412  */
5413 /*ARGSUSED*/
5414 void
5415 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5416 {
5417 	/*
5418 	 * Detach/Suspend of mpxio component failed. Update our state
5419 	 * too
5420 	 */
5421 	if (MDI_PHCI(dip))
5422 		i_mdi_phci_post_detach(dip, cmd, error);
5423 
5424 	if (MDI_CLIENT(dip))
5425 		i_mdi_client_post_detach(dip, cmd, error);
5426 }
5427 
5428 /*ARGSUSED*/
5429 static void
5430 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5431 {
5432 	mdi_phci_t	*ph;
5433 
5434 	/*
5435 	 * Detach/Suspend of phci component failed. Update our state
5436 	 * too
5437 	 */
5438 	ph = i_devi_get_phci(dip);
5439 	if (ph == NULL) {
5440 		return;
5441 	}
5442 
5443 	MDI_PHCI_LOCK(ph);
5444 	/*
5445 	 * Detach of pHCI failed. Restore back converse
5446 	 * state
5447 	 */
5448 	switch (cmd) {
5449 	case DDI_DETACH:
5450 		MDI_DEBUG(2, (CE_NOTE, dip,
5451 		    "!pHCI post_detach: called %p\n", (void *)ph));
5452 		if (error != DDI_SUCCESS)
5453 			MDI_PHCI_SET_ATTACH(ph);
5454 		break;
5455 
5456 	case DDI_SUSPEND:
5457 		MDI_DEBUG(2, (CE_NOTE, dip,
5458 		    "!pHCI post_suspend: called %p\n", (void *)ph));
5459 		if (error != DDI_SUCCESS)
5460 			MDI_PHCI_SET_RESUME(ph);
5461 		break;
5462 	}
5463 	MDI_PHCI_UNLOCK(ph);
5464 }
5465 
5466 /*ARGSUSED*/
5467 static void
5468 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5469 {
5470 	mdi_client_t	*ct;
5471 
5472 	ct = i_devi_get_client(dip);
5473 	if (ct == NULL) {
5474 		return;
5475 	}
5476 	MDI_CLIENT_LOCK(ct);
5477 	/*
5478 	 * Detach of Client failed. Restore back converse
5479 	 * state
5480 	 */
5481 	switch (cmd) {
5482 	case DDI_DETACH:
5483 		MDI_DEBUG(2, (CE_NOTE, dip,
5484 		    "!Client post_detach: called %p\n", (void *)ct));
5485 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5486 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5487 			    "i_mdi_pm_rele_client\n"));
5488 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5489 		} else {
5490 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5491 			    "i_mdi_pm_reset_client\n"));
5492 			i_mdi_pm_reset_client(ct);
5493 		}
5494 		if (error != DDI_SUCCESS)
5495 			MDI_CLIENT_SET_ATTACH(ct);
5496 		break;
5497 
5498 	case DDI_SUSPEND:
5499 		MDI_DEBUG(2, (CE_NOTE, dip,
5500 		    "!Client post_suspend: called %p\n", (void *)ct));
5501 		if (error != DDI_SUCCESS)
5502 			MDI_CLIENT_SET_RESUME(ct);
5503 		break;
5504 	}
5505 	MDI_CLIENT_UNLOCK(ct);
5506 }
5507 
5508 int
5509 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
5510 {
5511 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
5512 }
5513 
5514 /*
5515  * create and install per-path (client - pHCI) statistics
5516  * I/O stats supported: nread, nwritten, reads, and writes
5517  * Error stats - hard errors, soft errors, & transport errors
5518  */
5519 int
5520 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
5521 {
5522 	kstat_t			*kiosp, *kerrsp;
5523 	struct pi_errs		*nsp;
5524 	struct mdi_pi_kstats	*mdi_statp;
5525 
5526 	if (MDI_PI(pip)->pi_kstats != NULL)
5527 		return (MDI_SUCCESS);
5528 
5529 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5530 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
5531 		return (MDI_FAILURE);
5532 	}
5533 
5534 	(void) strcat(ksname, ",err");
5535 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5536 	    KSTAT_TYPE_NAMED,
5537 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5538 	if (kerrsp == NULL) {
5539 		kstat_delete(kiosp);
5540 		return (MDI_FAILURE);
5541 	}
5542 
5543 	nsp = (struct pi_errs *)kerrsp->ks_data;
5544 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5545 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5546 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5547 	    KSTAT_DATA_UINT32);
5548 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5549 	    KSTAT_DATA_UINT32);
5550 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5551 	    KSTAT_DATA_UINT32);
5552 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5553 	    KSTAT_DATA_UINT32);
5554 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5555 	    KSTAT_DATA_UINT32);
5556 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5557 	    KSTAT_DATA_UINT32);
5558 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5559 	    KSTAT_DATA_UINT32);
5560 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5561 
5562 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5563 	mdi_statp->pi_kstat_ref = 1;
5564 	mdi_statp->pi_kstat_iostats = kiosp;
5565 	mdi_statp->pi_kstat_errstats = kerrsp;
5566 	kstat_install(kiosp);
5567 	kstat_install(kerrsp);
5568 	MDI_PI(pip)->pi_kstats = mdi_statp;
5569 	return (MDI_SUCCESS);
5570 }
5571 
5572 /*
5573  * destroy per-path properties
5574  */
5575 static void
5576 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5577 {
5578 
5579 	struct mdi_pi_kstats *mdi_statp;
5580 
5581 	if (MDI_PI(pip)->pi_kstats == NULL)
5582 		return;
5583 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5584 		return;
5585 
5586 	MDI_PI(pip)->pi_kstats = NULL;
5587 
5588 	/*
5589 	 * the kstat may be shared between multiple pathinfo nodes
5590 	 * decrement this pathinfo's usage, removing the kstats
5591 	 * themselves when the last pathinfo reference is removed.
5592 	 */
5593 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5594 	if (--mdi_statp->pi_kstat_ref != 0)
5595 		return;
5596 
5597 	kstat_delete(mdi_statp->pi_kstat_iostats);
5598 	kstat_delete(mdi_statp->pi_kstat_errstats);
5599 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5600 }
5601 
5602 /*
5603  * update I/O paths KSTATS
5604  */
5605 void
5606 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5607 {
5608 	kstat_t *iostatp;
5609 	size_t xfer_cnt;
5610 
5611 	ASSERT(pip != NULL);
5612 
5613 	/*
5614 	 * I/O can be driven across a path prior to having path
5615 	 * statistics available, i.e. probe(9e).
5616 	 */
5617 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5618 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5619 		xfer_cnt = bp->b_bcount - bp->b_resid;
5620 		if (bp->b_flags & B_READ) {
5621 			KSTAT_IO_PTR(iostatp)->reads++;
5622 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5623 		} else {
5624 			KSTAT_IO_PTR(iostatp)->writes++;
5625 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5626 		}
5627 	}
5628 }
5629 
5630 /*
5631  * Enable the path(specific client/target/initiator)
5632  * Enabling a path means that MPxIO may select the enabled path for routing
5633  * future I/O requests, subject to other path state constraints.
5634  */
5635 int
5636 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
5637 {
5638 	mdi_phci_t	*ph;
5639 
5640 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5641 	if (ph == NULL) {
5642 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5643 			" failed. pip: %p ph = NULL\n", (void *)pip));
5644 		return (MDI_FAILURE);
5645 	}
5646 
5647 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
5648 		MDI_ENABLE_OP);
5649 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5650 		" Returning success pip = %p. ph = %p\n",
5651 		(void *)pip, (void *)ph));
5652 	return (MDI_SUCCESS);
5653 
5654 }
5655 
5656 /*
5657  * Disable the path (specific client/target/initiator)
5658  * Disabling a path means that MPxIO will not select the disabled path for
5659  * routing any new I/O requests.
5660  */
5661 int
5662 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
5663 {
5664 	mdi_phci_t	*ph;
5665 
5666 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5667 	if (ph == NULL) {
5668 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5669 			" failed. pip: %p ph = NULL\n", (void *)pip));
5670 		return (MDI_FAILURE);
5671 	}
5672 
5673 	(void) i_mdi_enable_disable_path(pip,
5674 			ph->ph_vhci, flags, MDI_DISABLE_OP);
5675 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5676 		"Returning success pip = %p. ph = %p",
5677 		(void *)pip, (void *)ph));
5678 	return (MDI_SUCCESS);
5679 }
5680 
5681 /*
5682  * disable the path to a particular pHCI (pHCI specified in the phci_path
5683  * argument) for a particular client (specified in the client_path argument).
5684  * Disabling a path means that MPxIO will not select the disabled path for
5685  * routing any new I/O requests.
5686  * NOTE: this will be removed once the NWS files are changed to use the new
5687  * mdi_{enable,disable}_path interfaces
5688  */
5689 int
5690 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5691 {
5692 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5693 }
5694 
5695 /*
5696  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5697  * argument) for a particular client (specified in the client_path argument).
5698  * Enabling a path means that MPxIO may select the enabled path for routing
5699  * future I/O requests, subject to other path state constraints.
5700  * NOTE: this will be removed once the NWS files are changed to use the new
5701  * mdi_{enable,disable}_path interfaces
5702  */
5703 
5704 int
5705 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5706 {
5707 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5708 }
5709 
5710 /*
5711  * Common routine for doing enable/disable.
5712  */
5713 static mdi_pathinfo_t *
5714 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
5715 		int op)
5716 {
5717 	int		sync_flag = 0;
5718 	int		rv;
5719 	mdi_pathinfo_t 	*next;
5720 	int		(*f)() = NULL;
5721 
5722 	f = vh->vh_ops->vo_pi_state_change;
5723 
5724 	sync_flag = (flags << 8) & 0xf00;
5725 
5726 	/*
5727 	 * Do a callback into the mdi consumer to let it
5728 	 * know that path is about to get enabled/disabled.
5729 	 */
5730 	if (f != NULL) {
5731 		rv = (*f)(vh->vh_dip, pip, 0,
5732 			MDI_PI_EXT_STATE(pip),
5733 			MDI_EXT_STATE_CHANGE | sync_flag |
5734 			op | MDI_BEFORE_STATE_CHANGE);
5735 		if (rv != MDI_SUCCESS) {
5736 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5737 			"!vo_pi_state_change: failed rv = %x", rv));
5738 		}
5739 	}
5740 	MDI_PI_LOCK(pip);
5741 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5742 
5743 	switch (flags) {
5744 		case USER_DISABLE:
5745 			if (op == MDI_DISABLE_OP) {
5746 				MDI_PI_SET_USER_DISABLE(pip);
5747 			} else {
5748 				MDI_PI_SET_USER_ENABLE(pip);
5749 			}
5750 			break;
5751 		case DRIVER_DISABLE:
5752 			if (op == MDI_DISABLE_OP) {
5753 				MDI_PI_SET_DRV_DISABLE(pip);
5754 			} else {
5755 				MDI_PI_SET_DRV_ENABLE(pip);
5756 			}
5757 			break;
5758 		case DRIVER_DISABLE_TRANSIENT:
5759 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
5760 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5761 			} else {
5762 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5763 			}
5764 			break;
5765 	}
5766 	MDI_PI_UNLOCK(pip);
5767 	/*
5768 	 * Do a callback into the mdi consumer to let it
5769 	 * know that path is now enabled/disabled.
5770 	 */
5771 	if (f != NULL) {
5772 		rv = (*f)(vh->vh_dip, pip, 0,
5773 			MDI_PI_EXT_STATE(pip),
5774 			MDI_EXT_STATE_CHANGE | sync_flag |
5775 			op | MDI_AFTER_STATE_CHANGE);
5776 		if (rv != MDI_SUCCESS) {
5777 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5778 			"!vo_pi_state_change: failed rv = %x", rv));
5779 		}
5780 	}
5781 	return (next);
5782 }
5783 
5784 /*
5785  * Common routine for doing enable/disable.
5786  * NOTE: this will be removed once the NWS files are changed to use the new
5787  * mdi_{enable,disable}_path has been putback
5788  */
5789 int
5790 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5791 {
5792 
5793 	mdi_phci_t	*ph;
5794 	mdi_vhci_t	*vh = NULL;
5795 	mdi_client_t	*ct;
5796 	mdi_pathinfo_t	*next, *pip;
5797 	int		found_it;
5798 
5799 	ph = i_devi_get_phci(pdip);
5800 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
5801 		"Op = %d pdip = %p cdip = %p\n", op, (void *)pdip,
5802 		(void *)cdip));
5803 	if (ph == NULL) {
5804 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5805 			"Op %d failed. ph = NULL\n", op));
5806 		return (MDI_FAILURE);
5807 	}
5808 
5809 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5810 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
5811 			"Op Invalid operation = %d\n", op));
5812 		return (MDI_FAILURE);
5813 	}
5814 
5815 	vh = ph->ph_vhci;
5816 
5817 	if (cdip == NULL) {
5818 		/*
5819 		 * Need to mark the Phci as enabled/disabled.
5820 		 */
5821 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
5822 		"Op %d for the phci\n", op));
5823 		MDI_PHCI_LOCK(ph);
5824 		switch (flags) {
5825 			case USER_DISABLE:
5826 				if (op == MDI_DISABLE_OP) {
5827 					MDI_PHCI_SET_USER_DISABLE(ph);
5828 				} else {
5829 					MDI_PHCI_SET_USER_ENABLE(ph);
5830 				}
5831 				break;
5832 			case DRIVER_DISABLE:
5833 				if (op == MDI_DISABLE_OP) {
5834 					MDI_PHCI_SET_DRV_DISABLE(ph);
5835 				} else {
5836 					MDI_PHCI_SET_DRV_ENABLE(ph);
5837 				}
5838 				break;
5839 			case DRIVER_DISABLE_TRANSIENT:
5840 				if (op == MDI_DISABLE_OP) {
5841 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5842 				} else {
5843 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
5844 				}
5845 				break;
5846 			default:
5847 				MDI_PHCI_UNLOCK(ph);
5848 				MDI_DEBUG(1, (CE_NOTE, NULL,
5849 				"!i_mdi_pi_enable_disable:"
5850 				" Invalid flag argument= %d\n", flags));
5851 		}
5852 
5853 		/*
5854 		 * Phci has been disabled. Now try to enable/disable
5855 		 * path info's to each client.
5856 		 */
5857 		pip = ph->ph_path_head;
5858 		while (pip != NULL) {
5859 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
5860 		}
5861 		MDI_PHCI_UNLOCK(ph);
5862 	} else {
5863 
5864 		/*
5865 		 * Disable a specific client.
5866 		 */
5867 		ct = i_devi_get_client(cdip);
5868 		if (ct == NULL) {
5869 			MDI_DEBUG(1, (CE_NOTE, NULL,
5870 			"!i_mdi_pi_enable_disable:"
5871 			" failed. ct = NULL operation = %d\n", op));
5872 			return (MDI_FAILURE);
5873 		}
5874 
5875 		MDI_CLIENT_LOCK(ct);
5876 		pip = ct->ct_path_head;
5877 		found_it = 0;
5878 		while (pip != NULL) {
5879 			MDI_PI_LOCK(pip);
5880 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5881 			if (MDI_PI(pip)->pi_phci == ph) {
5882 				MDI_PI_UNLOCK(pip);
5883 				found_it = 1;
5884 				break;
5885 			}
5886 			MDI_PI_UNLOCK(pip);
5887 			pip = next;
5888 		}
5889 
5890 
5891 		MDI_CLIENT_UNLOCK(ct);
5892 		if (found_it == 0) {
5893 			MDI_DEBUG(1, (CE_NOTE, NULL,
5894 			"!i_mdi_pi_enable_disable:"
5895 			" failed. Could not find corresponding pip\n"));
5896 			return (MDI_FAILURE);
5897 		}
5898 
5899 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
5900 	}
5901 
5902 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
5903 		"Op %d Returning success pdip = %p cdip = %p\n",
5904 		op, (void *)pdip, (void *)cdip));
5905 	return (MDI_SUCCESS);
5906 }
5907 
5908 /*
5909  * Ensure phci powered up
5910  */
5911 static void
5912 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
5913 {
5914 	dev_info_t	*ph_dip;
5915 
5916 	ASSERT(pip != NULL);
5917 	ASSERT(MDI_PI_LOCKED(pip));
5918 
5919 	if (MDI_PI(pip)->pi_pm_held) {
5920 		return;
5921 	}
5922 
5923 	ph_dip = mdi_pi_get_phci(pip);
5924 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n",
5925 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
5926 	if (ph_dip == NULL) {
5927 		return;
5928 	}
5929 
5930 	MDI_PI_UNLOCK(pip);
5931 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5932 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5933 
5934 	pm_hold_power(ph_dip);
5935 
5936 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5937 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5938 	MDI_PI_LOCK(pip);
5939 
5940 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
5941 	if (DEVI(ph_dip)->devi_pm_info)
5942 		MDI_PI(pip)->pi_pm_held = 1;
5943 }
5944 
5945 /*
5946  * Allow phci powered down
5947  */
5948 static void
5949 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
5950 {
5951 	dev_info_t	*ph_dip = NULL;
5952 
5953 	ASSERT(pip != NULL);
5954 	ASSERT(MDI_PI_LOCKED(pip));
5955 
5956 	if (MDI_PI(pip)->pi_pm_held == 0) {
5957 		return;
5958 	}
5959 
5960 	ph_dip = mdi_pi_get_phci(pip);
5961 	ASSERT(ph_dip != NULL);
5962 
5963 	MDI_PI_UNLOCK(pip);
5964 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n",
5965 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
5966 
5967 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5968 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5969 	pm_rele_power(ph_dip);
5970 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5971 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5972 
5973 	MDI_PI_LOCK(pip);
5974 	MDI_PI(pip)->pi_pm_held = 0;
5975 }
5976 
5977 static void
5978 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
5979 {
5980 	ASSERT(MDI_CLIENT_LOCKED(ct));
5981 
5982 	ct->ct_power_cnt += incr;
5983 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p "
5984 	    "ct_power_cnt = %d incr = %d\n", (void *)ct,
5985 	    ct->ct_power_cnt, incr));
5986 	ASSERT(ct->ct_power_cnt >= 0);
5987 }
5988 
5989 static void
5990 i_mdi_rele_all_phci(mdi_client_t *ct)
5991 {
5992 	mdi_pathinfo_t  *pip;
5993 
5994 	ASSERT(MDI_CLIENT_LOCKED(ct));
5995 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5996 	while (pip != NULL) {
5997 		mdi_hold_path(pip);
5998 		MDI_PI_LOCK(pip);
5999 		i_mdi_pm_rele_pip(pip);
6000 		MDI_PI_UNLOCK(pip);
6001 		mdi_rele_path(pip);
6002 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6003 	}
6004 }
6005 
6006 static void
6007 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6008 {
6009 	ASSERT(MDI_CLIENT_LOCKED(ct));
6010 
6011 	if (i_ddi_devi_attached(ct->ct_dip)) {
6012 		ct->ct_power_cnt -= decr;
6013 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
6014 		    "ct_power_cnt = %d decr = %d\n",
6015 		    (void *)ct, ct->ct_power_cnt, decr));
6016 	}
6017 
6018 	ASSERT(ct->ct_power_cnt >= 0);
6019 	if (ct->ct_power_cnt == 0) {
6020 		i_mdi_rele_all_phci(ct);
6021 		return;
6022 	}
6023 }
6024 
6025 static void
6026 i_mdi_pm_reset_client(mdi_client_t *ct)
6027 {
6028 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
6029 	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
6030 	ASSERT(MDI_CLIENT_LOCKED(ct));
6031 	ct->ct_power_cnt = 0;
6032 	i_mdi_rele_all_phci(ct);
6033 	ct->ct_powercnt_config = 0;
6034 	ct->ct_powercnt_unconfig = 0;
6035 	ct->ct_powercnt_reset = 1;
6036 }
6037 
6038 static int
6039 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6040 {
6041 	int		ret;
6042 	dev_info_t	*ph_dip;
6043 
6044 	MDI_PI_LOCK(pip);
6045 	i_mdi_pm_hold_pip(pip);
6046 
6047 	ph_dip = mdi_pi_get_phci(pip);
6048 	MDI_PI_UNLOCK(pip);
6049 
6050 	/* bring all components of phci to full power */
6051 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6052 	    "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip),
6053 	    ddi_get_instance(ph_dip), (void *)pip));
6054 
6055 	ret = pm_powerup(ph_dip);
6056 
6057 	if (ret == DDI_FAILURE) {
6058 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6059 		    "pm_powerup FAILED for %s%d %p\n",
6060 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip),
6061 		    (void *)pip));
6062 
6063 		MDI_PI_LOCK(pip);
6064 		i_mdi_pm_rele_pip(pip);
6065 		MDI_PI_UNLOCK(pip);
6066 		return (MDI_FAILURE);
6067 	}
6068 
6069 	return (MDI_SUCCESS);
6070 }
6071 
6072 static int
6073 i_mdi_power_all_phci(mdi_client_t *ct)
6074 {
6075 	mdi_pathinfo_t  *pip;
6076 	int		succeeded = 0;
6077 
6078 	ASSERT(MDI_CLIENT_LOCKED(ct));
6079 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6080 	while (pip != NULL) {
6081 		/*
6082 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6083 		 * or MDI_PATHINFO_STATE_OFFLINE.
6084 		 */
6085 		if (MDI_PI_IS_INIT(pip) ||
6086 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6087 			mdi_hold_path(pip);
6088 			MDI_CLIENT_UNLOCK(ct);
6089 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6090 				succeeded = 1;
6091 
6092 			ASSERT(ct == MDI_PI(pip)->pi_client);
6093 			MDI_CLIENT_LOCK(ct);
6094 			mdi_rele_path(pip);
6095 		}
6096 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6097 	}
6098 
6099 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6100 }
6101 
6102 /*
6103  * mdi_bus_power():
6104  *		1. Place the phci(s) into powered up state so that
6105  *		   client can do power management
6106  *		2. Ensure phci powered up as client power managing
6107  * Return Values:
6108  *		MDI_SUCCESS
6109  *		MDI_FAILURE
6110  */
6111 int
6112 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6113     void *arg, void *result)
6114 {
6115 	int			ret = MDI_SUCCESS;
6116 	pm_bp_child_pwrchg_t	*bpc;
6117 	mdi_client_t		*ct;
6118 	dev_info_t		*cdip;
6119 	pm_bp_has_changed_t	*bphc;
6120 
6121 	/*
6122 	 * BUS_POWER_NOINVOL not supported
6123 	 */
6124 	if (op == BUS_POWER_NOINVOL)
6125 		return (MDI_FAILURE);
6126 
6127 	/*
6128 	 * ignore other OPs.
6129 	 * return quickly to save cou cycles on the ct processing
6130 	 */
6131 	switch (op) {
6132 	case BUS_POWER_PRE_NOTIFICATION:
6133 	case BUS_POWER_POST_NOTIFICATION:
6134 		bpc = (pm_bp_child_pwrchg_t *)arg;
6135 		cdip = bpc->bpc_dip;
6136 		break;
6137 	case BUS_POWER_HAS_CHANGED:
6138 		bphc = (pm_bp_has_changed_t *)arg;
6139 		cdip = bphc->bphc_dip;
6140 		break;
6141 	default:
6142 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6143 	}
6144 
6145 	ASSERT(MDI_CLIENT(cdip));
6146 
6147 	ct = i_devi_get_client(cdip);
6148 	if (ct == NULL)
6149 		return (MDI_FAILURE);
6150 
6151 	/*
6152 	 * wait till the mdi_pathinfo node state change are processed
6153 	 */
6154 	MDI_CLIENT_LOCK(ct);
6155 	switch (op) {
6156 	case BUS_POWER_PRE_NOTIFICATION:
6157 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
6158 		    "BUS_POWER_PRE_NOTIFICATION:"
6159 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
6160 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6161 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6162 
6163 		/* serialize power level change per client */
6164 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6165 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6166 
6167 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
6168 
6169 		if (ct->ct_power_cnt == 0) {
6170 			ret = i_mdi_power_all_phci(ct);
6171 		}
6172 
6173 		/*
6174 		 * if new_level > 0:
6175 		 *	- hold phci(s)
6176 		 *	- power up phci(s) if not already
6177 		 * ignore power down
6178 		 */
6179 		if (bpc->bpc_nlevel > 0) {
6180 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6181 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6182 				    "mdi_bus_power i_mdi_pm_hold_client\n"));
6183 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6184 			}
6185 		}
6186 		break;
6187 	case BUS_POWER_POST_NOTIFICATION:
6188 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
6189 		    "BUS_POWER_POST_NOTIFICATION:"
6190 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
6191 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6192 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6193 		    *(int *)result));
6194 
6195 		if (*(int *)result == DDI_SUCCESS) {
6196 			if (bpc->bpc_nlevel > 0) {
6197 				MDI_CLIENT_SET_POWER_UP(ct);
6198 			} else {
6199 				MDI_CLIENT_SET_POWER_DOWN(ct);
6200 			}
6201 		}
6202 
6203 		/* release the hold we did in pre-notification */
6204 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6205 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
6206 			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6207 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
6208 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6209 		}
6210 
6211 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6212 			/* another thread might started attaching */
6213 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6214 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6215 				    "mdi_bus_power i_mdi_pm_rele_client\n"));
6216 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6217 			/* detaching has been taken care in pm_post_unconfig */
6218 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6219 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6220 				    "mdi_bus_power i_mdi_pm_reset_client\n"));
6221 				i_mdi_pm_reset_client(ct);
6222 			}
6223 		}
6224 
6225 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6226 		cv_broadcast(&ct->ct_powerchange_cv);
6227 
6228 		break;
6229 
6230 	/* need to do more */
6231 	case BUS_POWER_HAS_CHANGED:
6232 		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
6233 		    "BUS_POWER_HAS_CHANGED:"
6234 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
6235 		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6236 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6237 
6238 		if (bphc->bphc_nlevel > 0 &&
6239 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
6240 			if (ct->ct_power_cnt == 0) {
6241 				ret = i_mdi_power_all_phci(ct);
6242 			}
6243 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
6244 			    "mdi_bus_power i_mdi_pm_hold_client\n"));
6245 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
6246 		}
6247 
6248 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6249 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
6250 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
6251 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6252 		}
6253 		break;
6254 	}
6255 
6256 	MDI_CLIENT_UNLOCK(ct);
6257 	return (ret);
6258 }
6259 
6260 static int
6261 i_mdi_pm_pre_config_one(dev_info_t *child)
6262 {
6263 	int		ret = MDI_SUCCESS;
6264 	mdi_client_t	*ct;
6265 
6266 	ct = i_devi_get_client(child);
6267 	if (ct == NULL)
6268 		return (MDI_FAILURE);
6269 
6270 	MDI_CLIENT_LOCK(ct);
6271 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6272 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6273 
6274 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6275 		MDI_CLIENT_UNLOCK(ct);
6276 		MDI_DEBUG(4, (CE_NOTE, child,
6277 		    "i_mdi_pm_pre_config_one already configured\n"));
6278 		return (MDI_SUCCESS);
6279 	}
6280 
6281 	if (ct->ct_powercnt_config) {
6282 		MDI_CLIENT_UNLOCK(ct);
6283 		MDI_DEBUG(4, (CE_NOTE, child,
6284 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
6285 		return (MDI_SUCCESS);
6286 	}
6287 
6288 	if (ct->ct_power_cnt == 0) {
6289 		ret = i_mdi_power_all_phci(ct);
6290 	}
6291 	MDI_DEBUG(4, (CE_NOTE, child,
6292 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
6293 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6294 	ct->ct_powercnt_config = 1;
6295 	ct->ct_powercnt_reset = 0;
6296 	MDI_CLIENT_UNLOCK(ct);
6297 	return (ret);
6298 }
6299 
6300 static int
6301 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6302 {
6303 	int			ret = MDI_SUCCESS;
6304 	dev_info_t		*cdip;
6305 	int			circ;
6306 
6307 	ASSERT(MDI_VHCI(vdip));
6308 
6309 	/* ndi_devi_config_one */
6310 	if (child) {
6311 		ASSERT(DEVI_BUSY_OWNED(vdip));
6312 		return (i_mdi_pm_pre_config_one(child));
6313 	}
6314 
6315 	/* devi_config_common */
6316 	ndi_devi_enter(vdip, &circ);
6317 	cdip = ddi_get_child(vdip);
6318 	while (cdip) {
6319 		dev_info_t *next = ddi_get_next_sibling(cdip);
6320 
6321 		ret = i_mdi_pm_pre_config_one(cdip);
6322 		if (ret != MDI_SUCCESS)
6323 			break;
6324 		cdip = next;
6325 	}
6326 	ndi_devi_exit(vdip, circ);
6327 	return (ret);
6328 }
6329 
6330 static int
6331 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6332 {
6333 	int		ret = MDI_SUCCESS;
6334 	mdi_client_t	*ct;
6335 
6336 	ct = i_devi_get_client(child);
6337 	if (ct == NULL)
6338 		return (MDI_FAILURE);
6339 
6340 	MDI_CLIENT_LOCK(ct);
6341 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6342 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6343 
6344 	if (!i_ddi_devi_attached(ct->ct_dip)) {
6345 		MDI_DEBUG(4, (CE_NOTE, child,
6346 		    "i_mdi_pm_pre_unconfig node detached already\n"));
6347 		MDI_CLIENT_UNLOCK(ct);
6348 		return (MDI_SUCCESS);
6349 	}
6350 
6351 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6352 	    (flags & NDI_AUTODETACH)) {
6353 		MDI_DEBUG(4, (CE_NOTE, child,
6354 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
6355 		MDI_CLIENT_UNLOCK(ct);
6356 		return (MDI_FAILURE);
6357 	}
6358 
6359 	if (ct->ct_powercnt_unconfig) {
6360 		MDI_DEBUG(4, (CE_NOTE, child,
6361 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
6362 		MDI_CLIENT_UNLOCK(ct);
6363 		*held = 1;
6364 		return (MDI_SUCCESS);
6365 	}
6366 
6367 	if (ct->ct_power_cnt == 0) {
6368 		ret = i_mdi_power_all_phci(ct);
6369 	}
6370 	MDI_DEBUG(4, (CE_NOTE, child,
6371 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
6372 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6373 	ct->ct_powercnt_unconfig = 1;
6374 	ct->ct_powercnt_reset = 0;
6375 	MDI_CLIENT_UNLOCK(ct);
6376 	if (ret == MDI_SUCCESS)
6377 		*held = 1;
6378 	return (ret);
6379 }
6380 
6381 static int
6382 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6383     int flags)
6384 {
6385 	int			ret = MDI_SUCCESS;
6386 	dev_info_t		*cdip;
6387 	int			circ;
6388 
6389 	ASSERT(MDI_VHCI(vdip));
6390 	*held = 0;
6391 
6392 	/* ndi_devi_unconfig_one */
6393 	if (child) {
6394 		ASSERT(DEVI_BUSY_OWNED(vdip));
6395 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6396 	}
6397 
6398 	/* devi_unconfig_common */
6399 	ndi_devi_enter(vdip, &circ);
6400 	cdip = ddi_get_child(vdip);
6401 	while (cdip) {
6402 		dev_info_t *next = ddi_get_next_sibling(cdip);
6403 
6404 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6405 		cdip = next;
6406 	}
6407 	ndi_devi_exit(vdip, circ);
6408 
6409 	if (*held)
6410 		ret = MDI_SUCCESS;
6411 
6412 	return (ret);
6413 }
6414 
6415 static void
6416 i_mdi_pm_post_config_one(dev_info_t *child)
6417 {
6418 	mdi_client_t	*ct;
6419 
6420 	ct = i_devi_get_client(child);
6421 	if (ct == NULL)
6422 		return;
6423 
6424 	MDI_CLIENT_LOCK(ct);
6425 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6426 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6427 
6428 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6429 		MDI_DEBUG(4, (CE_NOTE, child,
6430 		    "i_mdi_pm_post_config_one NOT configured\n"));
6431 		MDI_CLIENT_UNLOCK(ct);
6432 		return;
6433 	}
6434 
6435 	/* client has not been updated */
6436 	if (MDI_CLIENT_IS_FAILED(ct)) {
6437 		MDI_DEBUG(4, (CE_NOTE, child,
6438 		    "i_mdi_pm_post_config_one NOT configured\n"));
6439 		MDI_CLIENT_UNLOCK(ct);
6440 		return;
6441 	}
6442 
6443 	/* another thread might have powered it down or detached it */
6444 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6445 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6446 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6447 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6448 		MDI_DEBUG(4, (CE_NOTE, child,
6449 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6450 		i_mdi_pm_reset_client(ct);
6451 	} else {
6452 		mdi_pathinfo_t  *pip, *next;
6453 		int	valid_path_count = 0;
6454 
6455 		MDI_DEBUG(4, (CE_NOTE, child,
6456 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6457 		pip = ct->ct_path_head;
6458 		while (pip != NULL) {
6459 			MDI_PI_LOCK(pip);
6460 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6461 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6462 				valid_path_count ++;
6463 			MDI_PI_UNLOCK(pip);
6464 			pip = next;
6465 		}
6466 		i_mdi_pm_rele_client(ct, valid_path_count);
6467 	}
6468 	ct->ct_powercnt_config = 0;
6469 	MDI_CLIENT_UNLOCK(ct);
6470 }
6471 
6472 static void
6473 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
6474 {
6475 	int		circ;
6476 	dev_info_t	*cdip;
6477 
6478 	ASSERT(MDI_VHCI(vdip));
6479 
6480 	/* ndi_devi_config_one */
6481 	if (child) {
6482 		ASSERT(DEVI_BUSY_OWNED(vdip));
6483 		i_mdi_pm_post_config_one(child);
6484 		return;
6485 	}
6486 
6487 	/* devi_config_common */
6488 	ndi_devi_enter(vdip, &circ);
6489 	cdip = ddi_get_child(vdip);
6490 	while (cdip) {
6491 		dev_info_t *next = ddi_get_next_sibling(cdip);
6492 
6493 		i_mdi_pm_post_config_one(cdip);
6494 		cdip = next;
6495 	}
6496 	ndi_devi_exit(vdip, circ);
6497 }
6498 
6499 static void
6500 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6501 {
6502 	mdi_client_t	*ct;
6503 
6504 	ct = i_devi_get_client(child);
6505 	if (ct == NULL)
6506 		return;
6507 
6508 	MDI_CLIENT_LOCK(ct);
6509 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6510 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6511 
6512 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6513 		MDI_DEBUG(4, (CE_NOTE, child,
6514 		    "i_mdi_pm_post_unconfig NOT held\n"));
6515 		MDI_CLIENT_UNLOCK(ct);
6516 		return;
6517 	}
6518 
6519 	/* failure detaching or another thread just attached it */
6520 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6521 	    i_ddi_devi_attached(ct->ct_dip)) ||
6522 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6523 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6524 		MDI_DEBUG(4, (CE_NOTE, child,
6525 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6526 		i_mdi_pm_reset_client(ct);
6527 	} else {
6528 		mdi_pathinfo_t  *pip, *next;
6529 		int	valid_path_count = 0;
6530 
6531 		MDI_DEBUG(4, (CE_NOTE, child,
6532 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6533 		pip = ct->ct_path_head;
6534 		while (pip != NULL) {
6535 			MDI_PI_LOCK(pip);
6536 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6537 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6538 				valid_path_count ++;
6539 			MDI_PI_UNLOCK(pip);
6540 			pip = next;
6541 		}
6542 		i_mdi_pm_rele_client(ct, valid_path_count);
6543 		ct->ct_powercnt_unconfig = 0;
6544 	}
6545 
6546 	MDI_CLIENT_UNLOCK(ct);
6547 }
6548 
6549 static void
6550 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
6551 {
6552 	int			circ;
6553 	dev_info_t		*cdip;
6554 
6555 	ASSERT(MDI_VHCI(vdip));
6556 
6557 	if (!held) {
6558 		MDI_DEBUG(4, (CE_NOTE, vdip,
6559 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6560 		return;
6561 	}
6562 
6563 	if (child) {
6564 		ASSERT(DEVI_BUSY_OWNED(vdip));
6565 		i_mdi_pm_post_unconfig_one(child);
6566 		return;
6567 	}
6568 
6569 	ndi_devi_enter(vdip, &circ);
6570 	cdip = ddi_get_child(vdip);
6571 	while (cdip) {
6572 		dev_info_t *next = ddi_get_next_sibling(cdip);
6573 
6574 		i_mdi_pm_post_unconfig_one(cdip);
6575 		cdip = next;
6576 	}
6577 	ndi_devi_exit(vdip, circ);
6578 }
6579 
6580 int
6581 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6582 {
6583 	int			circ, ret = MDI_SUCCESS;
6584 	dev_info_t		*client_dip = NULL;
6585 	mdi_client_t		*ct;
6586 
6587 	/*
6588 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6589 	 * Power up pHCI for the named client device.
6590 	 * Note: Before the client is enumerated under vhci by phci,
6591 	 * client_dip can be NULL. Then proceed to power up all the
6592 	 * pHCIs.
6593 	 */
6594 	if (devnm != NULL) {
6595 		ndi_devi_enter(vdip, &circ);
6596 		client_dip = ndi_devi_findchild(vdip, devnm);
6597 	}
6598 
6599 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
6600 	    op, devnm ? devnm : "NULL", (void *)client_dip));
6601 
6602 	switch (op) {
6603 	case MDI_PM_PRE_CONFIG:
6604 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6605 		break;
6606 
6607 	case MDI_PM_PRE_UNCONFIG:
6608 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6609 		    flags);
6610 		break;
6611 
6612 	case MDI_PM_POST_CONFIG:
6613 		i_mdi_pm_post_config(vdip, client_dip);
6614 		break;
6615 
6616 	case MDI_PM_POST_UNCONFIG:
6617 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6618 		break;
6619 
6620 	case MDI_PM_HOLD_POWER:
6621 	case MDI_PM_RELE_POWER:
6622 		ASSERT(args);
6623 
6624 		client_dip = (dev_info_t *)args;
6625 		ASSERT(MDI_CLIENT(client_dip));
6626 
6627 		ct = i_devi_get_client(client_dip);
6628 		MDI_CLIENT_LOCK(ct);
6629 
6630 		if (op == MDI_PM_HOLD_POWER) {
6631 			if (ct->ct_power_cnt == 0) {
6632 				(void) i_mdi_power_all_phci(ct);
6633 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6634 				    "mdi_power i_mdi_pm_hold_client\n"));
6635 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6636 			}
6637 		} else {
6638 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6639 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6640 				    "mdi_power i_mdi_pm_rele_client\n"));
6641 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6642 			} else {
6643 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6644 				    "mdi_power i_mdi_pm_reset_client\n"));
6645 				i_mdi_pm_reset_client(ct);
6646 			}
6647 		}
6648 
6649 		MDI_CLIENT_UNLOCK(ct);
6650 		break;
6651 
6652 	default:
6653 		break;
6654 	}
6655 
6656 	if (devnm)
6657 		ndi_devi_exit(vdip, circ);
6658 
6659 	return (ret);
6660 }
6661 
6662 int
6663 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6664 {
6665 	mdi_vhci_t *vhci;
6666 
6667 	if (!MDI_VHCI(dip))
6668 		return (MDI_FAILURE);
6669 
6670 	if (mdi_class) {
6671 		vhci = DEVI(dip)->devi_mdi_xhci;
6672 		ASSERT(vhci);
6673 		*mdi_class = vhci->vh_class;
6674 	}
6675 
6676 	return (MDI_SUCCESS);
6677 }
6678 
6679 int
6680 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6681 {
6682 	mdi_phci_t *phci;
6683 
6684 	if (!MDI_PHCI(dip))
6685 		return (MDI_FAILURE);
6686 
6687 	if (mdi_class) {
6688 		phci = DEVI(dip)->devi_mdi_xhci;
6689 		ASSERT(phci);
6690 		*mdi_class = phci->ph_vhci->vh_class;
6691 	}
6692 
6693 	return (MDI_SUCCESS);
6694 }
6695 
6696 int
6697 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6698 {
6699 	mdi_client_t *client;
6700 
6701 	if (!MDI_CLIENT(dip))
6702 		return (MDI_FAILURE);
6703 
6704 	if (mdi_class) {
6705 		client = DEVI(dip)->devi_mdi_client;
6706 		ASSERT(client);
6707 		*mdi_class = client->ct_vhci->vh_class;
6708 	}
6709 
6710 	return (MDI_SUCCESS);
6711 }
6712 
6713 void *
6714 mdi_client_get_vhci_private(dev_info_t *dip)
6715 {
6716 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6717 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6718 		mdi_client_t	*ct;
6719 		ct = i_devi_get_client(dip);
6720 		return (ct->ct_vprivate);
6721 	}
6722 	return (NULL);
6723 }
6724 
6725 void
6726 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6727 {
6728 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6729 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6730 		mdi_client_t	*ct;
6731 		ct = i_devi_get_client(dip);
6732 		ct->ct_vprivate = data;
6733 	}
6734 }
6735 /*
6736  * mdi_pi_get_vhci_private():
6737  *		Get the vhci private information associated with the
6738  *		mdi_pathinfo node
6739  */
6740 void *
6741 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6742 {
6743 	caddr_t	vprivate = NULL;
6744 	if (pip) {
6745 		vprivate = MDI_PI(pip)->pi_vprivate;
6746 	}
6747 	return (vprivate);
6748 }
6749 
6750 /*
6751  * mdi_pi_set_vhci_private():
6752  *		Set the vhci private information in the mdi_pathinfo node
6753  */
6754 void
6755 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6756 {
6757 	if (pip) {
6758 		MDI_PI(pip)->pi_vprivate = priv;
6759 	}
6760 }
6761 
6762 /*
6763  * mdi_phci_get_vhci_private():
6764  *		Get the vhci private information associated with the
6765  *		mdi_phci node
6766  */
6767 void *
6768 mdi_phci_get_vhci_private(dev_info_t *dip)
6769 {
6770 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6771 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6772 		mdi_phci_t	*ph;
6773 		ph = i_devi_get_phci(dip);
6774 		return (ph->ph_vprivate);
6775 	}
6776 	return (NULL);
6777 }
6778 
6779 /*
6780  * mdi_phci_set_vhci_private():
6781  *		Set the vhci private information in the mdi_phci node
6782  */
6783 void
6784 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6785 {
6786 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6787 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6788 		mdi_phci_t	*ph;
6789 		ph = i_devi_get_phci(dip);
6790 		ph->ph_vprivate = priv;
6791 	}
6792 }
6793 
6794 /*
6795  * List of vhci class names:
6796  * A vhci class name must be in this list only if the corresponding vhci
6797  * driver intends to use the mdi provided bus config implementation
6798  * (i.e., mdi_vhci_bus_config()).
6799  */
6800 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
6801 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
6802 
6803 /*
6804  * During boot time, the on-disk vhci cache for every vhci class is read
6805  * in the form of an nvlist and stored here.
6806  */
6807 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
6808 
6809 /* nvpair names in vhci cache nvlist */
6810 #define	MDI_VHCI_CACHE_VERSION	1
6811 #define	MDI_NVPNAME_VERSION	"version"
6812 #define	MDI_NVPNAME_PHCIS	"phcis"
6813 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
6814 
6815 /*
6816  * Given vhci class name, return its on-disk vhci cache filename.
6817  * Memory for the returned filename which includes the full path is allocated
6818  * by this function.
6819  */
6820 static char *
6821 vhclass2vhcache_filename(char *vhclass)
6822 {
6823 	char *filename;
6824 	int len;
6825 	static char *fmt = "/etc/devices/mdi_%s_cache";
6826 
6827 	/*
6828 	 * fmt contains the on-disk vhci cache file name format;
6829 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
6830 	 */
6831 
6832 	/* the -1 below is to account for "%s" in the format string */
6833 	len = strlen(fmt) + strlen(vhclass) - 1;
6834 	filename = kmem_alloc(len, KM_SLEEP);
6835 	(void) snprintf(filename, len, fmt, vhclass);
6836 	ASSERT(len == (strlen(filename) + 1));
6837 	return (filename);
6838 }
6839 
6840 /*
6841  * initialize the vhci cache related data structures and read the on-disk
6842  * vhci cached data into memory.
6843  */
6844 static void
6845 setup_vhci_cache(mdi_vhci_t *vh)
6846 {
6847 	mdi_vhci_config_t *vhc;
6848 	mdi_vhci_cache_t *vhcache;
6849 	int i;
6850 	nvlist_t *nvl = NULL;
6851 
6852 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
6853 	vh->vh_config = vhc;
6854 	vhcache = &vhc->vhc_vhcache;
6855 
6856 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
6857 
6858 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
6859 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
6860 
6861 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
6862 
6863 	/*
6864 	 * Create string hash; same as mod_hash_create_strhash() except that
6865 	 * we use NULL key destructor.
6866 	 */
6867 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
6868 	    mdi_bus_config_cache_hash_size,
6869 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
6870 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
6871 
6872 	/*
6873 	 * The on-disk vhci cache is read during booting prior to the
6874 	 * lights-out period by mdi_read_devices_files().
6875 	 */
6876 	for (i = 0; i < N_VHCI_CLASSES; i++) {
6877 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
6878 			nvl = vhcache_nvl[i];
6879 			vhcache_nvl[i] = NULL;
6880 			break;
6881 		}
6882 	}
6883 
6884 	/*
6885 	 * this is to cover the case of some one manually causing unloading
6886 	 * (or detaching) and reloading (or attaching) of a vhci driver.
6887 	 */
6888 	if (nvl == NULL && modrootloaded)
6889 		nvl = read_on_disk_vhci_cache(vh->vh_class);
6890 
6891 	if (nvl != NULL) {
6892 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
6893 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
6894 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
6895 		else  {
6896 			cmn_err(CE_WARN,
6897 			    "%s: data file corrupted, will recreate\n",
6898 			    vhc->vhc_vhcache_filename);
6899 		}
6900 		rw_exit(&vhcache->vhcache_lock);
6901 		nvlist_free(nvl);
6902 	}
6903 
6904 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
6905 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
6906 
6907 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
6908 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
6909 }
6910 
6911 /*
6912  * free all vhci cache related resources
6913  */
6914 static int
6915 destroy_vhci_cache(mdi_vhci_t *vh)
6916 {
6917 	mdi_vhci_config_t *vhc = vh->vh_config;
6918 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
6919 	mdi_vhcache_phci_t *cphci, *cphci_next;
6920 	mdi_vhcache_client_t *cct, *cct_next;
6921 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
6922 
6923 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
6924 		return (MDI_FAILURE);
6925 
6926 	kmem_free(vhc->vhc_vhcache_filename,
6927 	    strlen(vhc->vhc_vhcache_filename) + 1);
6928 
6929 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
6930 
6931 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
6932 	    cphci = cphci_next) {
6933 		cphci_next = cphci->cphci_next;
6934 		free_vhcache_phci(cphci);
6935 	}
6936 
6937 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
6938 		cct_next = cct->cct_next;
6939 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
6940 			cpi_next = cpi->cpi_next;
6941 			free_vhcache_pathinfo(cpi);
6942 		}
6943 		free_vhcache_client(cct);
6944 	}
6945 
6946 	rw_destroy(&vhcache->vhcache_lock);
6947 
6948 	mutex_destroy(&vhc->vhc_lock);
6949 	cv_destroy(&vhc->vhc_cv);
6950 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
6951 	return (MDI_SUCCESS);
6952 }
6953 
6954 /*
6955  * Stop all vhci cache related async threads and free their resources.
6956  */
6957 static int
6958 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
6959 {
6960 	mdi_async_client_config_t *acc, *acc_next;
6961 
6962 	mutex_enter(&vhc->vhc_lock);
6963 	vhc->vhc_flags |= MDI_VHC_EXIT;
6964 	ASSERT(vhc->vhc_acc_thrcount >= 0);
6965 	cv_broadcast(&vhc->vhc_cv);
6966 
6967 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
6968 	    vhc->vhc_acc_thrcount != 0) {
6969 		mutex_exit(&vhc->vhc_lock);
6970 		delay(1);
6971 		mutex_enter(&vhc->vhc_lock);
6972 	}
6973 
6974 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
6975 
6976 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
6977 		acc_next = acc->acc_next;
6978 		free_async_client_config(acc);
6979 	}
6980 	vhc->vhc_acc_list_head = NULL;
6981 	vhc->vhc_acc_list_tail = NULL;
6982 	vhc->vhc_acc_count = 0;
6983 
6984 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6985 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6986 		mutex_exit(&vhc->vhc_lock);
6987 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
6988 			vhcache_dirty(vhc);
6989 			return (MDI_FAILURE);
6990 		}
6991 	} else
6992 		mutex_exit(&vhc->vhc_lock);
6993 
6994 	if (callb_delete(vhc->vhc_cbid) != 0)
6995 		return (MDI_FAILURE);
6996 
6997 	return (MDI_SUCCESS);
6998 }
6999 
7000 /*
7001  * Stop vhci cache flush thread
7002  */
7003 /* ARGSUSED */
7004 static boolean_t
7005 stop_vhcache_flush_thread(void *arg, int code)
7006 {
7007 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7008 
7009 	mutex_enter(&vhc->vhc_lock);
7010 	vhc->vhc_flags |= MDI_VHC_EXIT;
7011 	cv_broadcast(&vhc->vhc_cv);
7012 
7013 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7014 		mutex_exit(&vhc->vhc_lock);
7015 		delay(1);
7016 		mutex_enter(&vhc->vhc_lock);
7017 	}
7018 
7019 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7020 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7021 		mutex_exit(&vhc->vhc_lock);
7022 		(void) flush_vhcache(vhc, 1);
7023 	} else
7024 		mutex_exit(&vhc->vhc_lock);
7025 
7026 	return (B_TRUE);
7027 }
7028 
7029 /*
7030  * Enqueue the vhcache phci (cphci) at the tail of the list
7031  */
7032 static void
7033 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7034 {
7035 	cphci->cphci_next = NULL;
7036 	if (vhcache->vhcache_phci_head == NULL)
7037 		vhcache->vhcache_phci_head = cphci;
7038 	else
7039 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7040 	vhcache->vhcache_phci_tail = cphci;
7041 }
7042 
7043 /*
7044  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7045  */
7046 static void
7047 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7048     mdi_vhcache_pathinfo_t *cpi)
7049 {
7050 	cpi->cpi_next = NULL;
7051 	if (cct->cct_cpi_head == NULL)
7052 		cct->cct_cpi_head = cpi;
7053 	else
7054 		cct->cct_cpi_tail->cpi_next = cpi;
7055 	cct->cct_cpi_tail = cpi;
7056 }
7057 
7058 /*
7059  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7060  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7061  * flag set come at the beginning of the list. All cpis which have this
7062  * flag set come at the end of the list.
7063  */
7064 static void
7065 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7066     mdi_vhcache_pathinfo_t *newcpi)
7067 {
7068 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7069 
7070 	if (cct->cct_cpi_head == NULL ||
7071 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7072 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7073 	else {
7074 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7075 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7076 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7077 			;
7078 
7079 		if (prev_cpi == NULL)
7080 			cct->cct_cpi_head = newcpi;
7081 		else
7082 			prev_cpi->cpi_next = newcpi;
7083 
7084 		newcpi->cpi_next = cpi;
7085 
7086 		if (cpi == NULL)
7087 			cct->cct_cpi_tail = newcpi;
7088 	}
7089 }
7090 
7091 /*
7092  * Enqueue the vhcache client (cct) at the tail of the list
7093  */
7094 static void
7095 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7096     mdi_vhcache_client_t *cct)
7097 {
7098 	cct->cct_next = NULL;
7099 	if (vhcache->vhcache_client_head == NULL)
7100 		vhcache->vhcache_client_head = cct;
7101 	else
7102 		vhcache->vhcache_client_tail->cct_next = cct;
7103 	vhcache->vhcache_client_tail = cct;
7104 }
7105 
/*
 * Free an array of nelem kmem-allocated strings and then the array itself.
 * NULL elements are skipped; a NULL str is a no-op.  Each string is freed
 * with size strlen() + 1.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] != NULL)
			kmem_free(str[idx], strlen(str[idx]) + 1);
	}

	kmem_free(str, sizeof (char *) * nelem);
}
7119 
7120 static void
7121 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7122 {
7123 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7124 	kmem_free(cphci, sizeof (*cphci));
7125 }
7126 
7127 static void
7128 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7129 {
7130 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7131 	kmem_free(cpi, sizeof (*cpi));
7132 }
7133 
7134 static void
7135 free_vhcache_client(mdi_vhcache_client_t *cct)
7136 {
7137 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7138 	kmem_free(cct, sizeof (*cct));
7139 }
7140 
7141 static char *
7142 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7143 {
7144 	char *name_addr;
7145 	int len;
7146 
7147 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7148 	name_addr = kmem_alloc(len, KM_SLEEP);
7149 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7150 
7151 	if (ret_len)
7152 		*ret_len = len;
7153 	return (name_addr);
7154 }
7155 
7156 /*
7157  * Copy the contents of paddrnvl to vhci cache.
7158  * paddrnvl nvlist contains path information for a vhci client.
7159  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7160  */
7161 static void
7162 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7163     mdi_vhcache_client_t *cct)
7164 {
7165 	nvpair_t *nvp = NULL;
7166 	mdi_vhcache_pathinfo_t *cpi;
7167 	uint_t nelem;
7168 	uint32_t *val;
7169 
7170 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7171 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7172 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7173 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7174 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7175 		ASSERT(nelem == 2);
7176 		cpi->cpi_cphci = cphci_list[val[0]];
7177 		cpi->cpi_flags = val[1];
7178 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7179 	}
7180 }
7181 
7182 /*
7183  * Copy the contents of caddrmapnvl to vhci cache.
7184  * caddrmapnvl nvlist contains vhci client address to phci client address
7185  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7186  * this nvlist.
7187  */
7188 static void
7189 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7190     mdi_vhcache_phci_t *cphci_list[])
7191 {
7192 	nvpair_t *nvp = NULL;
7193 	nvlist_t *paddrnvl;
7194 	mdi_vhcache_client_t *cct;
7195 
7196 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7197 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7198 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7199 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7200 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7201 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7202 		/* the client must contain at least one path */
7203 		ASSERT(cct->cct_cpi_head != NULL);
7204 
7205 		enqueue_vhcache_client(vhcache, cct);
7206 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7207 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7208 	}
7209 }
7210 
7211 /*
7212  * Copy the contents of the main nvlist to vhci cache.
7213  *
7214  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7215  * The nvlist contains the mappings between the vhci client addresses and
7216  * their corresponding phci client addresses.
7217  *
7218  * The structure of the nvlist is as follows:
7219  *
7220  * Main nvlist:
7221  *	NAME		TYPE		DATA
7222  *	version		int32		version number
7223  *	phcis		string array	array of phci paths
7224  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7225  *
7226  * structure of c2paddrs_nvl:
7227  *	NAME		TYPE		DATA
7228  *	caddr1		nvlist_t	paddrs_nvl1
7229  *	caddr2		nvlist_t	paddrs_nvl2
7230  *	...
7231  * where caddr1, caddr2, ... are vhci client name and addresses in the
7232  * form of "<clientname>@<clientaddress>".
7233  * (for example: "ssd@2000002037cd9f72");
7234  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7235  *
7236  * structure of paddrs_nvl:
7237  *	NAME		TYPE		DATA
7238  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7239  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7240  *	...
7241  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7242  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7243  * phci-ids are integers that identify PHCIs to which the
7244  * the bus specific address belongs to. These integers are used as an index
7245  * into to the phcis string array in the main nvlist to get the PHCI path.
7246  */
7247 static int
7248 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7249 {
7250 	char **phcis, **phci_namep;
7251 	uint_t nphcis;
7252 	mdi_vhcache_phci_t *cphci, **cphci_list;
7253 	nvlist_t *caddrmapnvl;
7254 	int32_t ver;
7255 	int i;
7256 	size_t cphci_list_size;
7257 
7258 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7259 
7260 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7261 	    ver != MDI_VHCI_CACHE_VERSION)
7262 		return (MDI_FAILURE);
7263 
7264 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7265 	    &nphcis) != 0)
7266 		return (MDI_SUCCESS);
7267 
7268 	ASSERT(nphcis > 0);
7269 
7270 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7271 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7272 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7273 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7274 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7275 		enqueue_vhcache_phci(vhcache, cphci);
7276 		cphci_list[i] = cphci;
7277 	}
7278 
7279 	ASSERT(vhcache->vhcache_phci_head != NULL);
7280 
7281 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7282 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7283 
7284 	kmem_free(cphci_list, cphci_list_size);
7285 	return (MDI_SUCCESS);
7286 }
7287 
7288 /*
7289  * Build paddrnvl for the specified client using the information in the
7290  * vhci cache and add it to the caddrmapnnvl.
7291  * Returns 0 on success, errno on failure.
7292  */
7293 static int
7294 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7295     nvlist_t *caddrmapnvl)
7296 {
7297 	mdi_vhcache_pathinfo_t *cpi;
7298 	nvlist_t *nvl;
7299 	int err;
7300 	uint32_t val[2];
7301 
7302 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7303 
7304 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7305 		return (err);
7306 
7307 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7308 		val[0] = cpi->cpi_cphci->cphci_id;
7309 		val[1] = cpi->cpi_flags;
7310 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7311 		    != 0)
7312 			goto out;
7313 	}
7314 
7315 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7316 out:
7317 	nvlist_free(nvl);
7318 	return (err);
7319 }
7320 
7321 /*
7322  * Build caddrmapnvl using the information in the vhci cache
7323  * and add it to the mainnvl.
7324  * Returns 0 on success, errno on failure.
7325  */
7326 static int
7327 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7328 {
7329 	mdi_vhcache_client_t *cct;
7330 	nvlist_t *nvl;
7331 	int err;
7332 
7333 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7334 
7335 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7336 		return (err);
7337 
7338 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7339 	    cct = cct->cct_next) {
7340 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7341 			goto out;
7342 	}
7343 
7344 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7345 out:
7346 	nvlist_free(nvl);
7347 	return (err);
7348 }
7349 
7350 /*
7351  * Build nvlist using the information in the vhci cache.
7352  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7353  * Returns nvl on success, NULL on failure.
7354  */
7355 static nvlist_t *
7356 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7357 {
7358 	mdi_vhcache_phci_t *cphci;
7359 	uint_t phci_count;
7360 	char **phcis;
7361 	nvlist_t *nvl;
7362 	int err, i;
7363 
7364 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7365 		nvl = NULL;
7366 		goto out;
7367 	}
7368 
7369 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7370 	    MDI_VHCI_CACHE_VERSION)) != 0)
7371 		goto out;
7372 
7373 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7374 	if (vhcache->vhcache_phci_head == NULL) {
7375 		rw_exit(&vhcache->vhcache_lock);
7376 		return (nvl);
7377 	}
7378 
7379 	phci_count = 0;
7380 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7381 	    cphci = cphci->cphci_next)
7382 		cphci->cphci_id = phci_count++;
7383 
7384 	/* build phci pathname list */
7385 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7386 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7387 	    cphci = cphci->cphci_next, i++)
7388 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7389 
7390 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7391 	    phci_count);
7392 	free_string_array(phcis, phci_count);
7393 
7394 	if (err == 0 &&
7395 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7396 		rw_exit(&vhcache->vhcache_lock);
7397 		return (nvl);
7398 	}
7399 
7400 	rw_exit(&vhcache->vhcache_lock);
7401 out:
7402 	if (nvl)
7403 		nvlist_free(nvl);
7404 	return (NULL);
7405 }
7406 
7407 /*
7408  * Lookup vhcache phci structure for the specified phci path.
7409  */
7410 static mdi_vhcache_phci_t *
7411 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7412 {
7413 	mdi_vhcache_phci_t *cphci;
7414 
7415 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7416 
7417 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7418 	    cphci = cphci->cphci_next) {
7419 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7420 			return (cphci);
7421 	}
7422 
7423 	return (NULL);
7424 }
7425 
7426 /*
7427  * Lookup vhcache phci structure for the specified phci.
7428  */
7429 static mdi_vhcache_phci_t *
7430 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7431 {
7432 	mdi_vhcache_phci_t *cphci;
7433 
7434 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7435 
7436 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7437 	    cphci = cphci->cphci_next) {
7438 		if (cphci->cphci_phci == ph)
7439 			return (cphci);
7440 	}
7441 
7442 	return (NULL);
7443 }
7444 
7445 /*
7446  * Add the specified phci to the vhci cache if not already present.
7447  */
7448 static void
7449 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7450 {
7451 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7452 	mdi_vhcache_phci_t *cphci;
7453 	char *pathname;
7454 	int cache_updated;
7455 
7456 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7457 
7458 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7459 	(void) ddi_pathname(ph->ph_dip, pathname);
7460 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7461 	    != NULL) {
7462 		cphci->cphci_phci = ph;
7463 		cache_updated = 0;
7464 	} else {
7465 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7466 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7467 		cphci->cphci_phci = ph;
7468 		enqueue_vhcache_phci(vhcache, cphci);
7469 		cache_updated = 1;
7470 	}
7471 
7472 	rw_exit(&vhcache->vhcache_lock);
7473 
7474 	/*
7475 	 * Since a new phci has been added, reset
7476 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7477 	 * during next vhcache_discover_paths().
7478 	 */
7479 	mutex_enter(&vhc->vhc_lock);
7480 	vhc->vhc_path_discovery_cutoff_time = 0;
7481 	mutex_exit(&vhc->vhc_lock);
7482 
7483 	kmem_free(pathname, MAXPATHLEN);
7484 	if (cache_updated)
7485 		vhcache_dirty(vhc);
7486 }
7487 
7488 /*
7489  * Remove the reference to the specified phci from the vhci cache.
7490  */
7491 static void
7492 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7493 {
7494 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7495 	mdi_vhcache_phci_t *cphci;
7496 
7497 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7498 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7499 		/* do not remove the actual mdi_vhcache_phci structure */
7500 		cphci->cphci_phci = NULL;
7501 	}
7502 	rw_exit(&vhcache->vhcache_lock);
7503 }
7504 
7505 static void
7506 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7507     mdi_vhcache_lookup_token_t *src)
7508 {
7509 	if (src == NULL) {
7510 		dst->lt_cct = NULL;
7511 		dst->lt_cct_lookup_time = 0;
7512 	} else {
7513 		dst->lt_cct = src->lt_cct;
7514 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7515 	}
7516 }
7517 
7518 /*
7519  * Look up vhcache client for the specified client.
7520  */
7521 static mdi_vhcache_client_t *
7522 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7523     mdi_vhcache_lookup_token_t *token)
7524 {
7525 	mod_hash_val_t hv;
7526 	char *name_addr;
7527 	int len;
7528 
7529 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7530 
7531 	/*
7532 	 * If no vhcache clean occurred since the last lookup, we can
7533 	 * simply return the cct from the last lookup operation.
7534 	 * It works because ccts are never freed except during the vhcache
7535 	 * cleanup operation.
7536 	 */
7537 	if (token != NULL &&
7538 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7539 		return (token->lt_cct);
7540 
7541 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7542 	if (mod_hash_find(vhcache->vhcache_client_hash,
7543 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7544 		if (token) {
7545 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7546 			token->lt_cct_lookup_time = lbolt64;
7547 		}
7548 	} else {
7549 		if (token) {
7550 			token->lt_cct = NULL;
7551 			token->lt_cct_lookup_time = 0;
7552 		}
7553 		hv = NULL;
7554 	}
7555 	kmem_free(name_addr, len);
7556 	return ((mdi_vhcache_client_t *)hv);
7557 }
7558 
7559 /*
7560  * Add the specified path to the vhci cache if not already present.
7561  * Also add the vhcache client for the client corresponding to this path
7562  * if it doesn't already exist.
7563  */
7564 static void
7565 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7566 {
7567 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7568 	mdi_vhcache_client_t *cct;
7569 	mdi_vhcache_pathinfo_t *cpi;
7570 	mdi_phci_t *ph = pip->pi_phci;
7571 	mdi_client_t *ct = pip->pi_client;
7572 	int cache_updated = 0;
7573 
7574 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7575 
7576 	/* if vhcache client for this pip doesn't already exist, add it */
7577 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7578 	    NULL)) == NULL) {
7579 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7580 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7581 		    ct->ct_guid, NULL);
7582 		enqueue_vhcache_client(vhcache, cct);
7583 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7584 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7585 		cache_updated = 1;
7586 	}
7587 
7588 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7589 		if (cpi->cpi_cphci->cphci_phci == ph &&
7590 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7591 			cpi->cpi_pip = pip;
7592 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7593 				cpi->cpi_flags &=
7594 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7595 				sort_vhcache_paths(cct);
7596 				cache_updated = 1;
7597 			}
7598 			break;
7599 		}
7600 	}
7601 
7602 	if (cpi == NULL) {
7603 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7604 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7605 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7606 		ASSERT(cpi->cpi_cphci != NULL);
7607 		cpi->cpi_pip = pip;
7608 		enqueue_vhcache_pathinfo(cct, cpi);
7609 		cache_updated = 1;
7610 	}
7611 
7612 	rw_exit(&vhcache->vhcache_lock);
7613 
7614 	if (cache_updated)
7615 		vhcache_dirty(vhc);
7616 }
7617 
7618 /*
7619  * Remove the reference to the specified path from the vhci cache.
7620  */
7621 static void
7622 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7623 {
7624 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7625 	mdi_client_t *ct = pip->pi_client;
7626 	mdi_vhcache_client_t *cct;
7627 	mdi_vhcache_pathinfo_t *cpi;
7628 
7629 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7630 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7631 	    NULL)) != NULL) {
7632 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7633 		    cpi = cpi->cpi_next) {
7634 			if (cpi->cpi_pip == pip) {
7635 				cpi->cpi_pip = NULL;
7636 				break;
7637 			}
7638 		}
7639 	}
7640 	rw_exit(&vhcache->vhcache_lock);
7641 }
7642 
7643 /*
7644  * Flush the vhci cache to disk.
7645  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7646  */
7647 static int
7648 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7649 {
7650 	nvlist_t *nvl;
7651 	int err;
7652 	int rv;
7653 
7654 	/*
7655 	 * It is possible that the system may shutdown before
7656 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7657 	 * flushing the cache in this case do not check for
7658 	 * i_ddi_io_initialized when force flag is set.
7659 	 */
7660 	if (force_flag == 0 && !i_ddi_io_initialized())
7661 		return (MDI_FAILURE);
7662 
7663 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7664 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7665 		nvlist_free(nvl);
7666 	} else
7667 		err = EFAULT;
7668 
7669 	rv = MDI_SUCCESS;
7670 	mutex_enter(&vhc->vhc_lock);
7671 	if (err != 0) {
7672 		if (err == EROFS) {
7673 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7674 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7675 			    MDI_VHC_VHCACHE_DIRTY);
7676 		} else {
7677 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7678 				cmn_err(CE_CONT, "%s: update failed\n",
7679 				    vhc->vhc_vhcache_filename);
7680 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7681 			}
7682 			rv = MDI_FAILURE;
7683 		}
7684 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7685 		cmn_err(CE_CONT,
7686 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7687 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7688 	}
7689 	mutex_exit(&vhc->vhc_lock);
7690 
7691 	return (rv);
7692 }
7693 
7694 /*
7695  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7696  * Exits itself if left idle for the idle timeout period.
7697  */
7698 static void
7699 vhcache_flush_thread(void *arg)
7700 {
7701 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7702 	clock_t idle_time, quit_at_ticks;
7703 	callb_cpr_t cprinfo;
7704 
7705 	/* number of seconds to sleep idle before exiting */
7706 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7707 
7708 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7709 	    "mdi_vhcache_flush");
7710 	mutex_enter(&vhc->vhc_lock);
7711 	for (; ; ) {
7712 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7713 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7714 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7715 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7716 				(void) cv_timedwait(&vhc->vhc_cv,
7717 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7718 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7719 			} else {
7720 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7721 				mutex_exit(&vhc->vhc_lock);
7722 
7723 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7724 					vhcache_dirty(vhc);
7725 
7726 				mutex_enter(&vhc->vhc_lock);
7727 			}
7728 		}
7729 
7730 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7731 
7732 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7733 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7734 		    ddi_get_lbolt() < quit_at_ticks) {
7735 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7736 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7737 			    quit_at_ticks);
7738 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7739 		}
7740 
7741 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7742 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7743 			goto out;
7744 	}
7745 
7746 out:
7747 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7748 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7749 	CALLB_CPR_EXIT(&cprinfo);
7750 }
7751 
7752 /*
7753  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7754  */
7755 static void
7756 vhcache_dirty(mdi_vhci_config_t *vhc)
7757 {
7758 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7759 	int create_thread;
7760 
7761 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7762 	/* do not flush cache until the cache is fully built */
7763 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7764 		rw_exit(&vhcache->vhcache_lock);
7765 		return;
7766 	}
7767 	rw_exit(&vhcache->vhcache_lock);
7768 
7769 	mutex_enter(&vhc->vhc_lock);
7770 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7771 		mutex_exit(&vhc->vhc_lock);
7772 		return;
7773 	}
7774 
7775 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7776 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
7777 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
7778 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7779 		cv_broadcast(&vhc->vhc_cv);
7780 		create_thread = 0;
7781 	} else {
7782 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
7783 		create_thread = 1;
7784 	}
7785 	mutex_exit(&vhc->vhc_lock);
7786 
7787 	if (create_thread)
7788 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
7789 		    0, &p0, TS_RUN, minclsyspri);
7790 }
7791 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	/* path of the phci to be bus configured */
	char *phbc_phci_path;

	/* vhci bus config operation this request belongs to */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;

	/* next request in the singly linked list of requests */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;
7801 
7802 /* vhci bus config structure - one for each vhci bus config operation */
7803 typedef struct mdi_vhci_bus_config_s {
7804 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
7805 	major_t vhbc_op_major;		/* bus config op major */
7806 	uint_t vhbc_op_flags;		/* bus config op flags */
7807 	kmutex_t vhbc_lock;
7808 	kcondvar_t vhbc_cv;
7809 	int vhbc_thr_count;
7810 } mdi_vhci_bus_config_t;
7811 
7812 /*
7813  * bus config the specified phci
7814  */
7815 static void
7816 bus_config_phci(void *arg)
7817 {
7818 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
7819 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
7820 	dev_info_t *ph_dip;
7821 
7822 	/*
7823 	 * first configure all path components upto phci and then configure
7824 	 * the phci children.
7825 	 */
7826 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
7827 	    != NULL) {
7828 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
7829 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
7830 			(void) ndi_devi_config_driver(ph_dip,
7831 			    vhbc->vhbc_op_flags,
7832 			    vhbc->vhbc_op_major);
7833 		} else
7834 			(void) ndi_devi_config(ph_dip,
7835 			    vhbc->vhbc_op_flags);
7836 
7837 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7838 		ndi_rele_devi(ph_dip);
7839 	}
7840 
7841 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
7842 	kmem_free(phbc, sizeof (*phbc));
7843 
7844 	mutex_enter(&vhbc->vhbc_lock);
7845 	vhbc->vhbc_thr_count--;
7846 	if (vhbc->vhbc_thr_count == 0)
7847 		cv_broadcast(&vhbc->vhbc_cv);
7848 	mutex_exit(&vhbc->vhbc_lock);
7849 }
7850 
7851 /*
7852  * Bus config all phcis associated with the vhci in parallel.
7853  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
7854  */
7855 static void
7856 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
7857     ddi_bus_config_op_t op, major_t maj)
7858 {
7859 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
7860 	mdi_vhci_bus_config_t *vhbc;
7861 	mdi_vhcache_phci_t *cphci;
7862 
7863 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7864 	if (vhcache->vhcache_phci_head == NULL) {
7865 		rw_exit(&vhcache->vhcache_lock);
7866 		return;
7867 	}
7868 
7869 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
7870 
7871 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7872 	    cphci = cphci->cphci_next) {
7873 		/* skip phcis that haven't attached before root is available */
7874 		if (!modrootloaded && (cphci->cphci_phci == NULL))
7875 			continue;
7876 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
7877 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
7878 		    KM_SLEEP);
7879 		phbc->phbc_vhbusconfig = vhbc;
7880 		phbc->phbc_next = phbc_head;
7881 		phbc_head = phbc;
7882 		vhbc->vhbc_thr_count++;
7883 	}
7884 	rw_exit(&vhcache->vhcache_lock);
7885 
7886 	vhbc->vhbc_op = op;
7887 	vhbc->vhbc_op_major = maj;
7888 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
7889 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
7890 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
7891 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
7892 
7893 	/* now create threads to initiate bus config on all phcis in parallel */
7894 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
7895 		phbc_next = phbc->phbc_next;
7896 		if (mdi_mtc_off)
7897 			bus_config_phci((void *)phbc);
7898 		else
7899 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
7900 			    0, &p0, TS_RUN, minclsyspri);
7901 	}
7902 
7903 	mutex_enter(&vhbc->vhbc_lock);
7904 	/* wait until all threads exit */
7905 	while (vhbc->vhbc_thr_count > 0)
7906 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
7907 	mutex_exit(&vhbc->vhbc_lock);
7908 
7909 	mutex_destroy(&vhbc->vhbc_lock);
7910 	cv_destroy(&vhbc->vhbc_cv);
7911 	kmem_free(vhbc, sizeof (*vhbc));
7912 }
7913 
7914 /*
7915  * Single threaded version of bus_config_all_phcis()
7916  */
7917 static void
7918 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
7919     ddi_bus_config_op_t op, major_t maj)
7920 {
7921 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7922 
7923 	single_threaded_vhconfig_enter(vhc);
7924 	bus_config_all_phcis(vhcache, flags, op, maj);
7925 	single_threaded_vhconfig_exit(vhc);
7926 }
7927 
7928 /*
7929  * Perform BUS_CONFIG_ONE on the specified child of the phci.
7930  * The path includes the child component in addition to the phci path.
7931  */
7932 static int
7933 bus_config_one_phci_child(char *path)
7934 {
7935 	dev_info_t *ph_dip, *child;
7936 	char *devnm;
7937 	int rv = MDI_FAILURE;
7938 
7939 	/* extract the child component of the phci */
7940 	devnm = strrchr(path, '/');
7941 	*devnm++ = '\0';
7942 
7943 	/*
7944 	 * first configure all path components upto phci and then
7945 	 * configure the phci child.
7946 	 */
7947 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
7948 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
7949 		    NDI_SUCCESS) {
7950 			/*
7951 			 * release the hold that ndi_devi_config_one() placed
7952 			 */
7953 			ndi_rele_devi(child);
7954 			rv = MDI_SUCCESS;
7955 		}
7956 
7957 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7958 		ndi_rele_devi(ph_dip);
7959 	}
7960 
7961 	devnm--;
7962 	*devnm = '/';
7963 	return (rv);
7964 }
7965 
7966 /*
7967  * Build a list of phci client paths for the specified vhci client.
7968  * The list includes only those phci client paths which aren't configured yet.
7969  */
7970 static mdi_phys_path_t *
7971 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
7972 {
7973 	mdi_vhcache_pathinfo_t *cpi;
7974 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
7975 	int config_path, len;
7976 
7977 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7978 		/*
7979 		 * include only those paths that aren't configured.
7980 		 */
7981 		config_path = 0;
7982 		if (cpi->cpi_pip == NULL)
7983 			config_path = 1;
7984 		else {
7985 			MDI_PI_LOCK(cpi->cpi_pip);
7986 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
7987 				config_path = 1;
7988 			MDI_PI_UNLOCK(cpi->cpi_pip);
7989 		}
7990 
7991 		if (config_path) {
7992 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
7993 			len = strlen(cpi->cpi_cphci->cphci_path) +
7994 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
7995 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
7996 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
7997 			    cpi->cpi_cphci->cphci_path, ct_name,
7998 			    cpi->cpi_addr);
7999 			pp->phys_path_next = NULL;
8000 
8001 			if (pp_head == NULL)
8002 				pp_head = pp;
8003 			else
8004 				pp_tail->phys_path_next = pp;
8005 			pp_tail = pp;
8006 		}
8007 	}
8008 
8009 	return (pp_head);
8010 }
8011 
8012 /*
8013  * Free the memory allocated for phci client path list.
8014  */
8015 static void
8016 free_phclient_path_list(mdi_phys_path_t *pp_head)
8017 {
8018 	mdi_phys_path_t *pp, *pp_next;
8019 
8020 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8021 		pp_next = pp->phys_path_next;
8022 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8023 		kmem_free(pp, sizeof (*pp));
8024 	}
8025 }
8026 
8027 /*
8028  * Allocated async client structure and initialize with the specified values.
8029  */
8030 static mdi_async_client_config_t *
8031 alloc_async_client_config(char *ct_name, char *ct_addr,
8032     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8033 {
8034 	mdi_async_client_config_t *acc;
8035 
8036 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8037 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8038 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8039 	acc->acc_phclient_path_list_head = pp_head;
8040 	init_vhcache_lookup_token(&acc->acc_token, tok);
8041 	acc->acc_next = NULL;
8042 	return (acc);
8043 }
8044 
8045 /*
8046  * Free the memory allocated for the async client structure and their members.
8047  */
8048 static void
8049 free_async_client_config(mdi_async_client_config_t *acc)
8050 {
8051 	if (acc->acc_phclient_path_list_head)
8052 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8053 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8054 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8055 	kmem_free(acc, sizeof (*acc));
8056 }
8057 
8058 /*
8059  * Sort vhcache pathinfos (cpis) of the specified client.
8060  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8061  * flag set come at the beginning of the list. All cpis which have this
8062  * flag set come at the end of the list.
8063  */
8064 static void
8065 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8066 {
8067 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8068 
8069 	cpi_head = cct->cct_cpi_head;
8070 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8071 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8072 		cpi_next = cpi->cpi_next;
8073 		enqueue_vhcache_pathinfo(cct, cpi);
8074 	}
8075 }
8076 
8077 /*
8078  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8079  * every vhcache pathinfo of the specified client. If not adjust the flag
8080  * setting appropriately.
8081  *
8082  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8083  * on-disk vhci cache. So every time this flag is updated the cache must be
8084  * flushed.
8085  */
8086 static void
8087 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8088     mdi_vhcache_lookup_token_t *tok)
8089 {
8090 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8091 	mdi_vhcache_client_t *cct;
8092 	mdi_vhcache_pathinfo_t *cpi;
8093 
8094 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8095 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8096 	    == NULL) {
8097 		rw_exit(&vhcache->vhcache_lock);
8098 		return;
8099 	}
8100 
8101 	/*
8102 	 * to avoid unnecessary on-disk cache updates, first check if an
8103 	 * update is really needed. If no update is needed simply return.
8104 	 */
8105 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8106 		if ((cpi->cpi_pip != NULL &&
8107 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8108 		    (cpi->cpi_pip == NULL &&
8109 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8110 			break;
8111 		}
8112 	}
8113 	if (cpi == NULL) {
8114 		rw_exit(&vhcache->vhcache_lock);
8115 		return;
8116 	}
8117 
8118 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8119 		rw_exit(&vhcache->vhcache_lock);
8120 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8121 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8122 		    tok)) == NULL) {
8123 			rw_exit(&vhcache->vhcache_lock);
8124 			return;
8125 		}
8126 	}
8127 
8128 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8129 		if (cpi->cpi_pip != NULL)
8130 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8131 		else
8132 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8133 	}
8134 	sort_vhcache_paths(cct);
8135 
8136 	rw_exit(&vhcache->vhcache_lock);
8137 	vhcache_dirty(vhc);
8138 }
8139 
8140 /*
8141  * Configure all specified paths of the client.
8142  */
8143 static void
8144 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8145     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8146 {
8147 	mdi_phys_path_t *pp;
8148 
8149 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8150 		(void) bus_config_one_phci_child(pp->phys_path);
8151 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8152 }
8153 
8154 /*
8155  * Dequeue elements from vhci async client config list and bus configure
8156  * their corresponding phci clients.
8157  */
8158 static void
8159 config_client_paths_thread(void *arg)
8160 {
8161 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8162 	mdi_async_client_config_t *acc;
8163 	clock_t quit_at_ticks;
8164 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8165 	callb_cpr_t cprinfo;
8166 
8167 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8168 	    "mdi_config_client_paths");
8169 
8170 	for (; ; ) {
8171 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8172 
8173 		mutex_enter(&vhc->vhc_lock);
8174 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8175 		    vhc->vhc_acc_list_head == NULL &&
8176 		    ddi_get_lbolt() < quit_at_ticks) {
8177 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8178 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8179 			    quit_at_ticks);
8180 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8181 		}
8182 
8183 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8184 		    vhc->vhc_acc_list_head == NULL)
8185 			goto out;
8186 
8187 		acc = vhc->vhc_acc_list_head;
8188 		vhc->vhc_acc_list_head = acc->acc_next;
8189 		if (vhc->vhc_acc_list_head == NULL)
8190 			vhc->vhc_acc_list_tail = NULL;
8191 		vhc->vhc_acc_count--;
8192 		mutex_exit(&vhc->vhc_lock);
8193 
8194 		config_client_paths_sync(vhc, acc->acc_ct_name,
8195 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8196 		    &acc->acc_token);
8197 
8198 		free_async_client_config(acc);
8199 	}
8200 
8201 out:
8202 	vhc->vhc_acc_thrcount--;
8203 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8204 	CALLB_CPR_EXIT(&cprinfo);
8205 }
8206 
8207 /*
8208  * Arrange for all the phci client paths (pp_head) for the specified client
8209  * to be bus configured asynchronously by a thread.
8210  */
8211 static void
8212 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8213     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8214 {
8215 	mdi_async_client_config_t *acc, *newacc;
8216 	int create_thread;
8217 
8218 	if (pp_head == NULL)
8219 		return;
8220 
8221 	if (mdi_mtc_off) {
8222 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8223 		free_phclient_path_list(pp_head);
8224 		return;
8225 	}
8226 
8227 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8228 	ASSERT(newacc);
8229 
8230 	mutex_enter(&vhc->vhc_lock);
8231 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8232 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8233 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8234 			free_async_client_config(newacc);
8235 			mutex_exit(&vhc->vhc_lock);
8236 			return;
8237 		}
8238 	}
8239 
8240 	if (vhc->vhc_acc_list_head == NULL)
8241 		vhc->vhc_acc_list_head = newacc;
8242 	else
8243 		vhc->vhc_acc_list_tail->acc_next = newacc;
8244 	vhc->vhc_acc_list_tail = newacc;
8245 	vhc->vhc_acc_count++;
8246 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8247 		cv_broadcast(&vhc->vhc_cv);
8248 		create_thread = 0;
8249 	} else {
8250 		vhc->vhc_acc_thrcount++;
8251 		create_thread = 1;
8252 	}
8253 	mutex_exit(&vhc->vhc_lock);
8254 
8255 	if (create_thread)
8256 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8257 		    0, &p0, TS_RUN, minclsyspri);
8258 }
8259 
8260 /*
8261  * Return number of online paths for the specified client.
8262  */
8263 static int
8264 nonline_paths(mdi_vhcache_client_t *cct)
8265 {
8266 	mdi_vhcache_pathinfo_t *cpi;
8267 	int online_count = 0;
8268 
8269 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8270 		if (cpi->cpi_pip != NULL) {
8271 			MDI_PI_LOCK(cpi->cpi_pip);
8272 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8273 				online_count++;
8274 			MDI_PI_UNLOCK(cpi->cpi_pip);
8275 		}
8276 	}
8277 
8278 	return (online_count);
8279 }
8280 
8281 /*
8282  * Bus configure all paths for the specified vhci client.
8283  * If at least one path for the client is already online, the remaining paths
8284  * will be configured asynchronously. Otherwise, it synchronously configures
8285  * the paths until at least one path is online and then rest of the paths
8286  * will be configured asynchronously.
8287  */
8288 static void
8289 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8290 {
8291 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8292 	mdi_phys_path_t *pp_head, *pp;
8293 	mdi_vhcache_client_t *cct;
8294 	mdi_vhcache_lookup_token_t tok;
8295 
8296 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8297 
8298 	init_vhcache_lookup_token(&tok, NULL);
8299 
8300 	if (ct_name == NULL || ct_addr == NULL ||
8301 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8302 	    == NULL ||
8303 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8304 		rw_exit(&vhcache->vhcache_lock);
8305 		return;
8306 	}
8307 
8308 	/* if at least one path is online, configure the rest asynchronously */
8309 	if (nonline_paths(cct) > 0) {
8310 		rw_exit(&vhcache->vhcache_lock);
8311 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8312 		return;
8313 	}
8314 
8315 	rw_exit(&vhcache->vhcache_lock);
8316 
8317 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8318 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8319 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8320 
8321 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8322 			    ct_addr, &tok)) == NULL) {
8323 				rw_exit(&vhcache->vhcache_lock);
8324 				goto out;
8325 			}
8326 
8327 			if (nonline_paths(cct) > 0 &&
8328 			    pp->phys_path_next != NULL) {
8329 				rw_exit(&vhcache->vhcache_lock);
8330 				config_client_paths_async(vhc, ct_name, ct_addr,
8331 				    pp->phys_path_next, &tok);
8332 				pp->phys_path_next = NULL;
8333 				goto out;
8334 			}
8335 
8336 			rw_exit(&vhcache->vhcache_lock);
8337 		}
8338 	}
8339 
8340 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8341 out:
8342 	free_phclient_path_list(pp_head);
8343 }
8344 
8345 static void
8346 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8347 {
8348 	mutex_enter(&vhc->vhc_lock);
8349 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8350 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8351 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8352 	mutex_exit(&vhc->vhc_lock);
8353 }
8354 
8355 static void
8356 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8357 {
8358 	mutex_enter(&vhc->vhc_lock);
8359 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8360 	cv_broadcast(&vhc->vhc_cv);
8361 	mutex_exit(&vhc->vhc_lock);
8362 }
8363 
/* Description of a phci driver in the built-in tables below. */
typedef struct mdi_phci_driver_info {
	/* name of the phci driver */
	char	*phdriver_name;

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
};

static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor", 1 }
};
8386 
8387 static void *
8388 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
8389 {
8390 	void *new_ptr;
8391 
8392 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
8393 	if (old_ptr) {
8394 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
8395 		kmem_free(old_ptr, old_size);
8396 	}
8397 	return (new_ptr);
8398 }
8399 
8400 static void
8401 add_to_phci_list(char ***driver_list, int **root_support_list,
8402     int *cur_elements, int *max_elements, char *driver_name, int root_support)
8403 {
8404 	ASSERT(*cur_elements <= *max_elements);
8405 	if (*cur_elements == *max_elements) {
8406 		*max_elements += 10;
8407 		*driver_list = mdi_realloc(*driver_list,
8408 		    sizeof (char *) * (*cur_elements),
8409 		    sizeof (char *) * (*max_elements));
8410 		*root_support_list = mdi_realloc(*root_support_list,
8411 		    sizeof (int) * (*cur_elements),
8412 		    sizeof (int) * (*max_elements));
8413 	}
8414 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
8415 	(*root_support_list)[*cur_elements] = root_support;
8416 	(*cur_elements)++;
8417 }
8418 
8419 static void
8420 get_phci_driver_list(char *vhci_class, char ***driver_list,
8421     int **root_support_list, int *cur_elements, int *max_elements)
8422 {
8423 	mdi_phci_driver_info_t	*st_driver_list, *p;
8424 	int		st_ndrivers, root_support, i, j, driver_conf_count;
8425 	major_t		m;
8426 	struct devnames	*dnp;
8427 	ddi_prop_t	*propp;
8428 
8429 	*driver_list = NULL;
8430 	*root_support_list = NULL;
8431 	*cur_elements = 0;
8432 	*max_elements = 0;
8433 
8434 	/* add the phci drivers derived from the phci driver.conf files */
8435 	for (m = 0; m < devcnt; m++) {
8436 		dnp = &devnamesp[m];
8437 
8438 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
8439 			LOCK_DEV_OPS(&dnp->dn_lock);
8440 			if (dnp->dn_global_prop_ptr != NULL &&
8441 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
8442 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
8443 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
8444 			    strcmp(propp->prop_val, vhci_class) == 0) {
8445 
8446 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
8447 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
8448 				    &dnp->dn_global_prop_ptr->prop_list)
8449 				    == NULL) ? 1 : 0;
8450 
8451 				add_to_phci_list(driver_list, root_support_list,
8452 				    cur_elements, max_elements, dnp->dn_name,
8453 				    root_support);
8454 
8455 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8456 			} else
8457 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8458 		}
8459 	}
8460 
8461 	driver_conf_count = *cur_elements;
8462 
8463 	/* add the phci drivers specified in the built-in tables */
8464 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
8465 		st_driver_list = scsi_phci_driver_list;
8466 		st_ndrivers = sizeof (scsi_phci_driver_list) /
8467 		    sizeof (mdi_phci_driver_info_t);
8468 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
8469 		st_driver_list = ib_phci_driver_list;
8470 		st_ndrivers = sizeof (ib_phci_driver_list) /
8471 		    sizeof (mdi_phci_driver_info_t);
8472 	} else {
8473 		st_driver_list = NULL;
8474 		st_ndrivers = 0;
8475 	}
8476 
8477 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
8478 		/* add this phci driver if not already added before */
8479 		for (j = 0; j < driver_conf_count; j++) {
8480 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
8481 				break;
8482 		}
8483 		if (j == driver_conf_count) {
8484 			add_to_phci_list(driver_list, root_support_list,
8485 			    cur_elements, max_elements, p->phdriver_name,
8486 			    p->phdriver_root_support);
8487 		}
8488 	}
8489 }
8490 
8491 /*
8492  * Attach the phci driver instances associated with the specified vhci class.
8493  * If root is mounted attach all phci driver instances.
8494  * If root is not mounted, attach the instances of only those phci
8495  * drivers that have the root support.
8496  */
8497 static void
8498 attach_phci_drivers(char *vhci_class)
8499 {
8500 	char	**driver_list, **p;
8501 	int	*root_support_list;
8502 	int	cur_elements, max_elements, i;
8503 	major_t	m;
8504 
8505 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
8506 	    &cur_elements, &max_elements);
8507 
8508 	for (i = 0; i < cur_elements; i++) {
8509 		if (modrootloaded || root_support_list[i]) {
8510 			m = ddi_name_to_major(driver_list[i]);
8511 			if (m != (major_t)-1 && ddi_hold_installed_driver(m))
8512 				ddi_rele_driver(m);
8513 		}
8514 	}
8515 
8516 	if (driver_list) {
8517 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
8518 			kmem_free(*p, strlen(*p) + 1);
8519 		kmem_free(driver_list, sizeof (char *) * max_elements);
8520 		kmem_free(root_support_list, sizeof (int) * max_elements);
8521 	}
8522 }
8523 
8524 /*
8525  * Build vhci cache:
8526  *
8527  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8528  * the phci driver instances. During this process the cache gets built.
8529  *
8530  * Cache is built fully if the root is mounted.
8531  * If the root is not mounted, phci drivers that do not have root support
8532  * are not attached. As a result the cache is built partially. The entries
8533  * in the cache reflect only those phci drivers that have root support.
8534  */
8535 static int
8536 build_vhci_cache(mdi_vhci_t *vh)
8537 {
8538 	mdi_vhci_config_t *vhc = vh->vh_config;
8539 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8540 
8541 	single_threaded_vhconfig_enter(vhc);
8542 
8543 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8544 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8545 		rw_exit(&vhcache->vhcache_lock);
8546 		single_threaded_vhconfig_exit(vhc);
8547 		return (0);
8548 	}
8549 	rw_exit(&vhcache->vhcache_lock);
8550 
8551 	attach_phci_drivers(vh->vh_class);
8552 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8553 	    BUS_CONFIG_ALL, (major_t)-1);
8554 
8555 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8556 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8557 	rw_exit(&vhcache->vhcache_lock);
8558 
8559 	single_threaded_vhconfig_exit(vhc);
8560 	vhcache_dirty(vhc);
8561 	return (1);
8562 }
8563 
8564 /*
8565  * Determine if discovery of paths is needed.
8566  */
8567 static int
8568 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8569 {
8570 	int rv = 1;
8571 
8572 	mutex_enter(&vhc->vhc_lock);
8573 	if (i_ddi_io_initialized() == 0) {
8574 		if (vhc->vhc_path_discovery_boot > 0) {
8575 			vhc->vhc_path_discovery_boot--;
8576 			goto out;
8577 		}
8578 	} else {
8579 		if (vhc->vhc_path_discovery_postboot > 0) {
8580 			vhc->vhc_path_discovery_postboot--;
8581 			goto out;
8582 		}
8583 	}
8584 
8585 	/*
8586 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8587 	 * This is to avoid a series of full path discoveries when opening
8588 	 * stale /dev/[r]dsk links.
8589 	 */
8590 	if (mdi_path_discovery_interval != -1 &&
8591 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8592 		goto out;
8593 
8594 	rv = 0;
8595 out:
8596 	mutex_exit(&vhc->vhc_lock);
8597 	return (rv);
8598 }
8599 
8600 /*
8601  * Discover all paths:
8602  *
8603  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8604  * driver instances. During this process all paths will be discovered.
8605  */
8606 static int
8607 vhcache_discover_paths(mdi_vhci_t *vh)
8608 {
8609 	mdi_vhci_config_t *vhc = vh->vh_config;
8610 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8611 	int rv = 0;
8612 
8613 	single_threaded_vhconfig_enter(vhc);
8614 
8615 	if (vhcache_do_discovery(vhc)) {
8616 		attach_phci_drivers(vh->vh_class);
8617 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8618 		    NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1);
8619 
8620 		mutex_enter(&vhc->vhc_lock);
8621 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8622 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8623 		mutex_exit(&vhc->vhc_lock);
8624 		rv = 1;
8625 	}
8626 
8627 	single_threaded_vhconfig_exit(vhc);
8628 	return (rv);
8629 }
8630 
8631 /*
8632  * Generic vhci bus config implementation:
8633  *
8634  * Parameters
8635  *	vdip	vhci dip
8636  *	flags	bus config flags
8637  *	op	bus config operation
8638  *	The remaining parameters are bus config operation specific
8639  *
8640  * for BUS_CONFIG_ONE
8641  *	arg	pointer to name@addr
8642  *	child	upon successful return from this function, *child will be
8643  *		set to the configured and held devinfo child node of vdip.
8644  *	ct_addr	pointer to client address (i.e. GUID)
8645  *
8646  * for BUS_CONFIG_DRIVER
8647  *	arg	major number of the driver
8648  *	child and ct_addr parameters are ignored
8649  *
8650  * for BUS_CONFIG_ALL
8651  *	arg, child, and ct_addr parameters are ignored
8652  *
8653  * Note that for the rest of the bus config operations, this function simply
8654  * calls the framework provided default bus config routine.
8655  */
8656 int
8657 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8658     void *arg, dev_info_t **child, char *ct_addr)
8659 {
8660 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8661 	mdi_vhci_config_t *vhc = vh->vh_config;
8662 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8663 	int rv = 0;
8664 	int params_valid = 0;
8665 	char *cp;
8666 
8667 	/*
8668 	 * To bus config vhcis we relay operation, possibly using another
8669 	 * thread, to phcis. The phci driver then interacts with MDI to cause
8670 	 * vhci child nodes to be enumerated under the vhci node.  Adding a
8671 	 * vhci child requires an ndi_devi_enter of the vhci. Since another
8672 	 * thread may be adding the child, to avoid deadlock we can't wait
8673 	 * for the relayed operations to complete if we have already entered
8674 	 * the vhci node.
8675 	 */
8676 	if (DEVI_BUSY_OWNED(vdip)) {
8677 		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: "
8678 		    "vhci dip is busy owned %p\n", (void *)vdip));
8679 		goto default_bus_config;
8680 	}
8681 
8682 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8683 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8684 		rw_exit(&vhcache->vhcache_lock);
8685 		rv = build_vhci_cache(vh);
8686 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8687 	}
8688 
8689 	switch (op) {
8690 	case BUS_CONFIG_ONE:
8691 		if (arg != NULL && ct_addr != NULL) {
8692 			/* extract node name */
8693 			cp = (char *)arg;
8694 			while (*cp != '\0' && *cp != '@')
8695 				cp++;
8696 			if (*cp == '@') {
8697 				params_valid = 1;
8698 				*cp = '\0';
8699 				config_client_paths(vhc, (char *)arg, ct_addr);
8700 				/* config_client_paths() releases cache_lock */
8701 				*cp = '@';
8702 				break;
8703 			}
8704 		}
8705 
8706 		rw_exit(&vhcache->vhcache_lock);
8707 		break;
8708 
8709 	case BUS_CONFIG_DRIVER:
8710 		rw_exit(&vhcache->vhcache_lock);
8711 		if (rv == 0)
8712 			st_bus_config_all_phcis(vhc, flags, op,
8713 			    (major_t)(uintptr_t)arg);
8714 		break;
8715 
8716 	case BUS_CONFIG_ALL:
8717 		rw_exit(&vhcache->vhcache_lock);
8718 		if (rv == 0)
8719 			st_bus_config_all_phcis(vhc, flags, op, -1);
8720 		break;
8721 
8722 	default:
8723 		rw_exit(&vhcache->vhcache_lock);
8724 		break;
8725 	}
8726 
8727 
8728 default_bus_config:
8729 	/*
8730 	 * All requested child nodes are enumerated under the vhci.
8731 	 * Now configure them.
8732 	 */
8733 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8734 	    NDI_SUCCESS) {
8735 		return (MDI_SUCCESS);
8736 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8737 		/* discover all paths and try configuring again */
8738 		if (vhcache_discover_paths(vh) &&
8739 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8740 		    NDI_SUCCESS)
8741 			return (MDI_SUCCESS);
8742 	}
8743 
8744 	return (MDI_FAILURE);
8745 }
8746 
8747 /*
8748  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8749  */
8750 static nvlist_t *
8751 read_on_disk_vhci_cache(char *vhci_class)
8752 {
8753 	nvlist_t *nvl;
8754 	int err;
8755 	char *filename;
8756 
8757 	filename = vhclass2vhcache_filename(vhci_class);
8758 
8759 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8760 		kmem_free(filename, strlen(filename) + 1);
8761 		return (nvl);
8762 	} else if (err == EIO)
8763 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8764 	else if (err == EINVAL)
8765 		cmn_err(CE_WARN,
8766 		    "%s: data file corrupted, will recreate\n", filename);
8767 
8768 	kmem_free(filename, strlen(filename) + 1);
8769 	return (NULL);
8770 }
8771 
8772 /*
8773  * Read on-disk vhci cache into nvlists for all vhci classes.
8774  * Called during booting by i_ddi_read_devices_files().
8775  */
8776 void
8777 mdi_read_devices_files(void)
8778 {
8779 	int i;
8780 
8781 	for (i = 0; i < N_VHCI_CLASSES; i++)
8782 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
8783 }
8784 
8785 /*
8786  * Remove all stale entries from vhci cache.
8787  */
8788 static void
8789 clean_vhcache(mdi_vhci_config_t *vhc)
8790 {
8791 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8792 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
8793 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
8794 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
8795 
8796 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8797 
8798 	cct_head = vhcache->vhcache_client_head;
8799 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
8800 	for (cct = cct_head; cct != NULL; cct = cct_next) {
8801 		cct_next = cct->cct_next;
8802 
8803 		cpi_head = cct->cct_cpi_head;
8804 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8805 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8806 			cpi_next = cpi->cpi_next;
8807 			if (cpi->cpi_pip != NULL) {
8808 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
8809 				enqueue_tail_vhcache_pathinfo(cct, cpi);
8810 			} else
8811 				free_vhcache_pathinfo(cpi);
8812 		}
8813 
8814 		if (cct->cct_cpi_head != NULL)
8815 			enqueue_vhcache_client(vhcache, cct);
8816 		else {
8817 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
8818 			    (mod_hash_key_t)cct->cct_name_addr);
8819 			free_vhcache_client(cct);
8820 		}
8821 	}
8822 
8823 	cphci_head = vhcache->vhcache_phci_head;
8824 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
8825 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
8826 		cphci_next = cphci->cphci_next;
8827 		if (cphci->cphci_phci != NULL)
8828 			enqueue_vhcache_phci(vhcache, cphci);
8829 		else
8830 			free_vhcache_phci(cphci);
8831 	}
8832 
8833 	vhcache->vhcache_clean_time = lbolt64;
8834 	rw_exit(&vhcache->vhcache_lock);
8835 	vhcache_dirty(vhc);
8836 }
8837 
8838 /*
8839  * Remove all stale entries from vhci cache.
8840  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
8841  */
8842 void
8843 mdi_clean_vhcache(void)
8844 {
8845 	mdi_vhci_t *vh;
8846 
8847 	mutex_enter(&mdi_mutex);
8848 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8849 		vh->vh_refcnt++;
8850 		mutex_exit(&mdi_mutex);
8851 		clean_vhcache(vh->vh_config);
8852 		mutex_enter(&mdi_mutex);
8853 		vh->vh_refcnt--;
8854 	}
8855 	mutex_exit(&mdi_mutex);
8856 }
8857 
8858 /*
8859  * mdi_vhci_walk_clients():
8860  *		Walker routine to traverse client dev_info nodes
8861  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
8862  * below the client, including nexus devices, which we dont want.
8863  * So we just traverse the immediate siblings, starting from 1st client.
8864  */
8865 void
8866 mdi_vhci_walk_clients(dev_info_t *vdip,
8867     int (*f)(dev_info_t *, void *), void *arg)
8868 {
8869 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
8870 	dev_info_t	*cdip;
8871 	mdi_client_t	*ct;
8872 
8873 	MDI_VHCI_CLIENT_LOCK(vh);
8874 	cdip = ddi_get_child(vdip);
8875 	while (cdip) {
8876 		ct = i_devi_get_client(cdip);
8877 		MDI_CLIENT_LOCK(ct);
8878 
8879 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
8880 			cdip = ddi_get_next_sibling(cdip);
8881 		else
8882 			cdip = NULL;
8883 
8884 		MDI_CLIENT_UNLOCK(ct);
8885 	}
8886 	MDI_VHCI_CLIENT_UNLOCK(vh);
8887 }
8888 
8889 /*
8890  * mdi_vhci_walk_phcis():
8891  *		Walker routine to traverse phci dev_info nodes
8892  */
8893 void
8894 mdi_vhci_walk_phcis(dev_info_t *vdip,
8895     int (*f)(dev_info_t *, void *), void *arg)
8896 {
8897 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
8898 	mdi_phci_t	*ph, *next;
8899 
8900 	MDI_VHCI_PHCI_LOCK(vh);
8901 	ph = vh->vh_phci_head;
8902 	while (ph) {
8903 		MDI_PHCI_LOCK(ph);
8904 
8905 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
8906 			next = ph->ph_next;
8907 		else
8908 			next = NULL;
8909 
8910 		MDI_PHCI_UNLOCK(ph);
8911 		ph = next;
8912 	}
8913 	MDI_VHCI_PHCI_UNLOCK(vh);
8914 }
8915 
8916 
8917 /*
8918  * mdi_walk_vhcis():
8919  *		Walker routine to traverse vhci dev_info nodes
8920  */
8921 void
8922 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
8923 {
8924 	mdi_vhci_t	*vh = NULL;
8925 
8926 	mutex_enter(&mdi_mutex);
8927 	/*
8928 	 * Scan for already registered vhci
8929 	 */
8930 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8931 		vh->vh_refcnt++;
8932 		mutex_exit(&mdi_mutex);
8933 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
8934 			mutex_enter(&mdi_mutex);
8935 			vh->vh_refcnt--;
8936 			break;
8937 		} else {
8938 			mutex_enter(&mdi_mutex);
8939 			vh->vh_refcnt--;
8940 		}
8941 	}
8942 
8943 	mutex_exit(&mdi_mutex);
8944 }
8945 
8946 /*
8947  * i_mdi_log_sysevent():
8948  *		Logs events for pickup by syseventd
8949  */
8950 static void
8951 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
8952 {
8953 	char		*path_name;
8954 	nvlist_t	*attr_list;
8955 
8956 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
8957 	    KM_SLEEP) != DDI_SUCCESS) {
8958 		goto alloc_failed;
8959 	}
8960 
8961 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
8962 	(void) ddi_pathname(dip, path_name);
8963 
8964 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
8965 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
8966 		goto error;
8967 	}
8968 
8969 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
8970 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
8971 		goto error;
8972 	}
8973 
8974 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
8975 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
8976 		goto error;
8977 	}
8978 
8979 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
8980 	    path_name) != DDI_SUCCESS) {
8981 		goto error;
8982 	}
8983 
8984 	if (nvlist_add_string(attr_list, DDI_CLASS,
8985 	    ph_vh_class) != DDI_SUCCESS) {
8986 		goto error;
8987 	}
8988 
8989 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
8990 	    attr_list, NULL, DDI_SLEEP);
8991 
8992 error:
8993 	kmem_free(path_name, MAXPATHLEN);
8994 	nvlist_free(attr_list);
8995 	return;
8996 
8997 alloc_failed:
8998 	MDI_DEBUG(1, (CE_WARN, dip,
8999 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
9000 }
9001 
9002 char **
9003 mdi_get_phci_driver_list(char *vhci_class, int	*ndrivers)
9004 {
9005 	char	**driver_list, **ret_driver_list = NULL;
9006 	int	*root_support_list;
9007 	int	cur_elements, max_elements;
9008 
9009 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9010 	    &cur_elements, &max_elements);
9011 
9012 
9013 	if (driver_list) {
9014 		kmem_free(root_support_list, sizeof (int) * max_elements);
9015 		ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9016 		    * max_elements, sizeof (char *) * cur_elements);
9017 	}
9018 	*ndrivers = cur_elements;
9019 
9020 	return (ret_driver_list);
9021 
9022 }
9023 
/*
 * Free a driver list of ndrivers entries: each name string is freed
 * using its string length plus the terminator, then the pointer array
 * itself is freed.  A NULL list is ignored.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
	int	idx;

	if (driver_list == NULL)
		return;

	for (idx = 0; idx < ndrivers; idx++)
		kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);

	kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9036