xref: /titanic_51/usr/src/uts/common/os/sunmdi.c (revision 821a83dbe7cba700cd57da95bf981850118f369e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
30  * detailed discussion of the overall mpxio architecture.
31  *
32  * Default locking order:
33  *
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
35  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
39  */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 #include <sys/modhash.h>
68 #include <sys/disp.h>
69 #include <sys/autoconf.h>
70 
#ifdef	DEBUG
#include <sys/debug.h>
/* Messages whose level is <= mdi_debug are handed to i_mdi_log(). */
int	mdi_debug = 1;
/*
 * MDI_DEBUG(level, (args)):
 *		'stmnt' is the parenthesized i_mdi_log() argument list.
 *		The expansion is wrapped in do { } while (0) so that the macro
 *		behaves as a single statement; with a bare "if" an "else"
 *		following the call site would attach to the macro's hidden
 *		"if" instead of the caller's.
 */
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
	_NOTE(CONSTCOND) } while (0)
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
/* Without DEBUG the trace statements expand to nothing. */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
80 
81 extern pri_t	minclsyspri;
82 extern int	modrootloaded;
83 
84 /*
85  * Global mutex:
86  * Protects vHCI list and structure members, pHCI and Client lists.
87  */
88 kmutex_t	mdi_mutex;
89 
90 /*
91  * Registered vHCI class driver lists
92  */
93 int		mdi_vhci_count;
94 mdi_vhci_t	*mdi_vhci_head;
95 mdi_vhci_t	*mdi_vhci_tail;
96 
97 /*
98  * Client Hash Table size
99  */
100 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
101 
102 /*
103  * taskq interface definitions
104  */
105 #define	MDI_TASKQ_N_THREADS	8
106 #define	MDI_TASKQ_PRI		minclsyspri
107 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
108 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
109 
110 taskq_t				*mdi_taskq;
111 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
112 
113 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
114 
115 /*
116  * The data should be "quiet" for this interval (in seconds) before the
117  * vhci cached data is flushed to the disk.
118  */
119 static int mdi_vhcache_flush_delay = 10;
120 
121 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
122 static int mdi_vhcache_flush_daemon_idle_time = 60;
123 
124 /*
125  * MDI falls back to discovery of all paths when a bus_config_one fails.
126  * The following parameters can be used to tune this operation.
127  *
128  * mdi_path_discovery_boot
129  *	Number of times path discovery will be attempted during early boot.
130  *	Probably there is no reason to ever set this value to greater than one.
131  *
132  * mdi_path_discovery_postboot
133  *	Number of times path discovery will be attempted after early boot.
134  *	Set it to a minimum of two to allow for discovery of iscsi paths which
135  *	may happen very late during booting.
136  *
137  * mdi_path_discovery_interval
138  *	Minimum number of seconds MDI will wait between successive discovery
139  *	of all paths. Set it to -1 to disable discovery of all paths.
140  */
141 static int mdi_path_discovery_boot = 1;
142 static int mdi_path_discovery_postboot = 2;
143 static int mdi_path_discovery_interval = 10;
144 
145 /*
146  * number of seconds the asynchronous configuration thread will sleep idle
147  * before exiting.
148  */
149 static int mdi_async_config_idle_time = 600;
150 
151 static int mdi_bus_config_cache_hash_size = 256;
152 
153 /* turns off multithreaded configuration for certain operations */
154 static int mdi_mtc_off = 0;
155 
156 /*
157  * MDI component property name/value string definitions
158  */
159 const char 		*mdi_component_prop = "mpxio-component";
160 const char		*mdi_component_prop_vhci = "vhci";
161 const char		*mdi_component_prop_phci = "phci";
162 const char		*mdi_component_prop_client = "client";
163 
164 /*
165  * MDI client global unique identifier property name
166  */
167 const char		*mdi_client_guid_prop = "client-guid";
168 
169 /*
170  * MDI client load balancing property name/value string definitions
171  */
172 const char		*mdi_load_balance = "load-balance";
173 const char		*mdi_load_balance_none = "none";
174 const char		*mdi_load_balance_rr = "round-robin";
175 const char		*mdi_load_balance_lba = "logical-block";
176 
177 /*
178  * Obsolete vHCI class definition; to be removed after Leadville update
179  */
180 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
181 
182 static char vhci_greeting[] =
183 	"\tThere already exists one vHCI driver for class %s\n"
184 	"\tOnly one vHCI driver for each class is allowed\n";
185 
186 /*
187  * Static function prototypes
188  */
189 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
190 static int		i_mdi_client_offline(dev_info_t *, uint_t);
191 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
192 static void		i_mdi_phci_post_detach(dev_info_t *,
193 			    ddi_detach_cmd_t, int);
194 static int		i_mdi_client_pre_detach(dev_info_t *,
195 			    ddi_detach_cmd_t);
196 static void		i_mdi_client_post_detach(dev_info_t *,
197 			    ddi_detach_cmd_t, int);
198 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
199 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
200 static int 		i_mdi_lba_lb(mdi_client_t *ct,
201 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
202 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
203 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
204 static void		i_mdi_pm_reset_client(mdi_client_t *);
205 static void		i_mdi_pm_hold_all_phci(mdi_client_t *);
206 static int		i_mdi_power_all_phci(mdi_client_t *);
207 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
208 
209 
210 /*
211  * Internal mdi_pathinfo node functions
212  */
213 static int		i_mdi_pi_kstat_create(mdi_pathinfo_t *);
214 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
215 
216 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
217 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
218 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
219 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
220 static void		i_mdi_phci_get_client_lock(mdi_phci_t *,
221 			    mdi_client_t *);
222 static void		i_mdi_phci_unlock(mdi_phci_t *);
223 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
224 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
225 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
226 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
227 			    mdi_client_t *);
228 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
229 static void		i_mdi_client_remove_path(mdi_client_t *,
230 			    mdi_pathinfo_t *);
231 
232 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
233 			    mdi_pathinfo_state_t, int);
234 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
235 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
236 			    char **, int);
237 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
238 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
239 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
240 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
241 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
242 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
243 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
244 static void		i_mdi_client_update_state(mdi_client_t *);
245 static int		i_mdi_client_compute_state(mdi_client_t *,
246 			    mdi_phci_t *);
247 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
248 static void		i_mdi_client_unlock(mdi_client_t *);
249 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
250 static mdi_client_t	*i_devi_get_client(dev_info_t *);
251 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int,
252 			int);
253 /*
254  * Failover related function prototypes
255  */
256 static int		i_mdi_failover(void *);
257 
258 /*
259  * misc internal functions
260  */
261 static int		i_mdi_get_hash_key(char *);
262 static int		i_map_nvlist_error_to_mdi(int);
263 static void		i_mdi_report_path_state(mdi_client_t *,
264 			    mdi_pathinfo_t *);
265 
266 static void		setup_vhci_cache(mdi_vhci_t *);
267 static int		destroy_vhci_cache(mdi_vhci_t *);
268 static void		setup_phci_driver_list(mdi_vhci_t *);
269 static void		free_phci_driver_list(mdi_vhci_config_t *);
270 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
271 static boolean_t	stop_vhcache_flush_thread(void *, int);
272 static void		free_string_array(char **, int);
273 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
274 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
275 static void		free_vhcache_client(mdi_vhcache_client_t *);
276 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
277 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
278 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
279 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
280 static void		vhcache_pi_add(mdi_vhci_config_t *,
281 			    struct mdi_pathinfo *);
282 static void		vhcache_pi_remove(mdi_vhci_config_t *,
283 			    struct mdi_pathinfo *);
284 static void		free_phclient_path_list(mdi_phys_path_t *);
285 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
286 static int		flush_vhcache(mdi_vhci_config_t *, int);
287 static void		vhcache_dirty(mdi_vhci_config_t *);
288 static void		free_async_client_config(mdi_async_client_config_t *);
289 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
290 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
291 static nvlist_t		*read_on_disk_vhci_cache(char *);
292 extern int		fread_nvlist(char *, nvlist_t **);
293 extern int		fwrite_nvlist(char *, nvlist_t *);
294 
295 /* called once when first vhci registers with mdi */
296 static void
297 i_mdi_init()
298 {
299 	static int initialized = 0;
300 
301 	if (initialized)
302 		return;
303 	initialized = 1;
304 
305 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
306 	/*
307 	 * Create our taskq resources
308 	 */
309 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
310 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
311 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
312 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
313 }
314 
315 /*
 * mdi_get_component_type():
 *		Return the mpxio component type recorded on a devinfo node.
 *		The value is read straight from devi_mdi_component; no lock
 *		is taken here.
 * Return Values:
 *		MDI_COMPONENT_NONE
 *		MDI_COMPONENT_VHCI
 *		MDI_COMPONENT_PHCI
 *		MDI_COMPONENT_CLIENT
 * Note:
 *		The registration routines in this file OR the component bits
 *		into devi_mdi_component, so the returned value may have more
 *		than one bit set.
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed once clients have migrated to the mdi_is_*() interfaces.
 */
326 int
327 mdi_get_component_type(dev_info_t *dip)
328 {
329 	return (DEVI(dip)->devi_mdi_component);
330 }
331 
332 /*
333  * mdi_vhci_register():
334  *		Register a vHCI module with the mpxio framework
335  *		mdi_vhci_register() is called by vHCI drivers to register the
336  *		'class_driver' vHCI driver and its MDI entrypoints with the
337  *		mpxio framework.  The vHCI driver must call this interface as
338  *		part of its attach(9e) handler.
339  *		Competing threads may try to attach mdi_vhci_register() as
340  *		the vHCI drivers are loaded and attached as a result of pHCI
341  *		driver instance registration (mdi_phci_register()) with the
342  *		framework.
343  * Return Values:
344  *		MDI_SUCCESS
345  *		MDI_FAILURE
346  */
347 
348 /*ARGSUSED*/
349 int
350 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
351     int flags)
352 {
353 	mdi_vhci_t		*vh = NULL;
354 
355 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
356 
357 	i_mdi_init();
358 
359 	mutex_enter(&mdi_mutex);
360 	/*
361 	 * Scan for already registered vhci
362 	 */
363 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
364 		if (strcmp(vh->vh_class, class) == 0) {
365 			/*
366 			 * vHCI has already been created.  Check for valid
367 			 * vHCI ops registration.  We only support one vHCI
368 			 * module per class
369 			 */
370 			if (vh->vh_ops != NULL) {
371 				mutex_exit(&mdi_mutex);
372 				cmn_err(CE_NOTE, vhci_greeting, class);
373 				return (MDI_FAILURE);
374 			}
375 			break;
376 		}
377 	}
378 
379 	/*
380 	 * if not yet created, create the vHCI component
381 	 */
382 	if (vh == NULL) {
383 		struct client_hash	*hash = NULL;
384 		char			*load_balance;
385 
386 		/*
387 		 * Allocate and initialize the mdi extensions
388 		 */
389 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
390 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
391 		    KM_SLEEP);
392 		vh->vh_client_table = hash;
393 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
394 		(void) strcpy(vh->vh_class, class);
395 		vh->vh_lb = LOAD_BALANCE_RR;
396 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
397 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
398 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
399 				vh->vh_lb = LOAD_BALANCE_NONE;
400 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
401 				    == 0) {
402 				vh->vh_lb = LOAD_BALANCE_LBA;
403 			}
404 			ddi_prop_free(load_balance);
405 		}
406 
407 		/*
408 		 * Store the vHCI ops vectors
409 		 */
410 		vh->vh_dip = vdip;
411 		vh->vh_ops = vops;
412 
413 		setup_vhci_cache(vh);
414 
415 		if (mdi_vhci_head == NULL) {
416 			mdi_vhci_head = vh;
417 		}
418 		if (mdi_vhci_tail) {
419 			mdi_vhci_tail->vh_next = vh;
420 		}
421 		mdi_vhci_tail = vh;
422 		mdi_vhci_count++;
423 	}
424 
425 	/*
426 	 * Claim the devfs node as a vhci component
427 	 */
428 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
429 
430 	/*
431 	 * Initialize our back reference from dev_info node
432 	 */
433 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
434 	mutex_exit(&mdi_mutex);
435 	return (MDI_SUCCESS);
436 }
437 
438 /*
439  * mdi_vhci_unregister():
440  *		Unregister a vHCI module from mpxio framework
441  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
442  * 		of a vhci to unregister it from the framework.
443  * Return Values:
444  *		MDI_SUCCESS
445  *		MDI_FAILURE
446  */
447 
448 /*ARGSUSED*/
449 int
450 mdi_vhci_unregister(dev_info_t *vdip, int flags)
451 {
452 	mdi_vhci_t	*found, *vh, *prev = NULL;
453 
454 	/*
455 	 * Check for invalid VHCI
456 	 */
457 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
458 		return (MDI_FAILURE);
459 
460 	mutex_enter(&mdi_mutex);
461 
462 	/*
463 	 * Scan the list of registered vHCIs for a match
464 	 */
465 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
466 		if (found == vh)
467 			break;
468 		prev = found;
469 	}
470 
471 	if (found == NULL) {
472 		mutex_exit(&mdi_mutex);
473 		return (MDI_FAILURE);
474 	}
475 
476 	/*
477 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
478 	 * should have been unregistered, before a vHCI can be
479 	 * unregistered.
480 	 */
481 	if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) {
482 		mutex_exit(&mdi_mutex);
483 		return (MDI_FAILURE);
484 	}
485 
486 	/*
487 	 * Remove the vHCI from the global list
488 	 */
489 	if (vh == mdi_vhci_head) {
490 		mdi_vhci_head = vh->vh_next;
491 	} else {
492 		prev->vh_next = vh->vh_next;
493 	}
494 	if (vh == mdi_vhci_tail) {
495 		mdi_vhci_tail = prev;
496 	}
497 
498 	mdi_vhci_count--;
499 	mutex_exit(&mdi_mutex);
500 
501 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
502 		/* add vhci to the global list */
503 		mutex_enter(&mdi_mutex);
504 		if (mdi_vhci_head == NULL)
505 			mdi_vhci_head = vh;
506 		else
507 			mdi_vhci_tail->vh_next = vh;
508 		mdi_vhci_tail = vh;
509 		mdi_vhci_count++;
510 		mutex_exit(&mdi_mutex);
511 		return (MDI_FAILURE);
512 	}
513 
514 	vh->vh_ops = NULL;
515 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
516 	DEVI(vdip)->devi_mdi_xhci = NULL;
517 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
518 	kmem_free(vh->vh_client_table,
519 	    mdi_client_table_size * sizeof (struct client_hash));
520 
521 	kmem_free(vh, sizeof (mdi_vhci_t));
522 	return (MDI_SUCCESS);
523 }
524 
525 /*
526  * i_mdi_vhci_class2vhci():
527  *		Look for a matching vHCI module given a vHCI class name
528  * Return Values:
529  *		Handle to a vHCI component
530  *		NULL
531  */
532 static mdi_vhci_t *
533 i_mdi_vhci_class2vhci(char *class)
534 {
535 	mdi_vhci_t	*vh = NULL;
536 
537 	ASSERT(!MUTEX_HELD(&mdi_mutex));
538 
539 	mutex_enter(&mdi_mutex);
540 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
541 		if (strcmp(vh->vh_class, class) == 0) {
542 			break;
543 		}
544 	}
545 	mutex_exit(&mdi_mutex);
546 	return (vh);
547 }
548 
549 /*
550  * i_devi_get_vhci():
551  *		Utility function to get the handle to a vHCI component
552  * Return Values:
553  *		Handle to a vHCI component
554  *		NULL
555  */
556 mdi_vhci_t *
557 i_devi_get_vhci(dev_info_t *vdip)
558 {
559 	mdi_vhci_t	*vh = NULL;
560 	if (MDI_VHCI(vdip)) {
561 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
562 	}
563 	return (vh);
564 }
565 
566 /*
567  * mdi_phci_register():
568  *		Register a pHCI module with mpxio framework
569  *		mdi_phci_register() is called by pHCI drivers to register with
570  *		the mpxio framework and a specific 'class_driver' vHCI.  The
571  *		pHCI driver must call this interface as part of its attach(9e)
572  *		handler.
573  * Return Values:
574  *		MDI_SUCCESS
575  *		MDI_FAILURE
576  */
577 
578 /*ARGSUSED*/
579 int
580 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
581 {
582 	mdi_phci_t		*ph;
583 	mdi_vhci_t		*vh;
584 	char			*data;
585 	char			*pathname;
586 
587 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
588 	(void) ddi_pathname(pdip, pathname);
589 
590 	/*
591 	 * Check for mpxio-disable property. Enable mpxio if the property is
592 	 * missing or not set to "yes".
593 	 * If the property is set to "yes" then emit a brief message.
594 	 */
595 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
596 	    &data) == DDI_SUCCESS)) {
597 		if (strcmp(data, "yes") == 0) {
598 			MDI_DEBUG(1, (CE_CONT, pdip,
599 			    "?%s (%s%d) multipath capabilities "
600 			    "disabled via %s.conf.\n", pathname,
601 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
602 			    ddi_driver_name(pdip)));
603 			ddi_prop_free(data);
604 			kmem_free(pathname, MAXPATHLEN);
605 			return (MDI_FAILURE);
606 		}
607 		ddi_prop_free(data);
608 	}
609 
610 	kmem_free(pathname, MAXPATHLEN);
611 
612 	/*
613 	 * Search for a matching vHCI
614 	 */
615 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
616 	if (vh == NULL) {
617 		return (MDI_FAILURE);
618 	}
619 
620 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
621 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
622 	ph->ph_dip = pdip;
623 	ph->ph_vhci = vh;
624 	ph->ph_next = NULL;
625 	ph->ph_unstable = 0;
626 	ph->ph_vprivate = 0;
627 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
628 	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);
629 
630 	MDI_PHCI_SET_POWER_UP(ph);
631 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
632 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
633 
634 	vhcache_phci_add(vh->vh_config, ph);
635 
636 	mutex_enter(&mdi_mutex);
637 	if (vh->vh_phci_head == NULL) {
638 		vh->vh_phci_head = ph;
639 	}
640 	if (vh->vh_phci_tail) {
641 		vh->vh_phci_tail->ph_next = ph;
642 	}
643 	vh->vh_phci_tail = ph;
644 	vh->vh_phci_count++;
645 	mutex_exit(&mdi_mutex);
646 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
647 	return (MDI_SUCCESS);
648 }
649 
650 /*
651  * mdi_phci_unregister():
652  *		Unregister a pHCI module from mpxio framework
653  *		mdi_phci_unregister() is called by the pHCI drivers from their
654  *		detach(9E) handler to unregister their instances from the
655  *		framework.
656  * Return Values:
657  *		MDI_SUCCESS
658  *		MDI_FAILURE
659  */
660 
661 /*ARGSUSED*/
662 int
663 mdi_phci_unregister(dev_info_t *pdip, int flags)
664 {
665 	mdi_vhci_t		*vh;
666 	mdi_phci_t		*ph;
667 	mdi_phci_t		*tmp;
668 	mdi_phci_t		*prev = NULL;
669 
670 	ph = i_devi_get_phci(pdip);
671 	if (ph == NULL) {
672 		MDI_DEBUG(1, (CE_WARN, pdip,
673 		    "!pHCI unregister: Not a valid pHCI"));
674 		return (MDI_FAILURE);
675 	}
676 
677 	vh = ph->ph_vhci;
678 	ASSERT(vh != NULL);
679 	if (vh == NULL) {
680 		MDI_DEBUG(1, (CE_WARN, pdip,
681 		    "!pHCI unregister: Not a valid vHCI"));
682 		return (MDI_FAILURE);
683 	}
684 
685 	mutex_enter(&mdi_mutex);
686 	tmp = vh->vh_phci_head;
687 	while (tmp) {
688 		if (tmp == ph) {
689 			break;
690 		}
691 		prev = tmp;
692 		tmp = tmp->ph_next;
693 	}
694 
695 	if (ph == vh->vh_phci_head) {
696 		vh->vh_phci_head = ph->ph_next;
697 	} else {
698 		prev->ph_next = ph->ph_next;
699 	}
700 
701 	if (ph == vh->vh_phci_tail) {
702 		vh->vh_phci_tail = prev;
703 	}
704 
705 	vh->vh_phci_count--;
706 
707 	mutex_exit(&mdi_mutex);
708 
709 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
710 	    ESC_DDI_INITIATOR_UNREGISTER);
711 	vhcache_phci_remove(vh->vh_config, ph);
712 	cv_destroy(&ph->ph_unstable_cv);
713 	cv_destroy(&ph->ph_powerchange_cv);
714 	mutex_destroy(&ph->ph_mutex);
715 	kmem_free(ph, sizeof (mdi_phci_t));
716 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
717 	DEVI(pdip)->devi_mdi_xhci = NULL;
718 	return (MDI_SUCCESS);
719 }
720 
721 /*
722  * i_devi_get_phci():
723  * 		Utility function to return the phci extensions.
724  */
725 static mdi_phci_t *
726 i_devi_get_phci(dev_info_t *pdip)
727 {
728 	mdi_phci_t	*ph = NULL;
729 	if (MDI_PHCI(pdip)) {
730 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
731 	}
732 	return (ph);
733 }
734 
735 /*
736  * mdi_phci_path2devinfo():
737  * 		Utility function to search for a valid phci device given
738  *		the devfs pathname.
739  */
740 
741 dev_info_t *
742 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
743 {
744 	char		*temp_pathname;
745 	mdi_vhci_t	*vh;
746 	mdi_phci_t	*ph;
747 	dev_info_t 	*pdip = NULL;
748 
749 	vh = i_devi_get_vhci(vdip);
750 	ASSERT(vh != NULL);
751 
752 	if (vh == NULL) {
753 		/*
754 		 * Invalid vHCI component, return failure
755 		 */
756 		return (NULL);
757 	}
758 
759 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
760 	mutex_enter(&mdi_mutex);
761 	ph = vh->vh_phci_head;
762 	while (ph != NULL) {
763 		pdip = ph->ph_dip;
764 		ASSERT(pdip != NULL);
765 		*temp_pathname = '\0';
766 		(void) ddi_pathname(pdip, temp_pathname);
767 		if (strcmp(temp_pathname, pathname) == 0) {
768 			break;
769 		}
770 		ph = ph->ph_next;
771 	}
772 	if (ph == NULL) {
773 		pdip = NULL;
774 	}
775 	mutex_exit(&mdi_mutex);
776 	kmem_free(temp_pathname, MAXPATHLEN);
777 	return (pdip);
778 }
779 
780 /*
781  * mdi_phci_get_path_count():
782  * 		get number of path information nodes associated with a given
783  *		pHCI device.
784  */
785 int
786 mdi_phci_get_path_count(dev_info_t *pdip)
787 {
788 	mdi_phci_t	*ph;
789 	int		count = 0;
790 
791 	ph = i_devi_get_phci(pdip);
792 	if (ph != NULL) {
793 		count = ph->ph_path_count;
794 	}
795 	return (count);
796 }
797 
798 /*
799  * i_mdi_phci_lock():
800  *		Lock a pHCI device
801  * Return Values:
802  *		None
803  * Note:
804  *		The default locking order is:
805  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
806  *		But there are number of situations where locks need to be
807  *		grabbed in reverse order.  This routine implements try and lock
808  *		mechanism depending on the requested parameter option.
809  */
810 static void
811 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
812 {
813 	if (pip) {
814 		/* Reverse locking is requested. */
815 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
816 			/*
817 			 * tryenter failed. Try to grab again
818 			 * after a small delay
819 			 */
820 			MDI_PI_HOLD(pip);
821 			MDI_PI_UNLOCK(pip);
822 			delay(1);
823 			MDI_PI_LOCK(pip);
824 			MDI_PI_RELE(pip);
825 		}
826 	} else {
827 		MDI_PHCI_LOCK(ph);
828 	}
829 }
830 
831 /*
832  * i_mdi_phci_get_client_lock():
833  *		Lock a pHCI device
834  * Return Values:
835  *		None
836  * Note:
837  *		The default locking order is:
838  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
839  *		But there are number of situations where locks need to be
840  *		grabbed in reverse order.  This routine implements try and lock
841  *		mechanism depending on the requested parameter option.
842  */
843 static void
844 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
845 {
846 	if (ct) {
847 		/* Reverse locking is requested. */
848 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
849 			/*
850 			 * tryenter failed. Try to grab again
851 			 * after a small delay
852 			 */
853 			MDI_CLIENT_UNLOCK(ct);
854 			delay(1);
855 			MDI_CLIENT_LOCK(ct);
856 		}
857 	} else {
858 		MDI_PHCI_LOCK(ph);
859 	}
860 }
861 
862 /*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component.
 *		Thin wrapper around MDI_PHCI_UNLOCK(); it is the release
 *		counterpart of i_mdi_phci_lock() and
 *		i_mdi_phci_get_client_lock(), which acquire the same lock
 *		through MDI_PHCI_LOCK()/MDI_PHCI_TRYLOCK().
 * Return Values:
 *		None
 */
866 static void
867 i_mdi_phci_unlock(mdi_phci_t *ph)
868 {
869 	MDI_PHCI_UNLOCK(ph);
870 }
871 
872 /*
873  * i_mdi_devinfo_create():
874  *		create client device's devinfo node
875  * Return Values:
876  *		dev_info
877  *		NULL
878  * Notes:
879  */
880 static dev_info_t *
881 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
882 	char **compatible, int ncompatible)
883 {
884 	dev_info_t *cdip = NULL;
885 
886 	ASSERT(MUTEX_HELD(&mdi_mutex));
887 
888 	/* Verify for duplicate entry */
889 	cdip = i_mdi_devinfo_find(vh, name, guid);
890 	ASSERT(cdip == NULL);
891 	if (cdip) {
892 		cmn_err(CE_WARN,
893 		    "i_mdi_devinfo_create: client dip %p already exists",
894 			(void *)cdip);
895 	}
896 
897 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
898 	if (cdip == NULL)
899 		goto fail;
900 
901 	/*
902 	 * Create component type and Global unique identifier
903 	 * properties
904 	 */
905 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
906 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
907 		goto fail;
908 	}
909 
910 	/* Decorate the node with compatible property */
911 	if (compatible &&
912 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
913 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
914 		goto fail;
915 	}
916 
917 	return (cdip);
918 
919 fail:
920 	if (cdip) {
921 		(void) ndi_prop_remove_all(cdip);
922 		(void) ndi_devi_free(cdip);
923 	}
924 	return (NULL);
925 }
926 
927 /*
928  * i_mdi_devinfo_find():
929  *		Find a matching devinfo node for given client node name
930  *		and its guid.
931  * Return Values:
932  *		Handle to a dev_info node or NULL
933  */
934 
935 static dev_info_t *
936 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
937 {
938 	char			*data;
939 	dev_info_t 		*cdip = NULL;
940 	dev_info_t 		*ndip = NULL;
941 	int			circular;
942 
943 	ndi_devi_enter(vh->vh_dip, &circular);
944 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
945 	while ((cdip = ndip) != NULL) {
946 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
947 
948 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
949 			continue;
950 		}
951 
952 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
953 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
954 		    &data) != DDI_PROP_SUCCESS) {
955 			continue;
956 		}
957 
958 		if (strcmp(data, guid) != 0) {
959 			ddi_prop_free(data);
960 			continue;
961 		}
962 		ddi_prop_free(data);
963 		break;
964 	}
965 	ndi_devi_exit(vh->vh_dip, circular);
966 	return (cdip);
967 }
968 
969 /*
970  * i_mdi_devinfo_remove():
971  *		Remove a client device node
972  */
973 static int
974 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
975 {
976 	int	rv = MDI_SUCCESS;
977 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
978 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
979 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
980 		if (rv != NDI_SUCCESS) {
981 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
982 			    " failed. cdip = %p\n", cdip));
983 		}
984 		/*
985 		 * Convert to MDI error code
986 		 */
987 		switch (rv) {
988 		case NDI_SUCCESS:
989 			rv = MDI_SUCCESS;
990 			break;
991 		case NDI_BUSY:
992 			rv = MDI_BUSY;
993 			break;
994 		default:
995 			rv = MDI_FAILURE;
996 			break;
997 		}
998 	}
999 	return (rv);
1000 }
1001 
1002 /*
1003  * i_devi_get_client()
1004  *		Utility function to get mpxio component extensions
1005  */
1006 static mdi_client_t *
1007 i_devi_get_client(dev_info_t *cdip)
1008 {
1009 	mdi_client_t	*ct = NULL;
1010 	if (MDI_CLIENT(cdip)) {
1011 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1012 	}
1013 	return (ct);
1014 }
1015 
1016 /*
1017  * i_mdi_is_child_present():
1018  *		Search for the presence of client device dev_info node
1019  */
1020 
1021 static int
1022 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1023 {
1024 	int		rv = MDI_FAILURE;
1025 	struct dev_info	*dip;
1026 	int		circular;
1027 
1028 	ndi_devi_enter(vdip, &circular);
1029 	dip = DEVI(vdip)->devi_child;
1030 	while (dip) {
1031 		if (dip == DEVI(cdip)) {
1032 			rv = MDI_SUCCESS;
1033 			break;
1034 		}
1035 		dip = dip->devi_sibling;
1036 	}
1037 	ndi_devi_exit(vdip, circular);
1038 	return (rv);
1039 }
1040 
1041 
1042 /*
1043  * i_mdi_client_lock():
1044  *		Grab client component lock
1045  * Return Values:
1046  *		None
1047  * Note:
1048  *		The default locking order is:
1049  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1050  *		But there are number of situations where locks need to be
1051  *		grabbed in reverse order.  This routine implements try and lock
1052  *		mechanism depending on the requested parameter option.
1053  */
1054 
1055 static void
1056 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1057 {
1058 	if (pip) {
1059 		/*
1060 		 * Reverse locking is requested.
1061 		 */
1062 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1063 			/*
1064 			 * tryenter failed. Try to grab again
1065 			 * after a small delay
1066 			 */
1067 			MDI_PI_HOLD(pip);
1068 			MDI_PI_UNLOCK(pip);
1069 			delay(1);
1070 			MDI_PI_LOCK(pip);
1071 			MDI_PI_RELE(pip);
1072 		}
1073 	} else {
1074 		MDI_CLIENT_LOCK(ct);
1075 	}
1076 }
1077 
1078 /*
 * i_mdi_client_unlock():
 *		Unlock a client component.
 *		Thin wrapper around MDI_CLIENT_UNLOCK(); it is the release
 *		counterpart of i_mdi_client_lock(), which acquires the same
 *		lock through MDI_CLIENT_LOCK()/MDI_CLIENT_TRYLOCK().
 * Return Values:
 *		None
 */
1082 
1083 static void
1084 i_mdi_client_unlock(mdi_client_t *ct)
1085 {
1086 	MDI_CLIENT_UNLOCK(ct);
1087 }
1088 
1089 /*
1090  * i_mdi_client_alloc():
1091  * 		Allocate and initialize a client structure.  Caller should
1092  *		hold the global mdi_mutex.
1093  * Return Values:
1094  *		Handle to a client component
1095  */
1096 /*ARGSUSED*/
1097 static mdi_client_t *
1098 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1099 {
1100 	mdi_client_t	*ct;
1101 
1102 	ASSERT(MUTEX_HELD(&mdi_mutex));
1103 
1104 	/*
1105 	 * Allocate and initialize a component structure.
1106 	 */
1107 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1108 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1109 	ct->ct_hnext = NULL;
1110 	ct->ct_hprev = NULL;
1111 	ct->ct_dip = NULL;
1112 	ct->ct_vhci = vh;
1113 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1114 	(void) strcpy(ct->ct_drvname, name);
1115 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1116 	(void) strcpy(ct->ct_guid, lguid);
1117 	ct->ct_cprivate = NULL;
1118 	ct->ct_vprivate = NULL;
1119 	ct->ct_flags = 0;
1120 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1121 	MDI_CLIENT_SET_OFFLINE(ct);
1122 	MDI_CLIENT_SET_DETACH(ct);
1123 	MDI_CLIENT_SET_POWER_UP(ct);
1124 	ct->ct_failover_flags = 0;
1125 	ct->ct_failover_status = 0;
1126 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1127 	ct->ct_unstable = 0;
1128 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1129 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1130 	ct->ct_lb = vh->vh_lb;
1131 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1132 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1133 	ct->ct_path_count = 0;
1134 	ct->ct_path_head = NULL;
1135 	ct->ct_path_tail = NULL;
1136 	ct->ct_path_last = NULL;
1137 
1138 	/*
1139 	 * Add this client component to our client hash queue
1140 	 */
1141 	i_mdi_client_enlist_table(vh, ct);
1142 	return (ct);
1143 }
1144 
1145 /*
1146  * i_mdi_client_enlist_table():
1147  *		Attach the client device to the client hash table. Caller
1148  *		should hold the mdi_mutex
1149  */
1150 
1151 static void
1152 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1153 {
1154 	int 			index;
1155 	struct client_hash	*head;
1156 
1157 	ASSERT(MUTEX_HELD(&mdi_mutex));
1158 	index = i_mdi_get_hash_key(ct->ct_guid);
1159 	head = &vh->vh_client_table[index];
1160 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1161 	head->ct_hash_head = ct;
1162 	head->ct_hash_count++;
1163 	vh->vh_client_count++;
1164 }
1165 
1166 /*
1167  * i_mdi_client_delist_table():
 *		Remove the client device from the client hash table.
1169  *		Caller should hold the mdi_mutex
1170  */
1171 
1172 static void
1173 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1174 {
1175 	int			index;
1176 	char			*guid;
1177 	struct client_hash 	*head;
1178 	mdi_client_t		*next;
1179 	mdi_client_t		*last;
1180 
1181 	ASSERT(MUTEX_HELD(&mdi_mutex));
1182 	guid = ct->ct_guid;
1183 	index = i_mdi_get_hash_key(guid);
1184 	head = &vh->vh_client_table[index];
1185 
1186 	last = NULL;
1187 	next = (mdi_client_t *)head->ct_hash_head;
1188 	while (next != NULL) {
1189 		if (next == ct) {
1190 			break;
1191 		}
1192 		last = next;
1193 		next = next->ct_hnext;
1194 	}
1195 
1196 	if (next) {
1197 		head->ct_hash_count--;
1198 		if (last == NULL) {
1199 			head->ct_hash_head = ct->ct_hnext;
1200 		} else {
1201 			last->ct_hnext = ct->ct_hnext;
1202 		}
1203 		ct->ct_hnext = NULL;
1204 		vh->vh_client_count--;
1205 	}
1206 }
1207 
1208 
1209 /*
1210  * i_mdi_client_free():
1211  *		Free a client component
1212  */
1213 static int
1214 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1215 {
1216 	int		rv = MDI_SUCCESS;
1217 	int		flags = ct->ct_flags;
1218 	dev_info_t	*cdip;
1219 	dev_info_t	*vdip;
1220 
1221 	ASSERT(MUTEX_HELD(&mdi_mutex));
1222 	vdip = vh->vh_dip;
1223 	cdip = ct->ct_dip;
1224 
1225 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1226 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1227 	DEVI(cdip)->devi_mdi_client = NULL;
1228 
1229 	/*
1230 	 * Clear out back ref. to dev_info_t node
1231 	 */
1232 	ct->ct_dip = NULL;
1233 
1234 	/*
1235 	 * Remove this client from our hash queue
1236 	 */
1237 	i_mdi_client_delist_table(vh, ct);
1238 
1239 	/*
1240 	 * Uninitialize and free the component
1241 	 */
1242 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1243 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1244 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1245 	cv_destroy(&ct->ct_failover_cv);
1246 	cv_destroy(&ct->ct_unstable_cv);
1247 	cv_destroy(&ct->ct_powerchange_cv);
1248 	mutex_destroy(&ct->ct_mutex);
1249 	kmem_free(ct, sizeof (*ct));
1250 
1251 	if (cdip != NULL) {
1252 		mutex_exit(&mdi_mutex);
1253 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1254 		mutex_enter(&mdi_mutex);
1255 	}
1256 	return (rv);
1257 }
1258 
1259 /*
1260  * i_mdi_client_find():
1261  * 		Find the client structure corresponding to a given guid
1262  *		Caller should hold the mdi_mutex
1263  */
1264 static mdi_client_t *
1265 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1266 {
1267 	int			index;
1268 	struct client_hash	*head;
1269 	mdi_client_t		*ct;
1270 
1271 	ASSERT(MUTEX_HELD(&mdi_mutex));
1272 	index = i_mdi_get_hash_key(guid);
1273 	head = &vh->vh_client_table[index];
1274 
1275 	ct = head->ct_hash_head;
1276 	while (ct != NULL) {
1277 		if (strcmp(ct->ct_guid, guid) == 0 &&
1278 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1279 			break;
1280 		}
1281 		ct = ct->ct_hnext;
1282 	}
1283 	return (ct);
1284 }
1285 
1286 
1287 
1288 /*
1289  * i_mdi_client_update_state():
1290  *		Compute and update client device state
1291  * Notes:
1292  *		A client device can be in any of three possible states:
1293  *
 *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *		than one online/standby path. Can tolerate failures.
1296  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1297  *		no alternate paths available as standby. A failure on the online
1298  *		would result in loss of access to device data.
1299  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1300  *		no paths available to access the device.
1301  */
1302 static void
1303 i_mdi_client_update_state(mdi_client_t *ct)
1304 {
1305 	int state;
1306 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1307 	state = i_mdi_client_compute_state(ct, NULL);
1308 	MDI_CLIENT_SET_STATE(ct, state);
1309 }
1310 
1311 /*
1312  * i_mdi_client_compute_state():
1313  *		Compute client device state
1314  *
 *		mdi_phci_t *	Pointer to pHCI structure whose paths should
 *				be skipped while computing the new value.
 *				Used by i_mdi_phci_offline() to find the new
 *				client state after DR of a pHCI.
1319  */
1320 static int
1321 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1322 {
1323 	int		state;
1324 	int		online_count = 0;
1325 	int		standby_count = 0;
1326 	mdi_pathinfo_t	*pip, *next;
1327 
1328 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1329 	pip = ct->ct_path_head;
1330 	while (pip != NULL) {
1331 		MDI_PI_LOCK(pip);
1332 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1333 		if (MDI_PI(pip)->pi_phci == ph) {
1334 			MDI_PI_UNLOCK(pip);
1335 			pip = next;
1336 			continue;
1337 		}
1338 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1339 				== MDI_PATHINFO_STATE_ONLINE)
1340 			online_count++;
1341 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1342 				== MDI_PATHINFO_STATE_STANDBY)
1343 			standby_count++;
1344 		MDI_PI_UNLOCK(pip);
1345 		pip = next;
1346 	}
1347 
1348 	if (online_count == 0) {
1349 		if (standby_count == 0) {
1350 			state = MDI_CLIENT_STATE_FAILED;
1351 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1352 			    " ct = %p\n", ct));
1353 		} else if (standby_count == 1) {
1354 			state = MDI_CLIENT_STATE_DEGRADED;
1355 		} else {
1356 			state = MDI_CLIENT_STATE_OPTIMAL;
1357 		}
1358 	} else if (online_count == 1) {
1359 		if (standby_count == 0) {
1360 			state = MDI_CLIENT_STATE_DEGRADED;
1361 		} else {
1362 			state = MDI_CLIENT_STATE_OPTIMAL;
1363 		}
1364 	} else {
1365 		state = MDI_CLIENT_STATE_OPTIMAL;
1366 	}
1367 	return (state);
1368 }
1369 
1370 /*
1371  * i_mdi_client2devinfo():
1372  *		Utility function
1373  */
1374 dev_info_t *
1375 i_mdi_client2devinfo(mdi_client_t *ct)
1376 {
1377 	return (ct->ct_dip);
1378 }
1379 
1380 /*
 * mdi_client_path2devinfo():
1382  * 		Given the parent devinfo and child devfs pathname, search for
1383  *		a valid devfs node handle.
1384  */
1385 dev_info_t *
1386 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1387 {
1388 	dev_info_t 	*cdip = NULL;
1389 	dev_info_t 	*ndip = NULL;
1390 	char		*temp_pathname;
1391 	int		circular;
1392 
1393 	/*
1394 	 * Allocate temp buffer
1395 	 */
1396 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1397 
1398 	/*
1399 	 * Lock parent against changes
1400 	 */
1401 	ndi_devi_enter(vdip, &circular);
1402 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1403 	while ((cdip = ndip) != NULL) {
1404 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1405 
1406 		*temp_pathname = '\0';
1407 		(void) ddi_pathname(cdip, temp_pathname);
1408 		if (strcmp(temp_pathname, pathname) == 0) {
1409 			break;
1410 		}
1411 	}
1412 	/*
1413 	 * Release devinfo lock
1414 	 */
1415 	ndi_devi_exit(vdip, circular);
1416 
1417 	/*
1418 	 * Free the temp buffer
1419 	 */
1420 	kmem_free(temp_pathname, MAXPATHLEN);
1421 	return (cdip);
1422 }
1423 
1424 
1425 /*
1426  * mdi_client_get_path_count():
1427  * 		Utility function to get number of path information nodes
1428  *		associated with a given client device.
1429  */
1430 int
1431 mdi_client_get_path_count(dev_info_t *cdip)
1432 {
1433 	mdi_client_t	*ct;
1434 	int		count = 0;
1435 
1436 	ct = i_devi_get_client(cdip);
1437 	if (ct != NULL) {
1438 		count = ct->ct_path_count;
1439 	}
1440 	return (count);
1441 }
1442 
1443 
1444 /*
1445  * i_mdi_get_hash_key():
1446  * 		Create a hash using strings as keys
1447  *
1448  */
1449 static int
1450 i_mdi_get_hash_key(char *str)
1451 {
1452 	uint32_t	g, hash = 0;
1453 	char		*p;
1454 
1455 	for (p = str; *p != '\0'; p++) {
1456 		g = *p;
1457 		hash += g;
1458 	}
1459 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1460 }
1461 
1462 /*
1463  * mdi_get_lb_policy():
1464  * 		Get current load balancing policy for a given client device
1465  */
1466 client_lb_t
1467 mdi_get_lb_policy(dev_info_t *cdip)
1468 {
1469 	client_lb_t	lb = LOAD_BALANCE_NONE;
1470 	mdi_client_t	*ct;
1471 
1472 	ct = i_devi_get_client(cdip);
1473 	if (ct != NULL) {
1474 		lb = ct->ct_lb;
1475 	}
1476 	return (lb);
1477 }
1478 
1479 /*
1480  * mdi_set_lb_region_size():
1481  * 		Set current region size for the load-balance
1482  */
1483 int
1484 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1485 {
1486 	mdi_client_t	*ct;
1487 	int		rv = MDI_FAILURE;
1488 
1489 	ct = i_devi_get_client(cdip);
1490 	if (ct != NULL && ct->ct_lb_args != NULL) {
1491 		ct->ct_lb_args->region_size = region_size;
1492 		rv = MDI_SUCCESS;
1493 	}
1494 	return (rv);
1495 }
1496 
1497 /*
 * mdi_set_lb_policy():
1499  * 		Set current load balancing policy for a given client device
1500  */
1501 int
1502 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1503 {
1504 	mdi_client_t	*ct;
1505 	int		rv = MDI_FAILURE;
1506 
1507 	ct = i_devi_get_client(cdip);
1508 	if (ct != NULL) {
1509 		ct->ct_lb = lb;
1510 		rv = MDI_SUCCESS;
1511 	}
1512 	return (rv);
1513 }
1514 
1515 /*
1516  * mdi_failover():
1517  *		failover function called by the vHCI drivers to initiate
1518  *		a failover operation.  This is typically due to non-availability
1519  *		of online paths to route I/O requests.  Failover can be
1520  *		triggered through user application also.
1521  *
1522  *		The vHCI driver calls mdi_failover() to initiate a failover
1523  *		operation. mdi_failover() calls back into the vHCI driver's
1524  *		vo_failover() entry point to perform the actual failover
1525  *		operation.  The reason for requiring the vHCI driver to
1526  *		initiate failover by calling mdi_failover(), instead of directly
1527  *		executing vo_failover() itself, is to ensure that the mdi
1528  *		framework can keep track of the client state properly.
1529  *		Additionally, mdi_failover() provides as a convenience the
1530  *		option of performing the failover operation synchronously or
1531  *		asynchronously
1532  *
1533  *		Upon successful completion of the failover operation, the
1534  *		paths that were previously ONLINE will be in the STANDBY state,
1535  *		and the newly activated paths will be in the ONLINE state.
1536  *
1537  *		The flags modifier determines whether the activation is done
1538  *		synchronously: MDI_FAILOVER_SYNC
1539  * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		MDI_ACCEPT	- asynchronous failover request was dispatched
1543  */
1544 /*ARGSUSED*/
1545 int
1546 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1547 {
1548 	int			rv;
1549 	mdi_client_t		*ct;
1550 
1551 	ct = i_devi_get_client(cdip);
1552 	ASSERT(ct != NULL);
1553 	if (ct == NULL) {
1554 		/* cdip is not a valid client device. Nothing more to do. */
1555 		return (MDI_FAILURE);
1556 	}
1557 
1558 	MDI_CLIENT_LOCK(ct);
1559 
1560 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1561 		/* A path to the client is being freed */
1562 		MDI_CLIENT_UNLOCK(ct);
1563 		return (MDI_BUSY);
1564 	}
1565 
1566 
1567 	if (MDI_CLIENT_IS_FAILED(ct)) {
1568 		/*
1569 		 * Client is in failed state. Nothing more to do.
1570 		 */
1571 		MDI_CLIENT_UNLOCK(ct);
1572 		return (MDI_FAILURE);
1573 	}
1574 
1575 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1576 		/*
1577 		 * Failover is already in progress; return BUSY
1578 		 */
1579 		MDI_CLIENT_UNLOCK(ct);
1580 		return (MDI_BUSY);
1581 	}
1582 	/*
1583 	 * Make sure that mdi_pathinfo node state changes are processed.
1584 	 * We do not allow failovers to progress while client path state
1585 	 * changes are in progress
1586 	 */
1587 	if (ct->ct_unstable) {
1588 		if (flags == MDI_FAILOVER_ASYNC) {
1589 			MDI_CLIENT_UNLOCK(ct);
1590 			return (MDI_BUSY);
1591 		} else {
1592 			while (ct->ct_unstable)
1593 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1594 		}
1595 	}
1596 
1597 	/*
1598 	 * Client device is in stable state. Before proceeding, perform sanity
1599 	 * checks again.
1600 	 */
1601 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1602 	    (!i_ddi_devi_attached(ct->ct_dip))) {
1603 		/*
1604 		 * Client is in failed state. Nothing more to do.
1605 		 */
1606 		MDI_CLIENT_UNLOCK(ct);
1607 		return (MDI_FAILURE);
1608 	}
1609 
1610 	/*
1611 	 * Set the client state as failover in progress.
1612 	 */
1613 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1614 	ct->ct_failover_flags = flags;
1615 	MDI_CLIENT_UNLOCK(ct);
1616 
1617 	if (flags == MDI_FAILOVER_ASYNC) {
1618 		/*
1619 		 * Submit the initiate failover request via CPR safe
1620 		 * taskq threads.
1621 		 */
1622 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1623 		    ct, KM_SLEEP);
1624 		return (MDI_ACCEPT);
1625 	} else {
1626 		/*
1627 		 * Synchronous failover mode.  Typically invoked from the user
1628 		 * land.
1629 		 */
1630 		rv = i_mdi_failover(ct);
1631 	}
1632 	return (rv);
1633 }
1634 
1635 /*
1636  * i_mdi_failover():
1637  *		internal failover function. Invokes vHCI drivers failover
1638  *		callback function and process the failover status
 * Return Values:
 *		Status returned by the vHCI driver's vo_failover() callback,
 *		or MDI_SUCCESS if the driver does not provide one
1641  *
1642  * Note: A client device in failover state can not be detached or freed.
1643  */
1644 static int
1645 i_mdi_failover(void *arg)
1646 {
1647 	int		rv = MDI_SUCCESS;
1648 	mdi_client_t	*ct = (mdi_client_t *)arg;
1649 	mdi_vhci_t	*vh = ct->ct_vhci;
1650 
1651 	ASSERT(!MUTEX_HELD(&ct->ct_mutex));
1652 
1653 	if (vh->vh_ops->vo_failover != NULL) {
1654 		/*
1655 		 * Call vHCI drivers callback routine
1656 		 */
1657 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1658 		    ct->ct_failover_flags);
1659 	}
1660 
1661 	MDI_CLIENT_LOCK(ct);
1662 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1663 
1664 	/*
1665 	 * Save the failover return status
1666 	 */
1667 	ct->ct_failover_status = rv;
1668 
1669 	/*
1670 	 * As a result of failover, client status would have been changed.
1671 	 * Update the client state and wake up anyone waiting on this client
1672 	 * device.
1673 	 */
1674 	i_mdi_client_update_state(ct);
1675 
1676 	cv_broadcast(&ct->ct_failover_cv);
1677 	MDI_CLIENT_UNLOCK(ct);
1678 	return (rv);
1679 }
1680 
1681 /*
1682  * Load balancing is logical block.
1683  * IOs within the range described by region_size
1684  * would go on the same path. This would improve the
1685  * performance by cache-hit on some of the RAID devices.
1686  * Search only for online paths(At some point we
1687  * may want to balance across target ports).
1688  * If no paths are found then default to round-robin.
1689  */
1690 static int
1691 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1692 {
1693 	int		path_index = -1;
1694 	int		online_path_count = 0;
1695 	int		online_nonpref_path_count = 0;
1696 	int 		region_size = ct->ct_lb_args->region_size;
1697 	mdi_pathinfo_t	*pip;
1698 	mdi_pathinfo_t	*next;
1699 	int		preferred, path_cnt;
1700 
1701 	pip = ct->ct_path_head;
1702 	while (pip) {
1703 		MDI_PI_LOCK(pip);
1704 		if (MDI_PI(pip)->pi_state ==
1705 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1706 			online_path_count++;
1707 		} else if (MDI_PI(pip)->pi_state ==
1708 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1709 			online_nonpref_path_count++;
1710 		}
1711 		next = (mdi_pathinfo_t *)
1712 		    MDI_PI(pip)->pi_client_link;
1713 		MDI_PI_UNLOCK(pip);
1714 		pip = next;
1715 	}
1716 	/* if found any online/preferred then use this type */
1717 	if (online_path_count > 0) {
1718 		path_cnt = online_path_count;
1719 		preferred = 1;
1720 	} else if (online_nonpref_path_count > 0) {
1721 		path_cnt = online_nonpref_path_count;
1722 		preferred = 0;
1723 	} else {
1724 		path_cnt = 0;
1725 	}
1726 	if (path_cnt) {
1727 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1728 		pip = ct->ct_path_head;
1729 		while (pip && path_index != -1) {
1730 			MDI_PI_LOCK(pip);
1731 			if (path_index == 0 &&
1732 			    (MDI_PI(pip)->pi_state ==
1733 			    MDI_PATHINFO_STATE_ONLINE) &&
1734 				MDI_PI(pip)->pi_preferred == preferred) {
1735 				MDI_PI_HOLD(pip);
1736 				MDI_PI_UNLOCK(pip);
1737 				*ret_pip = pip;
1738 				return (MDI_SUCCESS);
1739 			}
1740 			path_index --;
1741 			next = (mdi_pathinfo_t *)
1742 			    MDI_PI(pip)->pi_client_link;
1743 			MDI_PI_UNLOCK(pip);
1744 			pip = next;
1745 		}
1746 		if (pip == NULL) {
1747 			MDI_DEBUG(4, (CE_NOTE, NULL,
1748 			    "!lba %p, no pip !!\n",
1749 				bp->b_blkno));
1750 		} else {
1751 			MDI_DEBUG(4, (CE_NOTE, NULL,
1752 			    "!lba %p, no pip for path_index, "
1753 			    "pip %p\n", pip));
1754 		}
1755 	}
1756 	return (MDI_FAILURE);
1757 }
1758 
1759 /*
1760  * mdi_select_path():
1761  *		select a path to access a client device.
1762  *
1763  *		mdi_select_path() function is called by the vHCI drivers to
1764  *		select a path to route the I/O request to.  The caller passes
1765  *		the block I/O data transfer structure ("buf") as one of the
1766  *		parameters.  The mpxio framework uses the buf structure
1767  *		contents to maintain per path statistics (total I/O size /
1768  *		count pending).  If more than one online paths are available to
1769  *		select, the framework automatically selects a suitable path
1770  *		for routing I/O request. If a failover operation is active for
1771  *		this client device the call shall be failed with MDI_BUSY error
1772  *		code.
1773  *
1774  *		By default this function returns a suitable path in online
1775  *		state based on the current load balancing policy.  Currently
1776  *		we support LOAD_BALANCE_NONE (Previously selected online path
1777  *		will continue to be used till the path is usable) and
1778  *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is set
 *		through the vHCI driver's configuration file (driver.conf).
1782  *
1783  *		vHCI drivers may override this default behavior by specifying
1784  *		appropriate flags.  If start_pip is specified (non NULL) is
1785  *		used as start point to walk and find the next appropriate path.
1786  *		The following values are currently defined:
1787  *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
1788  *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
1789  *
1790  *		The non-standard behavior is used by the scsi_vhci driver,
1791  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1792  *		attach of client devices (to avoid an unnecessary failover
1793  *		when the STANDBY path comes up first), during failover
1794  *		(to activate a STANDBY path as ONLINE).
1795  *
 *		The selected path is returned in a held state (ref_cnt).
1797  *		Caller should release the hold by calling mdi_rele_path().
1798  *
1799  * Return Values:
1800  *		MDI_SUCCESS	- Completed successfully
1801  *		MDI_BUSY 	- Client device is busy failing over
1802  *		MDI_NOPATH	- Client device is online, but no valid path are
1803  *				  available to access this client device
1804  *		MDI_FAILURE	- Invalid client device or state
1805  *		MDI_DEVI_ONLINING
1806  *				- Client device (struct dev_info state) is in
1807  *				  onlining state.
1808  */
1809 
1810 /*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;	/* path matches / keep walking */
	int		retry = 0;	/* saw a transient path: report BUSY */

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	/*
	 * The client state checks below apply to the standard behavior
	 * only; callers passing selection flags bypass them.
	 */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	/*
	 * Note: there is no default case; a policy value not listed below
	 * falls out of the switch and is reported as MDI_NOPATH.
	 */
	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * Circular walk of the path list: one full lap looking for
		 * a preferred ONLINE path, then a second lap looking for a
		 * non-preferred ONLINE path.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * Back at the starting node: switch to the
			 * non-preferred lap, or stop if that lap is done too.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/*
		 * LBA selection was not applicable or found nothing;
		 * continue with round robin.
		 */
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Standard behavior resumes after the last selected path;
		 * flag-driven selection resumes after the caller's start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set when this path has the requested state
			 * (ONLINE for the standard behavior, otherwise as
			 * selected by flags) and the preference class of the
			 * current pass.  Unrecognized flag combinations never
			 * match.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 */
			/*
			 * pip is locked whenever control reaches do_again,
			 * both on fall-through and via the goto below (which
			 * locks pip first).
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  If any path examined was in a transient
	 * state the caller is told to retry (MDI_BUSY); otherwise there is
	 * no usable path.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}
2161 
2162 /*
2163  * For a client, return the next available path to any phci
2164  *
2165  * Note:
2166  *		Caller should hold the branch's devinfo node to get a consistent
2167  *		snap shot of the mdi_pathinfo nodes.
2168  *
2169  *		Please note that even the list is stable the mdi_pathinfo
2170  *		node state and properties are volatile.  The caller should lock
2171  *		and unlock the nodes by calling mdi_pi_lock() and
2172  *		mdi_pi_unlock() functions to get a stable properties.
2173  *
2174  *		If there is a need to use the nodes beyond the hold of the
2175  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2176  *		need to be held against unexpected removal by calling
2177  *		mdi_hold_path() and should be released by calling
2178  *		mdi_rele_path() on completion.
2179  */
2180 mdi_pathinfo_t *
2181 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2182 {
2183 	mdi_client_t *ct;
2184 
2185 	if (!MDI_CLIENT(ct_dip))
2186 		return (NULL);
2187 
2188 	/*
2189 	 * Walk through client link
2190 	 */
2191 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2192 	ASSERT(ct != NULL);
2193 
2194 	if (pip == NULL)
2195 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2196 
2197 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2198 }
2199 
2200 /*
2201  * For a phci, return the next available path to any client
2202  * Note: ditto mdi_get_next_phci_path()
2203  */
2204 mdi_pathinfo_t *
2205 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2206 {
2207 	mdi_phci_t *ph;
2208 
2209 	if (!MDI_PHCI(ph_dip))
2210 		return (NULL);
2211 
2212 	/*
2213 	 * Walk through pHCI link
2214 	 */
2215 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2216 	ASSERT(ph != NULL);
2217 
2218 	if (pip == NULL)
2219 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2220 
2221 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2222 }
2223 
2224 /*
2225  * mdi_get_nextpath():
2226  *		mdi_pathinfo node walker function.  Get the next node from the
2227  *		client or pHCI device list.
2228  *
2229  * XXX This is wrapper function for compatibility purposes only.
2230  *
2231  *	It doesn't work under Multi-level MPxIO, where a dip
2232  *	is both client and phci (which link should next_path follow?).
2233  *	Once Leadville is modified to call mdi_get_next_phci/client_path,
2234  *	this interface should be removed.
2235  */
2236 void
2237 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip,
2238     mdi_pathinfo_t **ret_pip)
2239 {
2240 	if (MDI_CLIENT(dip)) {
2241 		*ret_pip = mdi_get_next_phci_path(dip, pip);
2242 	} else if (MDI_PHCI(dip)) {
2243 		*ret_pip = mdi_get_next_client_path(dip, pip);
2244 	} else {
2245 		*ret_pip = NULL;
2246 	}
2247 }
2248 
2249 /*
2250  * mdi_hold_path():
2251  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2252  * Return Values:
2253  *		None
2254  */
2255 void
2256 mdi_hold_path(mdi_pathinfo_t *pip)
2257 {
2258 	if (pip) {
2259 		MDI_PI_LOCK(pip);
2260 		MDI_PI_HOLD(pip);
2261 		MDI_PI_UNLOCK(pip);
2262 	}
2263 }
2264 
2265 
2266 /*
2267  * mdi_rele_path():
2268  *		Release the mdi_pathinfo node which was selected
2269  *		through mdi_select_path() mechanism or manually held by
2270  *		calling mdi_hold_path().
2271  * Return Values:
2272  *		None
2273  */
2274 void
2275 mdi_rele_path(mdi_pathinfo_t *pip)
2276 {
2277 	if (pip) {
2278 		MDI_PI_LOCK(pip);
2279 		MDI_PI_RELE(pip);
2280 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2281 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2282 		}
2283 		MDI_PI_UNLOCK(pip);
2284 	}
2285 }
2286 
2287 
2288 /*
2289  * mdi_pi_lock():
2290  * 		Lock the mdi_pathinfo node.
2291  * Note:
2292  *		The caller should release the lock by calling mdi_pi_unlock()
2293  */
2294 void
2295 mdi_pi_lock(mdi_pathinfo_t *pip)
2296 {
2297 	ASSERT(pip != NULL);
2298 	if (pip) {
2299 		MDI_PI_LOCK(pip);
2300 	}
2301 }
2302 
2303 
2304 /*
2305  * mdi_pi_unlock():
2306  * 		Unlock the mdi_pathinfo node.
2307  * Note:
2308  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2309  */
2310 void
2311 mdi_pi_unlock(mdi_pathinfo_t *pip)
2312 {
2313 	ASSERT(pip != NULL);
2314 	if (pip) {
2315 		MDI_PI_UNLOCK(pip);
2316 	}
2317 }
2318 
2319 /*
2320  * mdi_pi_find():
2321  *		Search the list of mdi_pathinfo nodes attached to the
2322  *		pHCI/Client device node whose path address matches "paddr".
2323  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2324  *		found.
2325  * Return Values:
2326  *		mdi_pathinfo node handle
2327  *		NULL
2328  * Notes:
2329  *		Caller need not hold any locks to call this function.
2330  */
2331 mdi_pathinfo_t *
2332 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2333 {
2334 	mdi_phci_t		*ph;
2335 	mdi_vhci_t		*vh;
2336 	mdi_client_t		*ct;
2337 	mdi_pathinfo_t		*pip = NULL;
2338 
2339 	if ((pdip == NULL) || (paddr == NULL)) {
2340 		return (NULL);
2341 	}
2342 	ph = i_devi_get_phci(pdip);
2343 	if (ph == NULL) {
2344 		/*
2345 		 * Invalid pHCI device, Nothing more to do.
2346 		 */
2347 		MDI_DEBUG(2, (CE_WARN, NULL,
2348 		    "!mdi_pi_find: invalid phci"));
2349 		return (NULL);
2350 	}
2351 
2352 	vh = ph->ph_vhci;
2353 	if (vh == NULL) {
2354 		/*
2355 		 * Invalid vHCI device, Nothing more to do.
2356 		 */
2357 		MDI_DEBUG(2, (CE_WARN, NULL,
2358 		    "!mdi_pi_find: invalid phci"));
2359 		return (NULL);
2360 	}
2361 
2362 	/*
2363 	 * Look for client device identified by caddr (guid)
2364 	 */
2365 	if (caddr == NULL) {
2366 		/*
2367 		 * Find a mdi_pathinfo node under pHCI list for a matching
2368 		 * unit address.
2369 		 */
2370 		mutex_enter(&ph->ph_mutex);
2371 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2372 
2373 		while (pip != NULL) {
2374 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2375 				break;
2376 			}
2377 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2378 		}
2379 		mutex_exit(&ph->ph_mutex);
2380 		return (pip);
2381 	}
2382 
2383 	/*
2384 	 * XXX - Is the rest of the code in this function really necessary?
2385 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2386 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2387 	 * whether the search is based on the pathinfo nodes attached to
2388 	 * the pHCI or the client node, the result will be the same.
2389 	 */
2390 
2391 	/*
2392 	 * Find the client device corresponding to 'caddr'
2393 	 */
2394 	mutex_enter(&mdi_mutex);
2395 
2396 	/*
2397 	 * XXX - Passing NULL to the following function works as long as the
2398 	 * the client addresses (caddr) are unique per vhci basis.
2399 	 */
2400 	ct = i_mdi_client_find(vh, NULL, caddr);
2401 	if (ct == NULL) {
2402 		/*
2403 		 * Client not found, Obviously mdi_pathinfo node has not been
2404 		 * created yet.
2405 		 */
2406 		mutex_exit(&mdi_mutex);
2407 		return (pip);
2408 	}
2409 
2410 	/*
2411 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2412 	 * pHCI and paddr
2413 	 */
2414 	MDI_CLIENT_LOCK(ct);
2415 
2416 	/*
2417 	 * Release the global mutex as it is no more needed. Note: We always
2418 	 * respect the locking order while acquiring.
2419 	 */
2420 	mutex_exit(&mdi_mutex);
2421 
2422 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2423 	while (pip != NULL) {
2424 		/*
2425 		 * Compare the unit address
2426 		 */
2427 		if ((MDI_PI(pip)->pi_phci == ph) &&
2428 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2429 			break;
2430 		}
2431 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2432 	}
2433 	MDI_CLIENT_UNLOCK(ct);
2434 	return (pip);
2435 }
2436 
2437 /*
2438  * mdi_pi_alloc():
2439  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2440  *		The mdi_pathinfo node returned by this function identifies a
2441  *		unique device path is capable of having properties attached
2442  *		and passed to mdi_pi_online() to fully attach and online the
2443  *		path and client device node.
2444  *		The mdi_pathinfo node returned by this function must be
2445  *		destroyed using mdi_pi_free() if the path is no longer
2446  *		operational or if the caller fails to attach a client device
2447  *		node when calling mdi_pi_online(). The framework will not free
2448  *		the resources allocated.
2449  *		This function can be called from both interrupt and kernel
2450  *		contexts.  DDI_NOSLEEP flag should be used while calling
2451  *		from interrupt contexts.
2452  * Return Values:
2453  *		MDI_SUCCESS
2454  *		MDI_FAILURE
2455  *		MDI_NOMEM
2456  */
2457 /*ARGSUSED*/
2458 int
2459 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2460     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2461 {
2462 	mdi_vhci_t	*vh;
2463 	mdi_phci_t	*ph;
2464 	mdi_client_t	*ct;
2465 	mdi_pathinfo_t	*pip = NULL;
2466 	dev_info_t	*cdip;
2467 	int		rv = MDI_NOMEM;
2468 	int		path_allocated = 0;
2469 
2470 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2471 	    ret_pip == NULL) {
2472 		/* Nothing more to do */
2473 		return (MDI_FAILURE);
2474 	}
2475 
2476 	*ret_pip = NULL;
2477 	ph = i_devi_get_phci(pdip);
2478 	ASSERT(ph != NULL);
2479 	if (ph == NULL) {
2480 		/* Invalid pHCI device, return failure */
2481 		MDI_DEBUG(1, (CE_WARN, NULL,
2482 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2483 		return (MDI_FAILURE);
2484 	}
2485 
2486 	MDI_PHCI_LOCK(ph);
2487 	vh = ph->ph_vhci;
2488 	if (vh == NULL) {
2489 		/* Invalid vHCI device, return failure */
2490 		MDI_DEBUG(1, (CE_WARN, NULL,
2491 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2492 		MDI_PHCI_UNLOCK(ph);
2493 		return (MDI_FAILURE);
2494 	}
2495 
2496 	if (MDI_PHCI_IS_READY(ph) == 0) {
2497 		/*
2498 		 * Do not allow new node creation when pHCI is in
2499 		 * offline/suspended states
2500 		 */
2501 		MDI_DEBUG(1, (CE_WARN, NULL,
2502 		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
2503 		MDI_PHCI_UNLOCK(ph);
2504 		return (MDI_BUSY);
2505 	}
2506 	MDI_PHCI_UNSTABLE(ph);
2507 	MDI_PHCI_UNLOCK(ph);
2508 
2509 	/* look for a matching client, create one if not found */
2510 	mutex_enter(&mdi_mutex);
2511 	ct = i_mdi_client_find(vh, cname, caddr);
2512 	if (ct == NULL) {
2513 		ct = i_mdi_client_alloc(vh, cname, caddr);
2514 		ASSERT(ct != NULL);
2515 	}
2516 
2517 	if (ct->ct_dip == NULL) {
2518 		/*
2519 		 * Allocate a devinfo node
2520 		 */
2521 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2522 		    compatible, ncompatible);
2523 		if (ct->ct_dip == NULL) {
2524 			(void) i_mdi_client_free(vh, ct);
2525 			goto fail;
2526 		}
2527 	}
2528 	cdip = ct->ct_dip;
2529 
2530 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2531 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2532 
2533 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2534 	while (pip != NULL) {
2535 		/*
2536 		 * Compare the unit address
2537 		 */
2538 		if ((MDI_PI(pip)->pi_phci == ph) &&
2539 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2540 			break;
2541 		}
2542 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2543 	}
2544 
2545 	if (pip == NULL) {
2546 		/*
2547 		 * This is a new path for this client device.  Allocate and
2548 		 * initialize a new pathinfo node
2549 		 */
2550 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2551 		ASSERT(pip != NULL);
2552 		path_allocated = 1;
2553 	}
2554 	rv = MDI_SUCCESS;
2555 
2556 fail:
2557 	/*
2558 	 * Release the global mutex.
2559 	 */
2560 	mutex_exit(&mdi_mutex);
2561 
2562 	/*
2563 	 * Mark the pHCI as stable
2564 	 */
2565 	MDI_PHCI_LOCK(ph);
2566 	MDI_PHCI_STABLE(ph);
2567 	MDI_PHCI_UNLOCK(ph);
2568 	*ret_pip = pip;
2569 
2570 	if (path_allocated)
2571 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2572 
2573 	return (rv);
2574 }
2575 
2576 /*ARGSUSED*/
2577 int
2578 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2579     int flags, mdi_pathinfo_t **ret_pip)
2580 {
2581 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2582 	    flags, ret_pip));
2583 }
2584 
2585 /*
2586  * i_mdi_pi_alloc():
2587  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2588  * Return Values:
2589  *		mdi_pathinfo
2590  */
2591 
2592 /*ARGSUSED*/
2593 static mdi_pathinfo_t *
2594 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2595 {
2596 	mdi_pathinfo_t	*pip;
2597 	int		ct_circular;
2598 	int		ph_circular;
2599 	int		se_flag;
2600 	int		kmem_flag;
2601 
2602 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2603 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2604 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2605 	    MDI_PATHINFO_STATE_TRANSIENT;
2606 
2607 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2608 		MDI_PI_SET_USER_DISABLE(pip);
2609 
2610 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2611 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2612 
2613 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2614 		MDI_PI_SET_DRV_DISABLE(pip);
2615 
2616 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2617 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2618 	MDI_PI(pip)->pi_client = ct;
2619 	MDI_PI(pip)->pi_phci = ph;
2620 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2621 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2622 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2623 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2624 	MDI_PI(pip)->pi_pprivate = NULL;
2625 	MDI_PI(pip)->pi_cprivate = NULL;
2626 	MDI_PI(pip)->pi_vprivate = NULL;
2627 	MDI_PI(pip)->pi_client_link = NULL;
2628 	MDI_PI(pip)->pi_phci_link = NULL;
2629 	MDI_PI(pip)->pi_ref_cnt = 0;
2630 	MDI_PI(pip)->pi_kstats = NULL;
2631 	MDI_PI(pip)->pi_preferred = 1;
2632 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2633 
2634 	/*
2635 	 * Lock both dev_info nodes against changes in parallel.
2636 	 */
2637 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2638 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2639 
2640 	i_mdi_phci_add_path(ph, pip);
2641 	i_mdi_client_add_path(ct, pip);
2642 
2643 	ndi_devi_exit(ph->ph_dip, ph_circular);
2644 	ndi_devi_exit(ct->ct_dip, ct_circular);
2645 
2646 	/* determine interrupt context */
2647 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2648 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2649 
2650 	i_ddi_di_cache_invalidate(kmem_flag);
2651 
2652 	return (pip);
2653 }
2654 
2655 /*
2656  * i_mdi_phci_add_path():
2657  * 		Add a mdi_pathinfo node to pHCI list.
2658  * Notes:
2659  *		Caller should per-pHCI mutex
2660  */
2661 
2662 static void
2663 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2664 {
2665 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2666 
2667 	if (ph->ph_path_head == NULL) {
2668 		ph->ph_path_head = pip;
2669 	} else {
2670 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2671 	}
2672 	ph->ph_path_tail = pip;
2673 	ph->ph_path_count++;
2674 }
2675 
2676 /*
2677  * i_mdi_client_add_path():
2678  *		Add mdi_pathinfo node to client list
2679  */
2680 
2681 static void
2682 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2683 {
2684 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2685 
2686 	if (ct->ct_path_head == NULL) {
2687 		ct->ct_path_head = pip;
2688 	} else {
2689 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2690 	}
2691 	ct->ct_path_tail = pip;
2692 	ct->ct_path_count++;
2693 }
2694 
2695 /*
2696  * mdi_pi_free():
2697  *		Free the mdi_pathinfo node and also client device node if this
2698  *		is the last path to the device
 *
 *		The node must be in the offline, init or initing state.  If
 *		references are still outstanding the call waits for them to
 *		drain, giving up after a 60 second wait times out.  Unless the
 *		node is still in the initing state, the vHCI's vo_pi_uninit
 *		entry point (if any) is called before the node is destroyed;
 *		the node is only destroyed when that callback succeeds.
 *
 *		pip	pathinfo node to free (dereferenced unconditionally)
 *		flags	not examined by this function
2699  * Return Values:
2700  *		MDI_SUCCESS
2701  *		MDI_FAILURE
2702  *		MDI_BUSY
 *		(or whatever non-success value vo_pi_uninit returned)
2703  */
2704 
2705 /*ARGSUSED*/
2706 int
2707 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
2708 {
2709 	int		rv = MDI_SUCCESS;
2710 	mdi_vhci_t	*vh;
2711 	mdi_phci_t	*ph;
2712 	mdi_client_t	*ct;
2713 	int		(*f)();
2714 	int		client_held = 0;
2715 
	/*
	 * The pathinfo lock is held while the owning pHCI/vHCI/client are
	 * validated, the state is checked and references are drained.
	 */
2716 	MDI_PI_LOCK(pip);
2717 	ph = MDI_PI(pip)->pi_phci;
2718 	ASSERT(ph != NULL);
2719 	if (ph == NULL) {
2720 		/*
2721 		 * Invalid pHCI device, return failure
2722 		 */
2723 		MDI_DEBUG(1, (CE_WARN, NULL,
2724 		    "!mdi_pi_free: invalid pHCI"));
2725 		MDI_PI_UNLOCK(pip);
2726 		return (MDI_FAILURE);
2727 	}
2728 
2729 	vh = ph->ph_vhci;
2730 	ASSERT(vh != NULL);
2731 	if (vh == NULL) {
2732 		/* Invalid vHCI device, return failure */
2733 		MDI_DEBUG(1, (CE_WARN, NULL,
2734 		    "!mdi_pi_free: invalid vHCI"));
2735 		MDI_PI_UNLOCK(pip);
2736 		return (MDI_FAILURE);
2737 	}
2738 
2739 	ct = MDI_PI(pip)->pi_client;
2740 	ASSERT(ct != NULL);
2741 	if (ct == NULL) {
2742 		/*
2743 		 * Invalid Client device, return failure
2744 		 */
2745 		MDI_DEBUG(1, (CE_WARN, NULL,
2746 		    "!mdi_pi_free: invalid client"));
2747 		MDI_PI_UNLOCK(pip);
2748 		return (MDI_FAILURE);
2749 	}
2750 
2751 	/*
2752 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
2753 	 * if the node state is either offline or init (or still initing) and
2754 	 * the reference count is zero.
2755 	 */
2756 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
2757 	    MDI_PI_IS_INITING(pip))) {
2758 		/*
2759 		 * Node is busy
2760 		 */
2761 		MDI_DEBUG(1, (CE_WARN, NULL,
2762 		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
2763 		MDI_PI_UNLOCK(pip);
2764 		return (MDI_BUSY);
2765 	}
2766 
	/*
	 * Drain outstanding references.  mdi_rele_path() broadcasts
	 * pi_ref_cv when the count reaches zero; the count is re-checked
	 * after every wakeup.  Each wait is bounded to 60 seconds.
	 */
2767 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
2768 		/*
2769 		 * Give a chance for pending I/Os to complete.
2770 		 */
2771 		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
2772 		    "%d cmds still pending on path: %p\n",
2773 		    MDI_PI(pip)->pi_ref_cnt, pip));
2774 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
2775 		    &MDI_PI(pip)->pi_mutex,
2776 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
2777 			/*
2778 			 * The timeout time reached without ref_cnt being zero
2779 			 * being signaled.
2780 			 */
2781 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2782 			    "!mdi_pi_free: "
2783 			    "Timeout reached on path %p without the cond\n",
2784 			    pip));
2785 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2786 			    "!mdi_pi_free: "
2787 			    "%d cmds still pending on path: %p\n",
2788 			    MDI_PI(pip)->pi_ref_cnt, pip));
2789 			MDI_PI_UNLOCK(pip);
2790 			return (MDI_BUSY);
2791 		}
2792 	}
	/*
	 * Remember whether this path holds a power-management hold so the
	 * matching client hold can be released once the node is freed.
	 */
2793 	if (MDI_PI(pip)->pi_pm_held) {
2794 		client_held = 1;
2795 	}
2796 	MDI_PI_UNLOCK(pip);
2797 
	/*
	 * Drop the node from the vhci cache up front; it is added back
	 * below if vo_pi_uninit reports MDI_FAILURE.
	 */
2798 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
2799 
2800 	MDI_CLIENT_LOCK(ct);
2801 
2802 	/* Prevent further failovers till mdi_mutex is held */
2803 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
2804 
2805 	/*
2806 	 * Wait till failover is complete before removing this node.
2807 	 */
2808 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
2809 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
2810 
	/*
	 * The client lock is dropped and re-taken after mdi_mutex so that
	 * the two are acquired in the mdi_mutex -> ct_mutex order.
	 */
2811 	MDI_CLIENT_UNLOCK(ct);
2812 	mutex_enter(&mdi_mutex);
2813 	MDI_CLIENT_LOCK(ct);
2814 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
2815 
	/*
	 * A node that is still initing skips the vHCI uninit callback and
	 * rv keeps its initial MDI_SUCCESS.
	 */
2816 	if (!MDI_PI_IS_INITING(pip)) {
2817 		f = vh->vh_ops->vo_pi_uninit;
2818 		if (f != NULL) {
2819 			rv = (*f)(vh->vh_dip, pip, 0);
2820 		}
2821 	}
2822 	/*
2823 	 * If vo_pi_uninit() completed successfully.
2824 	 */
2825 	if (rv == MDI_SUCCESS) {
2826 		if (client_held) {
2827 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
2828 			    "i_mdi_pm_rele_client\n"));
2829 			i_mdi_pm_rele_client(ct, 1);
2830 		}
		/* pip must not be touched after this call. */
2831 		i_mdi_pi_free(ph, pip, ct);
2832 		if (ct->ct_path_count == 0) {
2833 			/*
2834 			 * Client lost its last path.
2835 			 * Clean up the client device
2836 			 */
2837 			MDI_CLIENT_UNLOCK(ct);
2838 			(void) i_mdi_client_free(ct->ct_vhci, ct);
2839 			mutex_exit(&mdi_mutex);
2840 			return (rv);
2841 		}
2842 	}
2843 	MDI_CLIENT_UNLOCK(ct);
2844 	mutex_exit(&mdi_mutex);
2845 
	/*
	 * The node was not freed; restore its vhci cache entry.
	 * NOTE(review): only MDI_FAILURE restores the entry.  If
	 * vo_pi_uninit can return another non-success code the node would
	 * stay out of the cache - confirm against the vHCI drivers.
	 */
2846 	if (rv == MDI_FAILURE)
2847 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2848 
2849 	return (rv);
2850 }
2851 
2852 /*
2853  * i_mdi_pi_free():
2854  *		Free the mdi_pathinfo node
2855  */
2856 static void
2857 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
2858 {
2859 	int	ct_circular;
2860 	int	ph_circular;
2861 	int	se_flag;
2862 	int	kmem_flag;
2863 
2864 	/*
2865 	 * remove any per-path kstats
2866 	 */
2867 	i_mdi_pi_kstat_destroy(pip);
2868 
2869 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2870 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2871 
2872 	i_mdi_client_remove_path(ct, pip);
2873 	i_mdi_phci_remove_path(ph, pip);
2874 
2875 	ndi_devi_exit(ph->ph_dip, ph_circular);
2876 	ndi_devi_exit(ct->ct_dip, ct_circular);
2877 
2878 	/* determine interrupt context */
2879 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2880 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2881 
2882 	i_ddi_di_cache_invalidate(kmem_flag);
2883 
2884 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
2885 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
2886 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
2887 	if (MDI_PI(pip)->pi_addr) {
2888 		kmem_free(MDI_PI(pip)->pi_addr,
2889 		    strlen(MDI_PI(pip)->pi_addr) + 1);
2890 		MDI_PI(pip)->pi_addr = NULL;
2891 	}
2892 
2893 	if (MDI_PI(pip)->pi_prop) {
2894 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
2895 		MDI_PI(pip)->pi_prop = NULL;
2896 	}
2897 	kmem_free(pip, sizeof (struct mdi_pathinfo));
2898 }
2899 
2900 
2901 /*
2902  * i_mdi_phci_remove_path():
2903  * 		Remove a mdi_pathinfo node from pHCI list.
2904  * Notes:
2905  *		Caller should hold per-pHCI mutex
2906  */
2907 
2908 static void
2909 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2910 {
2911 	mdi_pathinfo_t	*prev = NULL;
2912 	mdi_pathinfo_t	*path = NULL;
2913 
2914 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2915 
2916 	path = ph->ph_path_head;
2917 	while (path != NULL) {
2918 		if (path == pip) {
2919 			break;
2920 		}
2921 		prev = path;
2922 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2923 	}
2924 
2925 	if (path) {
2926 		ph->ph_path_count--;
2927 		if (prev) {
2928 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
2929 		} else {
2930 			ph->ph_path_head =
2931 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2932 		}
2933 		if (ph->ph_path_tail == path) {
2934 			ph->ph_path_tail = prev;
2935 		}
2936 	}
2937 
2938 	/*
2939 	 * Clear the pHCI link
2940 	 */
2941 	MDI_PI(pip)->pi_phci_link = NULL;
2942 	MDI_PI(pip)->pi_phci = NULL;
2943 }
2944 
2945 /*
2946  * i_mdi_client_remove_path():
2947  * 		Remove a mdi_pathinfo node from client path list.
2948  */
2949 
2950 static void
2951 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2952 {
2953 	mdi_pathinfo_t	*prev = NULL;
2954 	mdi_pathinfo_t	*path;
2955 
2956 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2957 
2958 	path = ct->ct_path_head;
2959 	while (path != NULL) {
2960 		if (path == pip) {
2961 			break;
2962 		}
2963 		prev = path;
2964 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2965 	}
2966 
2967 	if (path) {
2968 		ct->ct_path_count--;
2969 		if (prev) {
2970 			MDI_PI(prev)->pi_client_link =
2971 			    MDI_PI(path)->pi_client_link;
2972 		} else {
2973 			ct->ct_path_head =
2974 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2975 		}
2976 		if (ct->ct_path_tail == path) {
2977 			ct->ct_path_tail = prev;
2978 		}
2979 		if (ct->ct_path_last == path) {
2980 			ct->ct_path_last = ct->ct_path_head;
2981 		}
2982 	}
2983 	MDI_PI(pip)->pi_client_link = NULL;
2984 	MDI_PI(pip)->pi_client = NULL;
2985 }
2986 
2987 /*
2988  * i_mdi_pi_state_change():
2989  *		Drive a mdi_pathinfo node to the requested state.  Despite the
 *		historical "online" wording, the switch below handles the
 *		ONLINE, STANDBY, FAULT and OFFLINE target states.
 *
 *		The sequence is: run the vHCI vo_pi_init callback for a node
 *		that is still initing, refuse the change if the pHCI is not
 *		ready, wait out any in-progress transition and client
 *		failover, mark the node transient for the new state, call the
 *		vHCI vo_pi_state_change callback, and finally update the
 *		client state and online/offline the client devinfo node as
 *		needed.
 *
 *		pip	pathinfo node to change (dereferenced unconditionally)
 *		state	target state
 *		flag	passed through to vo_pi_state_change; NDI_DEVI_REMOVE
 *			is tested here for the OFFLINE / last-path handling
2990  *
2991  * Return Values:
2992  *		MDI_SUCCESS
2993  *		MDI_FAILURE
 *		MDI_BUSY
 *		(or the non-success value returned by vo_pi_state_change)
2994  */
2995 /*ARGSUSED*/
2996 static int
2997 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
2998 {
2999 	int		rv = MDI_SUCCESS;
3000 	mdi_vhci_t	*vh;
3001 	mdi_phci_t	*ph;
3002 	mdi_client_t	*ct;
3003 	int		(*f)();
3004 	dev_info_t	*cdip;
3005 
3006 	MDI_PI_LOCK(pip);
3007 
3008 	ph = MDI_PI(pip)->pi_phci;
3009 	ASSERT(ph);
3010 	if (ph == NULL) {
3011 		/*
3012 		 * Invalid pHCI device, fail the request
3013 		 */
3014 		MDI_PI_UNLOCK(pip);
3015 		MDI_DEBUG(1, (CE_WARN, NULL,
3016 		    "!mdi_pi_state_change: invalid phci"));
3017 		return (MDI_FAILURE);
3018 	}
3019 
3020 	vh = ph->ph_vhci;
3021 	ASSERT(vh);
3022 	if (vh == NULL) {
3023 		/*
3024 		 * Invalid vHCI device, fail the request
3025 		 */
3026 		MDI_PI_UNLOCK(pip);
3027 		MDI_DEBUG(1, (CE_WARN, NULL,
3028 		    "!mdi_pi_state_change: invalid vhci"));
3029 		return (MDI_FAILURE);
3030 	}
3031 
3032 	ct = MDI_PI(pip)->pi_client;
3033 	ASSERT(ct != NULL);
3034 	if (ct == NULL) {
3035 		/*
3036 		 * Invalid client device, fail the request
3037 		 */
3038 		MDI_PI_UNLOCK(pip);
3039 		MDI_DEBUG(1, (CE_WARN, NULL,
3040 		    "!mdi_pi_state_change: invalid client"));
3041 		return (MDI_FAILURE);
3042 	}
3043 
3044 	/*
3045 	 * If this path has not been initialized yet, Callback vHCI driver's
3046 	 * pathinfo node initialize entry point
3047 	 */
3048 
	/*
	 * The pathinfo lock is dropped around the callback.  On callback
	 * failure we return with the lock already released and with the
	 * pHCI not yet marked unstable, so no cleanup is needed.
	 */
3049 	if (MDI_PI_IS_INITING(pip)) {
3050 		MDI_PI_UNLOCK(pip);
3051 		f = vh->vh_ops->vo_pi_init;
3052 		if (f != NULL) {
3053 			rv = (*f)(vh->vh_dip, pip, 0);
3054 			if (rv != MDI_SUCCESS) {
3055 				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
3056 				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
3057 				    vh, pip));
3058 				return (MDI_FAILURE);
3059 			}
3060 		}
3061 		MDI_PI_LOCK(pip);
3062 		MDI_PI_CLEAR_TRANSIENT(pip);
3063 	}
3064 
3065 	/*
3066 	 * Do not allow state transition when pHCI is in offline/suspended
3067 	 * states
3068 	 */
	/*
	 * NOTE(review): i_mdi_phci_lock() is defined elsewhere; it is
	 * presumably the helper for taking the pHCI lock while the pathinfo
	 * lock is held (reverse of the default order) - confirm.
	 */
3069 	i_mdi_phci_lock(ph, pip);
3070 	if (MDI_PHCI_IS_READY(ph) == 0) {
3071 		MDI_DEBUG(1, (CE_WARN, NULL,
3072 		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
3073 		MDI_PI_UNLOCK(pip);
3074 		i_mdi_phci_unlock(ph);
3075 		return (MDI_BUSY);
3076 	}
	/* Undone at state_change_exit, which every later exit goes through. */
3077 	MDI_PHCI_UNSTABLE(ph);
3078 	i_mdi_phci_unlock(ph);
3079 
3080 	/*
3081 	 * Check if mdi_pathinfo state is in transient state.
3082 	 * If yes, offlining is in progress and wait till transient state is
3083 	 * cleared.
3084 	 */
3085 	if (MDI_PI_IS_TRANSIENT(pip)) {
3086 		while (MDI_PI_IS_TRANSIENT(pip)) {
3087 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3088 			    &MDI_PI(pip)->pi_mutex);
3089 		}
3090 	}
3091 
3092 	/*
3093 	 * Grab the client lock in reverse order sequence and release the
3094 	 * mdi_pathinfo mutex.
3095 	 */
3096 	i_mdi_client_lock(ct, pip);
3097 	MDI_PI_UNLOCK(pip);
3098 
3099 	/*
3100 	 * Wait till failover state is cleared
3101 	 */
3102 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3103 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3104 
3105 	/*
3106 	 * Mark the mdi_pathinfo node state as transient
3107 	 */
	/*
	 * There is no default case: for any other value of "state" no
	 * transient marking is applied and the lock is simply dropped after
	 * the switch.
	 */
3108 	MDI_PI_LOCK(pip);
3109 	switch (state) {
3110 	case MDI_PATHINFO_STATE_ONLINE:
3111 		MDI_PI_SET_ONLINING(pip);
3112 		break;
3113 
3114 	case MDI_PATHINFO_STATE_STANDBY:
3115 		MDI_PI_SET_STANDBYING(pip);
3116 		break;
3117 
3118 	case MDI_PATHINFO_STATE_FAULT:
3119 		/*
3120 		 * Mark the pathinfo state as FAULTED
3121 		 */
3122 		MDI_PI_SET_FAULTING(pip);
3123 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3124 		break;
3125 
3126 	case MDI_PATHINFO_STATE_OFFLINE:
3127 		/*
3128 		 * ndi_devi_offline() cannot hold pip or ct locks.
3129 		 */
3130 		MDI_PI_UNLOCK(pip);
3131 		/*
3132 		 * Do not offline if path will become last path and path
3133 		 * is busy for user initiated events.
3134 		 */
		/*
		 * For a removal request against a DEGRADED client the
		 * client devinfo node is offlined first.  If that fails we
		 * leave with neither the client nor the pathinfo lock held
		 * and without having marked the node transient.
		 */
3135 		cdip = ct->ct_dip;
3136 		if ((flag & NDI_DEVI_REMOVE) &&
3137 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3138 			i_mdi_client_unlock(ct);
3139 			rv = ndi_devi_offline(cdip, 0);
3140 			if (rv != NDI_SUCCESS) {
3141 				/*
3142 				 * Convert to MDI error code
3143 				 */
3144 				switch (rv) {
3145 				case NDI_BUSY:
3146 					rv = MDI_BUSY;
3147 					break;
3148 				default:
3149 					rv = MDI_FAILURE;
3150 					break;
3151 				}
3152 				goto state_change_exit;
3153 			} else {
3154 				i_mdi_client_lock(ct, NULL);
3155 			}
3156 		}
3157 		/*
3158 		 * Mark the mdi_pathinfo node state as transient
3159 		 */
3160 		MDI_PI_LOCK(pip);
3161 		MDI_PI_SET_OFFLINING(pip);
3162 		break;
3163 	}
3164 	MDI_PI_UNLOCK(pip);
3165 	MDI_CLIENT_UNSTABLE(ct);
3166 	i_mdi_client_unlock(ct);
3167 
	/*
	 * Call the vHCI state change entry point with no MDI locks held.
	 * NOTE(review): the NOT_SUPPORTED flag below is set on the client
	 * without the client lock held - confirm that this is intended.
	 */
3168 	f = vh->vh_ops->vo_pi_state_change;
3169 	if (f != NULL) {
3170 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3171 		if (rv == MDI_NOT_SUPPORTED) {
3172 			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3173 		}
3174 		if (rv != MDI_SUCCESS) {
3175 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
3176 			    "!vo_pi_state_change: failed rv = %x", rv));
3177 		}
3178 	}
	/*
	 * Finish the transition: commit the new state on success, or fall
	 * back to the saved old state when the callback failed.
	 */
3179 	MDI_CLIENT_LOCK(ct);
3180 	MDI_PI_LOCK(pip);
3181 	if (MDI_PI_IS_TRANSIENT(pip)) {
3182 		if (rv == MDI_SUCCESS) {
3183 			MDI_PI_CLEAR_TRANSIENT(pip);
3184 		} else {
3185 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3186 		}
3187 	}
3188 
3189 	/*
3190 	 * Wake anyone waiting for this mdi_pathinfo node
3191 	 */
3192 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3193 	MDI_PI_UNLOCK(pip);
3194 
3195 	/*
3196 	 * Mark the client device as stable
3197 	 */
3198 	MDI_CLIENT_STABLE(ct);
3199 	if (rv == MDI_SUCCESS) {
		/*
		 * The client state is only recomputed when ct_unstable has
		 * dropped back to zero.
		 */
3200 		if (ct->ct_unstable == 0) {
3201 			cdip = ct->ct_dip;
3202 
3203 			/*
3204 			 * Onlining the mdi_pathinfo node will impact the
3205 			 * client state Update the client and dev_info node
3206 			 * state accordingly
3207 			 */
			/*
			 * From here rv carries an NDI_* code; it is mapped
			 * back to an MDI_* code by the switch further down.
			 */
3208 			rv = NDI_SUCCESS;
3209 			i_mdi_client_update_state(ct);
3210 			switch (MDI_CLIENT_STATE(ct)) {
3211 			case MDI_CLIENT_STATE_OPTIMAL:
3212 			case MDI_CLIENT_STATE_DEGRADED:
3213 				if (cdip && !i_ddi_devi_attached(cdip) &&
3214 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3215 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3216 
3217 					i_mdi_client_unlock(ct);
3218 					/*
3219 					 * Must do ndi_devi_online() through
3220 					 * hotplug thread for deferred
3221 					 * attach mechanism to work
3222 					 */
3223 					rv = ndi_devi_online(cdip, 0);
3224 					i_mdi_client_lock(ct, NULL);
3225 					if ((rv != NDI_SUCCESS) &&
3226 					    (MDI_CLIENT_STATE(ct) ==
3227 					    MDI_CLIENT_STATE_DEGRADED)) {
3228 						/*
3229 						 * ndi_devi_online failed.
3230 						 * Reset client flags to
3231 						 * offline.
3232 						 */
3233 						MDI_DEBUG(1, (CE_WARN, cdip,
3234 						    "!ndi_devi_online: failed "
3235 						    " Error: %x", rv));
3236 						MDI_CLIENT_SET_OFFLINE(ct);
3237 					}
3238 					if (rv != NDI_SUCCESS) {
3239 						/* Reset the path state */
3240 						MDI_PI_LOCK(pip);
3241 						MDI_PI(pip)->pi_state =
3242 						    MDI_PI_OLD_STATE(pip);
3243 						MDI_PI_UNLOCK(pip);
3244 					}
3245 				}
3246 				break;
3247 
3248 			case MDI_CLIENT_STATE_FAILED:
3249 				/*
3250 				 * This is the last path case for
3251 				 * non-user initiated events.
3252 				 */
3253 				if (((flag & NDI_DEVI_REMOVE) == 0) &&
3254 				    cdip && (i_ddi_node_state(cdip) >=
3255 				    DS_INITIALIZED)) {
3256 					i_mdi_client_unlock(ct);
3257 					rv = ndi_devi_offline(cdip, 0);
3258 					i_mdi_client_lock(ct, NULL);
3259 
3260 					if (rv != NDI_SUCCESS) {
3261 						/*
3262 						 * ndi_devi_offline failed.
3263 						 * Reset client flags to
3264 						 * online as the path could not
3265 						 * be offlined.
3266 						 */
3267 						MDI_DEBUG(1, (CE_WARN, cdip,
3268 						    "!ndi_devi_offline: failed "
3269 						    " Error: %x", rv));
3270 						MDI_CLIENT_SET_ONLINE(ct);
3271 					}
3272 				}
3273 				break;
3274 			}
3275 			/*
3276 			 * Convert to MDI error code
3277 			 */
3278 			switch (rv) {
3279 			case NDI_SUCCESS:
3280 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3281 				i_mdi_report_path_state(ct, pip);
3282 				rv = MDI_SUCCESS;
3283 				break;
3284 			case NDI_BUSY:
3285 				rv = MDI_BUSY;
3286 				break;
3287 			default:
3288 				rv = MDI_FAILURE;
3289 				break;
3290 			}
3291 		}
3292 	}
3293 	MDI_CLIENT_UNLOCK(ct);
3294 
	/* Reached with neither the client nor the pathinfo lock held. */
3295 state_change_exit:
3296 	/*
3297 	 * Mark the pHCI as stable again.
3298 	 */
3299 	MDI_PHCI_LOCK(ph);
3300 	MDI_PHCI_STABLE(ph);
3301 	MDI_PHCI_UNLOCK(ph);
3302 	return (rv);
3303 }
3304 
3305 /*
3306  * mdi_pi_online():
3307  *		Place the path_info node in the online state.  The path is
3308  *		now available to be selected by mdi_select_path() for
3309  *		transporting I/O requests to client devices.
3310  * Return Values:
3311  *		MDI_SUCCESS
3312  *		MDI_FAILURE
3313  */
3314 int
3315 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3316 {
3317 	mdi_client_t *ct = MDI_PI(pip)->pi_client;
3318 	dev_info_t *cdip;
3319 	int		client_held = 0;
3320 	int rv;
3321 
3322 	ASSERT(ct != NULL);
3323 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3324 	if (rv != MDI_SUCCESS)
3325 		return (rv);
3326 
3327 	MDI_PI_LOCK(pip);
3328 	if (MDI_PI(pip)->pi_pm_held == 0) {
3329 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3330 		    "i_mdi_pm_hold_pip\n"));
3331 		i_mdi_pm_hold_pip(pip);
3332 		client_held = 1;
3333 	}
3334 	MDI_PI_UNLOCK(pip);
3335 
3336 	if (client_held) {
3337 		MDI_CLIENT_LOCK(ct);
3338 		if (ct->ct_power_cnt == 0) {
3339 			rv = i_mdi_power_all_phci(ct);
3340 		}
3341 
3342 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3343 		    "i_mdi_pm_hold_client\n"));
3344 		i_mdi_pm_hold_client(ct, 1);
3345 		MDI_CLIENT_UNLOCK(ct);
3346 	}
3347 
3348 	/*
3349 	 * Create the per-path (pathinfo) IO and error kstats which
3350 	 * are reported via iostat(1m).
3351 	 *
3352 	 * Defer creating the per-path kstats if device is not yet
3353 	 * attached;  the names of the kstats are constructed in part
3354 	 * using the devices instance number which is assigned during
3355 	 * process of attaching the client device.
3356 	 *
3357 	 * The framework post_attach handler, mdi_post_attach(), is
3358 	 * is responsible for initializing the client's pathinfo list
3359 	 * once successfully attached.
3360 	 */
3361 	cdip = ct->ct_dip;
3362 	ASSERT(cdip);
3363 	if (cdip == NULL || !i_ddi_devi_attached(cdip))
3364 		return (rv);
3365 
3366 	MDI_CLIENT_LOCK(ct);
3367 	rv = i_mdi_pi_kstat_create(pip);
3368 	MDI_CLIENT_UNLOCK(ct);
3369 	return (rv);
3370 }
3371 
3372 /*
3373  * mdi_pi_standby():
3374  *		Place the mdi_pathinfo node in standby state
3375  *
3376  * Return Values:
3377  *		MDI_SUCCESS
3378  *		MDI_FAILURE
3379  */
3380 int
3381 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3382 {
3383 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3384 }
3385 
3386 /*
3387  * mdi_pi_fault():
3388  *		Place the mdi_pathinfo node in fault'ed state
3389  * Return Values:
3390  *		MDI_SUCCESS
3391  *		MDI_FAILURE
3392  */
3393 int
3394 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3395 {
3396 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3397 }
3398 
3399 /*
3400  * mdi_pi_offline():
3401  *		Offline a mdi_pathinfo node.
3402  * Return Values:
3403  *		MDI_SUCCESS
3404  *		MDI_FAILURE
3405  */
3406 int
3407 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3408 {
3409 	int	ret, client_held = 0;
3410 	mdi_client_t	*ct;
3411 
3412 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3413 
3414 	if (ret == MDI_SUCCESS) {
3415 		MDI_PI_LOCK(pip);
3416 		if (MDI_PI(pip)->pi_pm_held) {
3417 			client_held = 1;
3418 		}
3419 		MDI_PI_UNLOCK(pip);
3420 
3421 		if (client_held) {
3422 			ct = MDI_PI(pip)->pi_client;
3423 			MDI_CLIENT_LOCK(ct);
3424 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3425 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3426 			i_mdi_pm_rele_client(ct, 1);
3427 			MDI_CLIENT_UNLOCK(ct);
3428 		}
3429 	}
3430 
3431 	return (ret);
3432 }
3433 
3434 /*
3435  * i_mdi_pi_offline():
3436  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3437  */
3438 static int
3439 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3440 {
3441 	dev_info_t	*vdip = NULL;
3442 	mdi_vhci_t	*vh = NULL;
3443 	mdi_client_t	*ct = NULL;
3444 	int		(*f)();
3445 	int		rv;
3446 
3447 	MDI_PI_LOCK(pip);
3448 	ct = MDI_PI(pip)->pi_client;
3449 	ASSERT(ct != NULL);
3450 
3451 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3452 		/*
3453 		 * Give a chance for pending I/Os to complete.
3454 		 */
3455 		MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3456 		    "%d cmds still pending on path: %p\n",
3457 		    MDI_PI(pip)->pi_ref_cnt, pip));
3458 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3459 		    &MDI_PI(pip)->pi_mutex,
3460 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3461 			/*
3462 			 * The timeout time reached without ref_cnt being zero
3463 			 * being signaled.
3464 			 */
3465 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3466 			    "Timeout reached on path %p without the cond\n",
3467 			    pip));
3468 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3469 			    "%d cmds still pending on path: %p\n",
3470 			    MDI_PI(pip)->pi_ref_cnt, pip));
3471 		}
3472 	}
3473 	vh = ct->ct_vhci;
3474 	vdip = vh->vh_dip;
3475 
3476 	/*
3477 	 * Notify vHCI that has registered this event
3478 	 */
3479 	ASSERT(vh->vh_ops);
3480 	f = vh->vh_ops->vo_pi_state_change;
3481 
3482 	if (f != NULL) {
3483 		MDI_PI_UNLOCK(pip);
3484 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3485 		    flags)) != MDI_SUCCESS) {
3486 			MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed "
3487 			    "vdip 0x%x, pip 0x%x", vdip, pip));
3488 		}
3489 		MDI_PI_LOCK(pip);
3490 	}
3491 
3492 	/*
3493 	 * Set the mdi_pathinfo node state and clear the transient condition
3494 	 */
3495 	MDI_PI_SET_OFFLINE(pip);
3496 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3497 	MDI_PI_UNLOCK(pip);
3498 
3499 	MDI_CLIENT_LOCK(ct);
3500 	if (rv == MDI_SUCCESS) {
3501 		if (ct->ct_unstable == 0) {
3502 			dev_info_t	*cdip = ct->ct_dip;
3503 
3504 			/*
3505 			 * Onlining the mdi_pathinfo node will impact the
3506 			 * client state Update the client and dev_info node
3507 			 * state accordingly
3508 			 */
3509 			i_mdi_client_update_state(ct);
3510 			rv = NDI_SUCCESS;
3511 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3512 				if (cdip &&
3513 				    (i_ddi_node_state(cdip) >=
3514 				    DS_INITIALIZED)) {
3515 					MDI_CLIENT_UNLOCK(ct);
3516 					rv = ndi_devi_offline(cdip, 0);
3517 					MDI_CLIENT_LOCK(ct);
3518 					if (rv != NDI_SUCCESS) {
3519 						/*
3520 						 * ndi_devi_offline failed.
3521 						 * Reset client flags to
3522 						 * online.
3523 						 */
3524 						MDI_DEBUG(4, (CE_WARN, cdip,
3525 						    "!ndi_devi_offline: failed "
3526 						    " Error: %x", rv));
3527 						MDI_CLIENT_SET_ONLINE(ct);
3528 					}
3529 				}
3530 			}
3531 			/*
3532 			 * Convert to MDI error code
3533 			 */
3534 			switch (rv) {
3535 			case NDI_SUCCESS:
3536 				rv = MDI_SUCCESS;
3537 				break;
3538 			case NDI_BUSY:
3539 				rv = MDI_BUSY;
3540 				break;
3541 			default:
3542 				rv = MDI_FAILURE;
3543 				break;
3544 			}
3545 		}
3546 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3547 		i_mdi_report_path_state(ct, pip);
3548 	}
3549 
3550 	MDI_CLIENT_UNLOCK(ct);
3551 
3552 	/*
3553 	 * Change in the mdi_pathinfo node state will impact the client state
3554 	 */
3555 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3556 	    ct, pip));
3557 	return (rv);
3558 }
3559 
3560 
3561 /*
3562  * mdi_pi_get_addr():
3563  *		Get the unit address associated with a mdi_pathinfo node
3564  *
3565  * Return Values:
3566  *		char *
3567  */
3568 char *
3569 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3570 {
3571 	if (pip == NULL)
3572 		return (NULL);
3573 
3574 	return (MDI_PI(pip)->pi_addr);
3575 }
3576 
3577 /*
3578  * mdi_pi_get_client():
3579  *		Get the client devinfo associated with a mdi_pathinfo node
3580  *
3581  * Return Values:
3582  *		Handle to client device dev_info node
3583  */
3584 dev_info_t *
3585 mdi_pi_get_client(mdi_pathinfo_t *pip)
3586 {
3587 	dev_info_t	*dip = NULL;
3588 	if (pip) {
3589 		dip = MDI_PI(pip)->pi_client->ct_dip;
3590 	}
3591 	return (dip);
3592 }
3593 
3594 /*
3595  * mdi_pi_get_phci():
3596  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3597  * Return Values:
3598  *		Handle to dev_info node
3599  */
3600 dev_info_t *
3601 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3602 {
3603 	dev_info_t	*dip = NULL;
3604 	if (pip) {
3605 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3606 	}
3607 	return (dip);
3608 }
3609 
3610 /*
3611  * mdi_pi_get_client_private():
3612  *		Get the client private information associated with the
3613  *		mdi_pathinfo node
3614  */
3615 void *
3616 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3617 {
3618 	void *cprivate = NULL;
3619 	if (pip) {
3620 		cprivate = MDI_PI(pip)->pi_cprivate;
3621 	}
3622 	return (cprivate);
3623 }
3624 
3625 /*
3626  * mdi_pi_set_client_private():
3627  *		Set the client private information in the mdi_pathinfo node
3628  */
3629 void
3630 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3631 {
3632 	if (pip) {
3633 		MDI_PI(pip)->pi_cprivate = priv;
3634 	}
3635 }
3636 
3637 /*
3638  * mdi_pi_get_phci_private():
3639  *		Get the pHCI private information associated with the
3640  *		mdi_pathinfo node
3641  */
3642 caddr_t
3643 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3644 {
3645 	caddr_t	pprivate = NULL;
3646 	if (pip) {
3647 		pprivate = MDI_PI(pip)->pi_pprivate;
3648 	}
3649 	return (pprivate);
3650 }
3651 
3652 /*
3653  * mdi_pi_set_phci_private():
3654  *		Set the pHCI private information in the mdi_pathinfo node
3655  */
3656 void
3657 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3658 {
3659 	if (pip) {
3660 		MDI_PI(pip)->pi_pprivate = priv;
3661 	}
3662 }
3663 
3664 /*
3665  * mdi_pi_get_state():
3666  *		Get the mdi_pathinfo node state. Transient states are internal
3667  *		and not provided to the users
3668  */
3669 mdi_pathinfo_state_t
3670 mdi_pi_get_state(mdi_pathinfo_t *pip)
3671 {
3672 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3673 
3674 	if (pip) {
3675 		if (MDI_PI_IS_TRANSIENT(pip)) {
3676 			/*
3677 			 * mdi_pathinfo is in state transition.  Return the
3678 			 * last good state.
3679 			 */
3680 			state = MDI_PI_OLD_STATE(pip);
3681 		} else {
3682 			state = MDI_PI_STATE(pip);
3683 		}
3684 	}
3685 	return (state);
3686 }
3687 
3688 /*
3689  * Note that the following function needs to be the new interface for
3690  * mdi_pi_get_state when mpxio gets integrated to ON.
3691  */
3692 int
3693 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
3694 		uint32_t *ext_state)
3695 {
3696 	*state = MDI_PATHINFO_STATE_INIT;
3697 
3698 	if (pip) {
3699 		if (MDI_PI_IS_TRANSIENT(pip)) {
3700 			/*
3701 			 * mdi_pathinfo is in state transition.  Return the
3702 			 * last good state.
3703 			 */
3704 			*state = MDI_PI_OLD_STATE(pip);
3705 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
3706 		} else {
3707 			*state = MDI_PI_STATE(pip);
3708 			*ext_state = MDI_PI_EXT_STATE(pip);
3709 		}
3710 	}
3711 	return (MDI_SUCCESS);
3712 }
3713 
3714 /*
3715  * mdi_pi_get_preferred:
3716  *	Get the preferred path flag
3717  */
3718 int
3719 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
3720 {
3721 	if (pip) {
3722 		return (MDI_PI(pip)->pi_preferred);
3723 	}
3724 	return (0);
3725 }
3726 
3727 /*
3728  * mdi_pi_set_preferred:
3729  *	Set the preferred path flag
3730  */
3731 void
3732 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
3733 {
3734 	if (pip) {
3735 		MDI_PI(pip)->pi_preferred = preferred;
3736 	}
3737 }
3738 
3739 
3740 /*
3741  * mdi_pi_set_state():
3742  *		Set the mdi_pathinfo node state
3743  */
3744 void
3745 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
3746 {
3747 	uint32_t	ext_state;
3748 
3749 	if (pip) {
3750 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
3751 		MDI_PI(pip)->pi_state = state;
3752 		MDI_PI(pip)->pi_state |= ext_state;
3753 	}
3754 }
3755 
3756 /*
3757  * Property functions:
3758  */
3759 
3760 int
3761 i_map_nvlist_error_to_mdi(int val)
3762 {
3763 	int rv;
3764 
3765 	switch (val) {
3766 	case 0:
3767 		rv = DDI_PROP_SUCCESS;
3768 		break;
3769 	case EINVAL:
3770 	case ENOTSUP:
3771 		rv = DDI_PROP_INVAL_ARG;
3772 		break;
3773 	case ENOMEM:
3774 		rv = DDI_PROP_NO_MEMORY;
3775 		break;
3776 	default:
3777 		rv = DDI_PROP_NOT_FOUND;
3778 		break;
3779 	}
3780 	return (rv);
3781 }
3782 
3783 /*
3784  * mdi_pi_get_next_prop():
3785  * 		Property walk function.  The caller should hold mdi_pi_lock()
3786  *		and release by calling mdi_pi_unlock() at the end of walk to
3787  *		get a consistent value.
3788  */
3789 
3790 nvpair_t *
3791 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
3792 {
3793 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3794 		return (NULL);
3795 	}
3796 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3797 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
3798 }
3799 
3800 /*
3801  * mdi_prop_remove():
3802  * 		Remove the named property from the named list.
3803  */
3804 
3805 int
3806 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
3807 {
3808 	if (pip == NULL) {
3809 		return (DDI_PROP_NOT_FOUND);
3810 	}
3811 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3812 	MDI_PI_LOCK(pip);
3813 	if (MDI_PI(pip)->pi_prop == NULL) {
3814 		MDI_PI_UNLOCK(pip);
3815 		return (DDI_PROP_NOT_FOUND);
3816 	}
3817 	if (name) {
3818 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
3819 	} else {
3820 		char		nvp_name[MAXNAMELEN];
3821 		nvpair_t	*nvp;
3822 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
3823 		while (nvp) {
3824 			nvpair_t	*next;
3825 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
3826 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
3827 			    nvpair_name(nvp));
3828 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
3829 			    nvp_name);
3830 			nvp = next;
3831 		}
3832 	}
3833 	MDI_PI_UNLOCK(pip);
3834 	return (DDI_PROP_SUCCESS);
3835 }
3836 
3837 /*
3838  * mdi_prop_size():
3839  * 		Get buffer size needed to pack the property data.
3840  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
3841  *		buffer size.
3842  */
3843 
3844 int
3845 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
3846 {
3847 	int	rv;
3848 	size_t	bufsize;
3849 
3850 	*buflenp = 0;
3851 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3852 		return (DDI_PROP_NOT_FOUND);
3853 	}
3854 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3855 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
3856 	    &bufsize, NV_ENCODE_NATIVE);
3857 	*buflenp = bufsize;
3858 	return (i_map_nvlist_error_to_mdi(rv));
3859 }
3860 
3861 /*
3862  * mdi_prop_pack():
3863  * 		pack the property list.  The caller should hold the
3864  *		mdi_pathinfo_t node to get a consistent data
3865  */
3866 
3867 int
3868 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
3869 {
3870 	int	rv;
3871 	size_t	bufsize;
3872 
3873 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
3874 		return (DDI_PROP_NOT_FOUND);
3875 	}
3876 
3877 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3878 
3879 	bufsize = buflen;
3880 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
3881 	    NV_ENCODE_NATIVE, KM_SLEEP);
3882 
3883 	return (i_map_nvlist_error_to_mdi(rv));
3884 }
3885 
3886 /*
3887  * mdi_prop_update_byte():
3888  *		Create/Update a byte property
3889  */
3890 int
3891 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
3892 {
3893 	int rv;
3894 
3895 	if (pip == NULL) {
3896 		return (DDI_PROP_INVAL_ARG);
3897 	}
3898 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3899 	MDI_PI_LOCK(pip);
3900 	if (MDI_PI(pip)->pi_prop == NULL) {
3901 		MDI_PI_UNLOCK(pip);
3902 		return (DDI_PROP_NOT_FOUND);
3903 	}
3904 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
3905 	MDI_PI_UNLOCK(pip);
3906 	return (i_map_nvlist_error_to_mdi(rv));
3907 }
3908 
3909 /*
3910  * mdi_prop_update_byte_array():
3911  *		Create/Update a byte array property
3912  */
3913 int
3914 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
3915     uint_t nelements)
3916 {
3917 	int rv;
3918 
3919 	if (pip == NULL) {
3920 		return (DDI_PROP_INVAL_ARG);
3921 	}
3922 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3923 	MDI_PI_LOCK(pip);
3924 	if (MDI_PI(pip)->pi_prop == NULL) {
3925 		MDI_PI_UNLOCK(pip);
3926 		return (DDI_PROP_NOT_FOUND);
3927 	}
3928 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
3929 	MDI_PI_UNLOCK(pip);
3930 	return (i_map_nvlist_error_to_mdi(rv));
3931 }
3932 
3933 /*
3934  * mdi_prop_update_int():
3935  *		Create/Update a 32 bit integer property
3936  */
3937 int
3938 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
3939 {
3940 	int rv;
3941 
3942 	if (pip == NULL) {
3943 		return (DDI_PROP_INVAL_ARG);
3944 	}
3945 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3946 	MDI_PI_LOCK(pip);
3947 	if (MDI_PI(pip)->pi_prop == NULL) {
3948 		MDI_PI_UNLOCK(pip);
3949 		return (DDI_PROP_NOT_FOUND);
3950 	}
3951 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
3952 	MDI_PI_UNLOCK(pip);
3953 	return (i_map_nvlist_error_to_mdi(rv));
3954 }
3955 
3956 /*
3957  * mdi_prop_update_int64():
3958  *		Create/Update a 64 bit integer property
3959  */
3960 int
3961 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
3962 {
3963 	int rv;
3964 
3965 	if (pip == NULL) {
3966 		return (DDI_PROP_INVAL_ARG);
3967 	}
3968 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3969 	MDI_PI_LOCK(pip);
3970 	if (MDI_PI(pip)->pi_prop == NULL) {
3971 		MDI_PI_UNLOCK(pip);
3972 		return (DDI_PROP_NOT_FOUND);
3973 	}
3974 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
3975 	MDI_PI_UNLOCK(pip);
3976 	return (i_map_nvlist_error_to_mdi(rv));
3977 }
3978 
3979 /*
3980  * mdi_prop_update_int_array():
3981  *		Create/Update a int array property
3982  */
3983 int
3984 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
3985 	    uint_t nelements)
3986 {
3987 	int rv;
3988 
3989 	if (pip == NULL) {
3990 		return (DDI_PROP_INVAL_ARG);
3991 	}
3992 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3993 	MDI_PI_LOCK(pip);
3994 	if (MDI_PI(pip)->pi_prop == NULL) {
3995 		MDI_PI_UNLOCK(pip);
3996 		return (DDI_PROP_NOT_FOUND);
3997 	}
3998 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
3999 	    nelements);
4000 	MDI_PI_UNLOCK(pip);
4001 	return (i_map_nvlist_error_to_mdi(rv));
4002 }
4003 
4004 /*
4005  * mdi_prop_update_string():
4006  *		Create/Update a string property
4007  */
4008 int
4009 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4010 {
4011 	int rv;
4012 
4013 	if (pip == NULL) {
4014 		return (DDI_PROP_INVAL_ARG);
4015 	}
4016 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4017 	MDI_PI_LOCK(pip);
4018 	if (MDI_PI(pip)->pi_prop == NULL) {
4019 		MDI_PI_UNLOCK(pip);
4020 		return (DDI_PROP_NOT_FOUND);
4021 	}
4022 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4023 	MDI_PI_UNLOCK(pip);
4024 	return (i_map_nvlist_error_to_mdi(rv));
4025 }
4026 
4027 /*
4028  * mdi_prop_update_string_array():
4029  *		Create/Update a string array property
4030  */
4031 int
4032 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4033     uint_t nelements)
4034 {
4035 	int rv;
4036 
4037 	if (pip == NULL) {
4038 		return (DDI_PROP_INVAL_ARG);
4039 	}
4040 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4041 	MDI_PI_LOCK(pip);
4042 	if (MDI_PI(pip)->pi_prop == NULL) {
4043 		MDI_PI_UNLOCK(pip);
4044 		return (DDI_PROP_NOT_FOUND);
4045 	}
4046 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4047 	    nelements);
4048 	MDI_PI_UNLOCK(pip);
4049 	return (i_map_nvlist_error_to_mdi(rv));
4050 }
4051 
4052 /*
4053  * mdi_prop_lookup_byte():
4054  * 		Look for byte property identified by name.  The data returned
4055  *		is the actual property and valid as long as mdi_pathinfo_t node
4056  *		is alive.
4057  */
4058 int
4059 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4060 {
4061 	int rv;
4062 
4063 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4064 		return (DDI_PROP_NOT_FOUND);
4065 	}
4066 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4067 	return (i_map_nvlist_error_to_mdi(rv));
4068 }
4069 
4070 
4071 /*
4072  * mdi_prop_lookup_byte_array():
4073  * 		Look for byte array property identified by name.  The data
4074  *		returned is the actual property and valid as long as
4075  *		mdi_pathinfo_t node is alive.
4076  */
4077 int
4078 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4079     uint_t *nelements)
4080 {
4081 	int rv;
4082 
4083 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4084 		return (DDI_PROP_NOT_FOUND);
4085 	}
4086 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4087 	    nelements);
4088 	return (i_map_nvlist_error_to_mdi(rv));
4089 }
4090 
4091 /*
4092  * mdi_prop_lookup_int():
4093  * 		Look for int property identified by name.  The data returned
4094  *		is the actual property and valid as long as mdi_pathinfo_t
4095  *		node is alive.
4096  */
4097 int
4098 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4099 {
4100 	int rv;
4101 
4102 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4103 		return (DDI_PROP_NOT_FOUND);
4104 	}
4105 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4106 	return (i_map_nvlist_error_to_mdi(rv));
4107 }
4108 
4109 /*
4110  * mdi_prop_lookup_int64():
4111  * 		Look for int64 property identified by name.  The data returned
4112  *		is the actual property and valid as long as mdi_pathinfo_t node
4113  *		is alive.
4114  */
4115 int
4116 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4117 {
4118 	int rv;
4119 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4120 		return (DDI_PROP_NOT_FOUND);
4121 	}
4122 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4123 	return (i_map_nvlist_error_to_mdi(rv));
4124 }
4125 
4126 /*
4127  * mdi_prop_lookup_int_array():
4128  * 		Look for int array property identified by name.  The data
4129  *		returned is the actual property and valid as long as
4130  *		mdi_pathinfo_t node is alive.
4131  */
4132 int
4133 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4134     uint_t *nelements)
4135 {
4136 	int rv;
4137 
4138 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4139 		return (DDI_PROP_NOT_FOUND);
4140 	}
4141 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4142 	    (int32_t **)data, nelements);
4143 	return (i_map_nvlist_error_to_mdi(rv));
4144 }
4145 
4146 /*
4147  * mdi_prop_lookup_string():
4148  * 		Look for string property identified by name.  The data
4149  *		returned is the actual property and valid as long as
4150  *		mdi_pathinfo_t node is alive.
4151  */
4152 int
4153 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4154 {
4155 	int rv;
4156 
4157 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4158 		return (DDI_PROP_NOT_FOUND);
4159 	}
4160 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4161 	return (i_map_nvlist_error_to_mdi(rv));
4162 }
4163 
4164 /*
4165  * mdi_prop_lookup_string_array():
4166  * 		Look for string array property identified by name.  The data
4167  *		returned is the actual property and valid as long as
4168  *		mdi_pathinfo_t node is alive.
4169  */
4170 
4171 int
4172 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4173     uint_t *nelements)
4174 {
4175 	int rv;
4176 
4177 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4178 		return (DDI_PROP_NOT_FOUND);
4179 	}
4180 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4181 	    nelements);
4182 	return (i_map_nvlist_error_to_mdi(rv));
4183 }
4184 
4185 /*
4186  * mdi_prop_free():
4187  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4188  *		functions return the pointer to actual property data and not a
4189  *		copy of it.  So the data returned is valid as long as
4190  *		mdi_pathinfo_t node is valid.
4191  */
4192 
4193 /*ARGSUSED*/
4194 int
4195 mdi_prop_free(void *data)
4196 {
4197 	return (DDI_PROP_SUCCESS);
4198 }
4199 
4200 /*ARGSUSED*/
4201 static void
4202 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4203 {
4204 	char		*phci_path, *ct_path;
4205 	char		*ct_status;
4206 	char		*status;
4207 	dev_info_t	*dip = ct->ct_dip;
4208 	char		lb_buf[64];
4209 
4210 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4211 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4212 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4213 		return;
4214 	}
4215 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4216 		ct_status = "optimal";
4217 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4218 		ct_status = "degraded";
4219 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4220 		ct_status = "failed";
4221 	} else {
4222 		ct_status = "unknown";
4223 	}
4224 
4225 	if (MDI_PI_IS_OFFLINE(pip)) {
4226 		status = "offline";
4227 	} else if (MDI_PI_IS_ONLINE(pip)) {
4228 		status = "online";
4229 	} else if (MDI_PI_IS_STANDBY(pip)) {
4230 		status = "standby";
4231 	} else if (MDI_PI_IS_FAULT(pip)) {
4232 		status = "faulted";
4233 	} else {
4234 		status = "unknown";
4235 	}
4236 
4237 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4238 		(void) snprintf(lb_buf, sizeof (lb_buf),
4239 		    "%s, region-size: %d", mdi_load_balance_lba,
4240 			ct->ct_lb_args->region_size);
4241 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4242 		(void) snprintf(lb_buf, sizeof (lb_buf),
4243 		    "%s", mdi_load_balance_none);
4244 	} else {
4245 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4246 		    mdi_load_balance_rr);
4247 	}
4248 
4249 	if (dip) {
4250 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4251 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4252 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4253 		    "path %s (%s%d) to target address: %s is %s"
4254 		    " Load balancing: %s\n",
4255 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4256 		    ddi_get_instance(dip), ct_status,
4257 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4258 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4259 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4260 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4261 		kmem_free(phci_path, MAXPATHLEN);
4262 		kmem_free(ct_path, MAXPATHLEN);
4263 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4264 	}
4265 }
4266 
4267 #ifdef	DEBUG
4268 /*
4269  * i_mdi_log():
4270  *		Utility function for error message management
4271  *
4272  */
4273 
4274 /*VARARGS3*/
4275 static void
4276 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
4277 {
4278 	char		buf[MAXNAMELEN];
4279 	char		name[MAXNAMELEN];
4280 	va_list		ap;
4281 	int		log_only = 0;
4282 	int		boot_only = 0;
4283 	int		console_only = 0;
4284 
4285 	if (dip) {
4286 		if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) {
4287 			(void) snprintf(name, MAXNAMELEN, "%s%d:\n",
4288 			    ddi_node_name(dip), ddi_get_instance(dip));
4289 		} else {
4290 			(void) snprintf(name, MAXNAMELEN, "%s%d:",
4291 			    ddi_node_name(dip), ddi_get_instance(dip));
4292 		}
4293 	} else {
4294 		name[0] = '\0';
4295 	}
4296 
4297 	va_start(ap, fmt);
4298 	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
4299 	va_end(ap);
4300 
4301 	switch (buf[0]) {
4302 	case '!':
4303 		log_only = 1;
4304 		break;
4305 	case '?':
4306 		boot_only = 1;
4307 		break;
4308 	case '^':
4309 		console_only = 1;
4310 		break;
4311 	}
4312 
4313 	switch (level) {
4314 	case CE_NOTE:
4315 		level = CE_CONT;
4316 		/* FALLTHROUGH */
4317 	case CE_CONT:
4318 	case CE_WARN:
4319 	case CE_PANIC:
4320 		if (boot_only) {
4321 			cmn_err(level, "?%s\t%s", name, &buf[1]);
4322 		} else if (console_only) {
4323 			cmn_err(level, "^%s\t%s", name, &buf[1]);
4324 		} else if (log_only) {
4325 			cmn_err(level, "!%s\t%s", name, &buf[1]);
4326 		} else {
4327 			cmn_err(level, "%s\t%s", name, buf);
4328 		}
4329 		break;
4330 	default:
4331 		cmn_err(level, "%s\t%s", name, buf);
4332 		break;
4333 	}
4334 }
4335 #endif	/* DEBUG */
4336 
4337 void
4338 i_mdi_client_online(dev_info_t *ct_dip)
4339 {
4340 	mdi_client_t	*ct;
4341 
4342 	/*
4343 	 * Client online notification. Mark client state as online
4344 	 * restore our binding with dev_info node
4345 	 */
4346 	ct = i_devi_get_client(ct_dip);
4347 	ASSERT(ct != NULL);
4348 	MDI_CLIENT_LOCK(ct);
4349 	MDI_CLIENT_SET_ONLINE(ct);
4350 	/* catch for any memory leaks */
4351 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4352 	ct->ct_dip = ct_dip;
4353 
4354 	if (ct->ct_power_cnt == 0)
4355 		(void) i_mdi_power_all_phci(ct);
4356 
4357 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4358 	    "i_mdi_pm_hold_client\n"));
4359 	i_mdi_pm_hold_client(ct, 1);
4360 
4361 	MDI_CLIENT_UNLOCK(ct);
4362 }
4363 
4364 void
4365 i_mdi_phci_online(dev_info_t *ph_dip)
4366 {
4367 	mdi_phci_t	*ph;
4368 
4369 	/* pHCI online notification. Mark state accordingly */
4370 	ph = i_devi_get_phci(ph_dip);
4371 	ASSERT(ph != NULL);
4372 	MDI_PHCI_LOCK(ph);
4373 	MDI_PHCI_SET_ONLINE(ph);
4374 	MDI_PHCI_UNLOCK(ph);
4375 }
4376 
4377 /*
4378  * mdi_devi_online():
4379  * 		Online notification from NDI framework on pHCI/client
4380  *		device online.
4381  * Return Values:
4382  *		NDI_SUCCESS
4383  *		MDI_FAILURE
4384  */
4385 
4386 /*ARGSUSED*/
4387 int
4388 mdi_devi_online(dev_info_t *dip, uint_t flags)
4389 {
4390 	if (MDI_PHCI(dip)) {
4391 		i_mdi_phci_online(dip);
4392 	}
4393 
4394 	if (MDI_CLIENT(dip)) {
4395 		i_mdi_client_online(dip);
4396 	}
4397 	return (NDI_SUCCESS);
4398 }
4399 
4400 /*
4401  * mdi_devi_offline():
4402  * 		Offline notification from NDI framework on pHCI/Client device
4403  *		offline.
4404  *
4405  * Return Values:
4406  *		NDI_SUCCESS
4407  *		NDI_FAILURE
4408  */
4409 
4410 /*ARGSUSED*/
4411 int
4412 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4413 {
4414 	int		rv = NDI_SUCCESS;
4415 
4416 	if (MDI_CLIENT(dip)) {
4417 		rv = i_mdi_client_offline(dip, flags);
4418 		if (rv != NDI_SUCCESS)
4419 			return (rv);
4420 	}
4421 
4422 	if (MDI_PHCI(dip)) {
4423 		rv = i_mdi_phci_offline(dip, flags);
4424 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4425 			/* set client back online */
4426 			i_mdi_client_online(dip);
4427 		}
4428 	}
4429 
4430 	return (rv);
4431 }
4432 
4433 /*ARGSUSED*/
4434 static int
4435 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4436 {
4437 	int		rv = NDI_SUCCESS;
4438 	mdi_phci_t	*ph;
4439 	mdi_client_t	*ct;
4440 	mdi_pathinfo_t	*pip;
4441 	mdi_pathinfo_t	*next;
4442 	mdi_pathinfo_t	*failed_pip = NULL;
4443 	dev_info_t	*cdip;
4444 
4445 	/*
4446 	 * pHCI component offline notification
4447 	 * Make sure that this pHCI instance is free to be offlined.
4448 	 * If it is OK to proceed, Offline and remove all the child
4449 	 * mdi_pathinfo nodes.  This process automatically offlines
4450 	 * corresponding client devices, for which this pHCI provides
4451 	 * critical services.
4452 	 */
4453 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
4454 	    dip));
4455 
4456 	ph = i_devi_get_phci(dip);
4457 	if (ph == NULL) {
4458 		return (rv);
4459 	}
4460 
4461 	MDI_PHCI_LOCK(ph);
4462 
4463 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4464 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
4465 		MDI_PHCI_UNLOCK(ph);
4466 		return (NDI_SUCCESS);
4467 	}
4468 
4469 	/*
4470 	 * Check to see if the pHCI can be offlined
4471 	 */
4472 	if (ph->ph_unstable) {
4473 		MDI_DEBUG(1, (CE_WARN, dip,
4474 		    "!One or more target devices are in transient "
4475 		    "state. This device can not be removed at "
4476 		    "this moment. Please try again later."));
4477 		MDI_PHCI_UNLOCK(ph);
4478 		return (NDI_BUSY);
4479 	}
4480 
4481 	pip = ph->ph_path_head;
4482 	while (pip != NULL) {
4483 		MDI_PI_LOCK(pip);
4484 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4485 		/*
4486 		 * The mdi_pathinfo state is OK. Check the client state.
4487 		 * If failover in progress fail the pHCI from offlining
4488 		 */
4489 		ct = MDI_PI(pip)->pi_client;
4490 		i_mdi_client_lock(ct, pip);
4491 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4492 		    (ct->ct_unstable)) {
4493 			/*
4494 			 * Failover is in progress, Fail the DR
4495 			 */
4496 			MDI_DEBUG(1, (CE_WARN, dip,
4497 			    "!pHCI device (%s%d) is Busy. %s",
4498 			    ddi_driver_name(dip), ddi_get_instance(dip),
4499 			    "This device can not be removed at "
4500 			    "this moment. Please try again later."));
4501 			MDI_PI_UNLOCK(pip);
4502 			MDI_CLIENT_UNLOCK(ct);
4503 			MDI_PHCI_UNLOCK(ph);
4504 			return (NDI_BUSY);
4505 		}
4506 		MDI_PI_UNLOCK(pip);
4507 
4508 		/*
4509 		 * Check to see of we are removing the last path of this
4510 		 * client device...
4511 		 */
4512 		cdip = ct->ct_dip;
4513 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4514 		    (i_mdi_client_compute_state(ct, ph) ==
4515 		    MDI_CLIENT_STATE_FAILED)) {
4516 			i_mdi_client_unlock(ct);
4517 			MDI_PHCI_UNLOCK(ph);
4518 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4519 				/*
4520 				 * ndi_devi_offline() failed.
4521 				 * This pHCI provides the critical path
4522 				 * to one or more client devices.
4523 				 * Return busy.
4524 				 */
4525 				MDI_PHCI_LOCK(ph);
4526 				MDI_DEBUG(1, (CE_WARN, dip,
4527 				    "!pHCI device (%s%d) is Busy. %s",
4528 				    ddi_driver_name(dip), ddi_get_instance(dip),
4529 				    "This device can not be removed at "
4530 				    "this moment. Please try again later."));
4531 				failed_pip = pip;
4532 				break;
4533 			} else {
4534 				MDI_PHCI_LOCK(ph);
4535 				pip = next;
4536 			}
4537 		} else {
4538 			i_mdi_client_unlock(ct);
4539 			pip = next;
4540 		}
4541 	}
4542 
4543 	if (failed_pip) {
4544 		pip = ph->ph_path_head;
4545 		while (pip != failed_pip) {
4546 			MDI_PI_LOCK(pip);
4547 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4548 			ct = MDI_PI(pip)->pi_client;
4549 			i_mdi_client_lock(ct, pip);
4550 			cdip = ct->ct_dip;
4551 			switch (MDI_CLIENT_STATE(ct)) {
4552 			case MDI_CLIENT_STATE_OPTIMAL:
4553 			case MDI_CLIENT_STATE_DEGRADED:
4554 				if (cdip) {
4555 					MDI_PI_UNLOCK(pip);
4556 					i_mdi_client_unlock(ct);
4557 					MDI_PHCI_UNLOCK(ph);
4558 					(void) ndi_devi_online(cdip, 0);
4559 					MDI_PHCI_LOCK(ph);
4560 					pip = next;
4561 					continue;
4562 				}
4563 				break;
4564 
4565 			case MDI_CLIENT_STATE_FAILED:
4566 				if (cdip) {
4567 					MDI_PI_UNLOCK(pip);
4568 					i_mdi_client_unlock(ct);
4569 					MDI_PHCI_UNLOCK(ph);
4570 					(void) ndi_devi_offline(cdip, 0);
4571 					MDI_PHCI_LOCK(ph);
4572 					pip = next;
4573 					continue;
4574 				}
4575 				break;
4576 			}
4577 			MDI_PI_UNLOCK(pip);
4578 			i_mdi_client_unlock(ct);
4579 			pip = next;
4580 		}
4581 		MDI_PHCI_UNLOCK(ph);
4582 		return (NDI_BUSY);
4583 	}
4584 
4585 	/*
4586 	 * Mark the pHCI as offline
4587 	 */
4588 	MDI_PHCI_SET_OFFLINE(ph);
4589 
4590 	/*
4591 	 * Mark the child mdi_pathinfo nodes as transient
4592 	 */
4593 	pip = ph->ph_path_head;
4594 	while (pip != NULL) {
4595 		MDI_PI_LOCK(pip);
4596 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4597 		MDI_PI_SET_OFFLINING(pip);
4598 		MDI_PI_UNLOCK(pip);
4599 		pip = next;
4600 	}
4601 	MDI_PHCI_UNLOCK(ph);
4602 	/*
4603 	 * Give a chance for any pending commands to execute
4604 	 */
4605 	delay(1);
4606 	MDI_PHCI_LOCK(ph);
4607 	pip = ph->ph_path_head;
4608 	while (pip != NULL) {
4609 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4610 		(void) i_mdi_pi_offline(pip, flags);
4611 		MDI_PI_LOCK(pip);
4612 		ct = MDI_PI(pip)->pi_client;
4613 		if (!MDI_PI_IS_OFFLINE(pip)) {
4614 			MDI_DEBUG(1, (CE_WARN, dip,
4615 			    "!pHCI device (%s%d) is Busy. %s",
4616 			    ddi_driver_name(dip), ddi_get_instance(dip),
4617 			    "This device can not be removed at "
4618 			    "this moment. Please try again later."));
4619 			MDI_PI_UNLOCK(pip);
4620 			MDI_PHCI_SET_ONLINE(ph);
4621 			MDI_PHCI_UNLOCK(ph);
4622 			return (NDI_BUSY);
4623 		}
4624 		MDI_PI_UNLOCK(pip);
4625 		pip = next;
4626 	}
4627 	MDI_PHCI_UNLOCK(ph);
4628 
4629 	return (rv);
4630 }
4631 
4632 /*ARGSUSED*/
4633 static int
4634 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
4635 {
4636 	int		rv = NDI_SUCCESS;
4637 	mdi_client_t	*ct;
4638 
4639 	/*
4640 	 * Client component to go offline.  Make sure that we are
4641 	 * not in failing over state and update client state
4642 	 * accordingly
4643 	 */
4644 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
4645 	    dip));
4646 	ct = i_devi_get_client(dip);
4647 	if (ct != NULL) {
4648 		MDI_CLIENT_LOCK(ct);
4649 		if (ct->ct_unstable) {
4650 			/*
4651 			 * One or more paths are in transient state,
4652 			 * Dont allow offline of a client device
4653 			 */
4654 			MDI_DEBUG(1, (CE_WARN, dip,
4655 			    "!One or more paths to this device is "
4656 			    "in transient state. This device can not "
4657 			    "be removed at this moment. "
4658 			    "Please try again later."));
4659 			MDI_CLIENT_UNLOCK(ct);
4660 			return (NDI_BUSY);
4661 		}
4662 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
4663 			/*
4664 			 * Failover is in progress, Dont allow DR of
4665 			 * a client device
4666 			 */
4667 			MDI_DEBUG(1, (CE_WARN, dip,
4668 			    "!Client device (%s%d) is Busy. %s",
4669 			    ddi_driver_name(dip), ddi_get_instance(dip),
4670 			    "This device can not be removed at "
4671 			    "this moment. Please try again later."));
4672 			MDI_CLIENT_UNLOCK(ct);
4673 			return (NDI_BUSY);
4674 		}
4675 		MDI_CLIENT_SET_OFFLINE(ct);
4676 
4677 		/*
4678 		 * Unbind our relationship with the dev_info node
4679 		 */
4680 		if (flags & NDI_DEVI_REMOVE) {
4681 			ct->ct_dip = NULL;
4682 		}
4683 		MDI_CLIENT_UNLOCK(ct);
4684 	}
4685 	return (rv);
4686 }
4687 
4688 /*
4689  * mdi_pre_attach():
4690  *		Pre attach() notification handler
4691  */
4692 
4693 /*ARGSUSED*/
4694 int
4695 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4696 {
4697 	/* don't support old DDI_PM_RESUME */
4698 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
4699 	    (cmd == DDI_PM_RESUME))
4700 		return (DDI_FAILURE);
4701 
4702 	return (DDI_SUCCESS);
4703 }
4704 
4705 /*
4706  * mdi_post_attach():
4707  *		Post attach() notification handler
4708  */
4709 
4710 /*ARGSUSED*/
4711 void
4712 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
4713 {
4714 	mdi_phci_t	*ph;
4715 	mdi_client_t	*ct;
4716 	mdi_pathinfo_t	*pip;
4717 
4718 	if (MDI_PHCI(dip)) {
4719 		ph = i_devi_get_phci(dip);
4720 		ASSERT(ph != NULL);
4721 
4722 		MDI_PHCI_LOCK(ph);
4723 		switch (cmd) {
4724 		case DDI_ATTACH:
4725 			MDI_DEBUG(2, (CE_NOTE, dip,
4726 			    "!pHCI post_attach: called %p\n", ph));
4727 			if (error == DDI_SUCCESS) {
4728 				MDI_PHCI_SET_ATTACH(ph);
4729 			} else {
4730 				MDI_DEBUG(1, (CE_NOTE, dip,
4731 				    "!pHCI post_attach: failed error=%d\n",
4732 				    error));
4733 				MDI_PHCI_SET_DETACH(ph);
4734 			}
4735 			break;
4736 
4737 		case DDI_RESUME:
4738 			MDI_DEBUG(2, (CE_NOTE, dip,
4739 			    "!pHCI post_resume: called %p\n", ph));
4740 			if (error == DDI_SUCCESS) {
4741 				MDI_PHCI_SET_RESUME(ph);
4742 			} else {
4743 				MDI_DEBUG(1, (CE_NOTE, dip,
4744 				    "!pHCI post_resume: failed error=%d\n",
4745 				    error));
4746 				MDI_PHCI_SET_SUSPEND(ph);
4747 			}
4748 			break;
4749 		}
4750 		MDI_PHCI_UNLOCK(ph);
4751 	}
4752 
4753 	if (MDI_CLIENT(dip)) {
4754 		ct = i_devi_get_client(dip);
4755 		ASSERT(ct != NULL);
4756 
4757 		MDI_CLIENT_LOCK(ct);
4758 		switch (cmd) {
4759 		case DDI_ATTACH:
4760 			MDI_DEBUG(2, (CE_NOTE, dip,
4761 			    "!Client post_attach: called %p\n", ct));
4762 			if (error != DDI_SUCCESS) {
4763 				MDI_DEBUG(1, (CE_NOTE, dip,
4764 				    "!Client post_attach: failed error=%d\n",
4765 				    error));
4766 				MDI_CLIENT_SET_DETACH(ct);
4767 				MDI_DEBUG(4, (CE_WARN, dip,
4768 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
4769 				i_mdi_pm_reset_client(ct);
4770 				break;
4771 			}
4772 
4773 			/*
4774 			 * Client device has successfully attached.
4775 			 * Create kstats for any pathinfo structures
4776 			 * initially associated with this client.
4777 			 */
4778 			for (pip = ct->ct_path_head; pip != NULL;
4779 			    pip = (mdi_pathinfo_t *)
4780 			    MDI_PI(pip)->pi_client_link) {
4781 				(void) i_mdi_pi_kstat_create(pip);
4782 				i_mdi_report_path_state(ct, pip);
4783 			}
4784 			MDI_CLIENT_SET_ATTACH(ct);
4785 			break;
4786 
4787 		case DDI_RESUME:
4788 			MDI_DEBUG(2, (CE_NOTE, dip,
4789 			    "!Client post_attach: called %p\n", ct));
4790 			if (error == DDI_SUCCESS) {
4791 				MDI_CLIENT_SET_RESUME(ct);
4792 			} else {
4793 				MDI_DEBUG(1, (CE_NOTE, dip,
4794 				    "!Client post_resume: failed error=%d\n",
4795 				    error));
4796 				MDI_CLIENT_SET_SUSPEND(ct);
4797 			}
4798 			break;
4799 		}
4800 		MDI_CLIENT_UNLOCK(ct);
4801 	}
4802 }
4803 
4804 /*
4805  * mdi_pre_detach():
4806  *		Pre detach notification handler
4807  */
4808 
4809 /*ARGSUSED*/
4810 int
4811 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4812 {
4813 	int rv = DDI_SUCCESS;
4814 
4815 	if (MDI_CLIENT(dip)) {
4816 		(void) i_mdi_client_pre_detach(dip, cmd);
4817 	}
4818 
4819 	if (MDI_PHCI(dip)) {
4820 		rv = i_mdi_phci_pre_detach(dip, cmd);
4821 	}
4822 
4823 	return (rv);
4824 }
4825 
4826 /*ARGSUSED*/
4827 static int
4828 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4829 {
4830 	int		rv = DDI_SUCCESS;
4831 	mdi_phci_t	*ph;
4832 	mdi_client_t	*ct;
4833 	mdi_pathinfo_t	*pip;
4834 	mdi_pathinfo_t	*failed_pip = NULL;
4835 	mdi_pathinfo_t	*next;
4836 
4837 	ph = i_devi_get_phci(dip);
4838 	if (ph == NULL) {
4839 		return (rv);
4840 	}
4841 
4842 	MDI_PHCI_LOCK(ph);
4843 	switch (cmd) {
4844 	case DDI_DETACH:
4845 		MDI_DEBUG(2, (CE_NOTE, dip,
4846 		    "!pHCI pre_detach: called %p\n", ph));
4847 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
4848 			/*
4849 			 * mdi_pathinfo nodes are still attached to
4850 			 * this pHCI. Fail the detach for this pHCI.
4851 			 */
4852 			MDI_DEBUG(2, (CE_WARN, dip,
4853 			    "!pHCI pre_detach: "
4854 			    "mdi_pathinfo nodes are still attached "
4855 			    "%p\n", ph));
4856 			rv = DDI_FAILURE;
4857 			break;
4858 		}
4859 		MDI_PHCI_SET_DETACH(ph);
4860 		break;
4861 
4862 	case DDI_SUSPEND:
4863 		/*
4864 		 * pHCI is getting suspended.  Since mpxio client
4865 		 * devices may not be suspended at this point, to avoid
4866 		 * a potential stack overflow, it is important to suspend
4867 		 * client devices before pHCI can be suspended.
4868 		 */
4869 
4870 		MDI_DEBUG(2, (CE_NOTE, dip,
4871 		    "!pHCI pre_suspend: called %p\n", ph));
4872 		/*
4873 		 * Suspend all the client devices accessible through this pHCI
4874 		 */
4875 		pip = ph->ph_path_head;
4876 		while (pip != NULL && rv == DDI_SUCCESS) {
4877 			dev_info_t *cdip;
4878 			MDI_PI_LOCK(pip);
4879 			next =
4880 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4881 			ct = MDI_PI(pip)->pi_client;
4882 			i_mdi_client_lock(ct, pip);
4883 			cdip = ct->ct_dip;
4884 			MDI_PI_UNLOCK(pip);
4885 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
4886 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
4887 				i_mdi_client_unlock(ct);
4888 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
4889 				    DDI_SUCCESS) {
4890 					/*
4891 					 * Suspend of one of the client
4892 					 * device has failed.
4893 					 */
4894 					MDI_DEBUG(1, (CE_WARN, dip,
4895 					    "!Suspend of device (%s%d) failed.",
4896 					    ddi_driver_name(cdip),
4897 					    ddi_get_instance(cdip)));
4898 					failed_pip = pip;
4899 					break;
4900 				}
4901 			} else {
4902 				i_mdi_client_unlock(ct);
4903 			}
4904 			pip = next;
4905 		}
4906 
4907 		if (rv == DDI_SUCCESS) {
4908 			/*
4909 			 * Suspend of client devices is complete. Proceed
4910 			 * with pHCI suspend.
4911 			 */
4912 			MDI_PHCI_SET_SUSPEND(ph);
4913 		} else {
4914 			/*
4915 			 * Revert back all the suspended client device states
4916 			 * to converse.
4917 			 */
4918 			pip = ph->ph_path_head;
4919 			while (pip != failed_pip) {
4920 				dev_info_t *cdip;
4921 				MDI_PI_LOCK(pip);
4922 				next =
4923 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4924 				ct = MDI_PI(pip)->pi_client;
4925 				i_mdi_client_lock(ct, pip);
4926 				cdip = ct->ct_dip;
4927 				MDI_PI_UNLOCK(pip);
4928 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
4929 					i_mdi_client_unlock(ct);
4930 					(void) devi_attach(cdip, DDI_RESUME);
4931 				} else {
4932 					i_mdi_client_unlock(ct);
4933 				}
4934 				pip = next;
4935 			}
4936 		}
4937 		break;
4938 
4939 	default:
4940 		rv = DDI_FAILURE;
4941 		break;
4942 	}
4943 	MDI_PHCI_UNLOCK(ph);
4944 	return (rv);
4945 }
4946 
4947 /*ARGSUSED*/
4948 static int
4949 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4950 {
4951 	int		rv = DDI_SUCCESS;
4952 	mdi_client_t	*ct;
4953 
4954 	ct = i_devi_get_client(dip);
4955 	if (ct == NULL) {
4956 		return (rv);
4957 	}
4958 
4959 	MDI_CLIENT_LOCK(ct);
4960 	switch (cmd) {
4961 	case DDI_DETACH:
4962 		MDI_DEBUG(2, (CE_NOTE, dip,
4963 		    "!Client pre_detach: called %p\n", ct));
4964 		MDI_CLIENT_SET_DETACH(ct);
4965 		break;
4966 
4967 	case DDI_SUSPEND:
4968 		MDI_DEBUG(2, (CE_NOTE, dip,
4969 		    "!Client pre_suspend: called %p\n", ct));
4970 		MDI_CLIENT_SET_SUSPEND(ct);
4971 		break;
4972 
4973 	default:
4974 		rv = DDI_FAILURE;
4975 		break;
4976 	}
4977 	MDI_CLIENT_UNLOCK(ct);
4978 	return (rv);
4979 }
4980 
4981 /*
4982  * mdi_post_detach():
4983  *		Post detach notification handler
4984  */
4985 
4986 /*ARGSUSED*/
4987 void
4988 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
4989 {
4990 	/*
4991 	 * Detach/Suspend of mpxio component failed. Update our state
4992 	 * too
4993 	 */
4994 	if (MDI_PHCI(dip))
4995 		i_mdi_phci_post_detach(dip, cmd, error);
4996 
4997 	if (MDI_CLIENT(dip))
4998 		i_mdi_client_post_detach(dip, cmd, error);
4999 }
5000 
5001 /*ARGSUSED*/
5002 static void
5003 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5004 {
5005 	mdi_phci_t	*ph;
5006 
5007 	/*
5008 	 * Detach/Suspend of phci component failed. Update our state
5009 	 * too
5010 	 */
5011 	ph = i_devi_get_phci(dip);
5012 	if (ph == NULL) {
5013 		return;
5014 	}
5015 
5016 	MDI_PHCI_LOCK(ph);
5017 	/*
5018 	 * Detach of pHCI failed. Restore back converse
5019 	 * state
5020 	 */
5021 	switch (cmd) {
5022 	case DDI_DETACH:
5023 		MDI_DEBUG(2, (CE_NOTE, dip,
5024 		    "!pHCI post_detach: called %p\n", ph));
5025 		if (error != DDI_SUCCESS)
5026 			MDI_PHCI_SET_ATTACH(ph);
5027 		break;
5028 
5029 	case DDI_SUSPEND:
5030 		MDI_DEBUG(2, (CE_NOTE, dip,
5031 		    "!pHCI post_suspend: called %p\n", ph));
5032 		if (error != DDI_SUCCESS)
5033 			MDI_PHCI_SET_RESUME(ph);
5034 		break;
5035 	}
5036 	MDI_PHCI_UNLOCK(ph);
5037 }
5038 
5039 /*ARGSUSED*/
5040 static void
5041 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5042 {
5043 	mdi_client_t	*ct;
5044 
5045 	ct = i_devi_get_client(dip);
5046 	if (ct == NULL) {
5047 		return;
5048 	}
5049 	MDI_CLIENT_LOCK(ct);
5050 	/*
5051 	 * Detach of Client failed. Restore back converse
5052 	 * state
5053 	 */
5054 	switch (cmd) {
5055 	case DDI_DETACH:
5056 		MDI_DEBUG(2, (CE_NOTE, dip,
5057 		    "!Client post_detach: called %p\n", ct));
5058 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5059 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5060 			    "i_mdi_pm_rele_client\n"));
5061 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5062 		} else {
5063 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5064 			    "i_mdi_pm_reset_client\n"));
5065 			i_mdi_pm_reset_client(ct);
5066 		}
5067 		if (error != DDI_SUCCESS)
5068 			MDI_CLIENT_SET_ATTACH(ct);
5069 		break;
5070 
5071 	case DDI_SUSPEND:
5072 		MDI_DEBUG(2, (CE_NOTE, dip,
5073 		    "!Client post_suspend: called %p\n", ct));
5074 		if (error != DDI_SUCCESS)
5075 			MDI_CLIENT_SET_RESUME(ct);
5076 		break;
5077 	}
5078 	MDI_CLIENT_UNLOCK(ct);
5079 }
5080 
5081 /*
5082  * create and install per-path (client - pHCI) statistics
5083  * I/O stats supported: nread, nwritten, reads, and writes
5084  * Error stats - hard errors, soft errors, & transport errors
5085  */
5086 static int
5087 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
5088 {
5089 
5090 	dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip;
5091 	dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip;
5092 	char ksname[KSTAT_STRLEN];
5093 	mdi_pathinfo_t *cpip;
5094 	const char *err_postfix = ",err";
5095 	kstat_t	*kiosp, *kerrsp;
5096 	struct pi_errs	*nsp;
5097 	struct mdi_pi_kstats *mdi_statp;
5098 
5099 	ASSERT(client != NULL && ppath != NULL);
5100 
5101 	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));
5102 
5103 	if (MDI_PI(pip)->pi_kstats != NULL)
5104 		return (MDI_SUCCESS);
5105 
5106 	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
5107 	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
5108 		if (cpip == pip)
5109 			continue;
5110 		/*
5111 		 * We have found a different path with same parent
5112 		 * kstats for a given client-pHCI are common
5113 		 */
5114 		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
5115 		    (MDI_PI(cpip)->pi_kstats != NULL)) {
5116 			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
5117 			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
5118 			return (MDI_SUCCESS);
5119 		}
5120 	}
5121 
5122 	/*
5123 	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
5124 	 * clamp length of name against max length of error kstat name
5125 	 */
5126 	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
5127 	    ddi_driver_name(client), ddi_get_instance(client),
5128 	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
5129 	    (KSTAT_STRLEN - strlen(err_postfix))) {
5130 		return (MDI_FAILURE);
5131 	}
5132 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5133 	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
5134 		return (MDI_FAILURE);
5135 	}
5136 
5137 	(void) strcat(ksname, err_postfix);
5138 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5139 	    KSTAT_TYPE_NAMED,
5140 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5141 
5142 	if (kerrsp == NULL) {
5143 		kstat_delete(kiosp);
5144 		return (MDI_FAILURE);
5145 	}
5146 
5147 	nsp = (struct pi_errs *)kerrsp->ks_data;
5148 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5149 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5150 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5151 	    KSTAT_DATA_UINT32);
5152 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5153 	    KSTAT_DATA_UINT32);
5154 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5155 	    KSTAT_DATA_UINT32);
5156 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5157 	    KSTAT_DATA_UINT32);
5158 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5159 	    KSTAT_DATA_UINT32);
5160 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5161 	    KSTAT_DATA_UINT32);
5162 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5163 	    KSTAT_DATA_UINT32);
5164 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5165 
5166 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5167 	mdi_statp->pi_kstat_ref = 1;
5168 	mdi_statp->pi_kstat_iostats = kiosp;
5169 	mdi_statp->pi_kstat_errstats = kerrsp;
5170 	kstat_install(kiosp);
5171 	kstat_install(kerrsp);
5172 	MDI_PI(pip)->pi_kstats = mdi_statp;
5173 	return (MDI_SUCCESS);
5174 }
5175 
5176 /*
5177  * destroy per-path properties
5178  */
5179 static void
5180 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5181 {
5182 
5183 	struct mdi_pi_kstats *mdi_statp;
5184 
5185 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5186 		return;
5187 
5188 	MDI_PI(pip)->pi_kstats = NULL;
5189 
5190 	/*
5191 	 * the kstat may be shared between multiple pathinfo nodes
5192 	 * decrement this pathinfo's usage, removing the kstats
5193 	 * themselves when the last pathinfo reference is removed.
5194 	 */
5195 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5196 	if (--mdi_statp->pi_kstat_ref != 0)
5197 		return;
5198 
5199 	kstat_delete(mdi_statp->pi_kstat_iostats);
5200 	kstat_delete(mdi_statp->pi_kstat_errstats);
5201 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5202 }
5203 
5204 /*
5205  * update I/O paths KSTATS
5206  */
5207 void
5208 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5209 {
5210 	kstat_t *iostatp;
5211 	size_t xfer_cnt;
5212 
5213 	ASSERT(pip != NULL);
5214 
5215 	/*
5216 	 * I/O can be driven across a path prior to having path
5217 	 * statistics available, i.e. probe(9e).
5218 	 */
5219 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5220 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5221 		xfer_cnt = bp->b_bcount - bp->b_resid;
5222 		if (bp->b_flags & B_READ) {
5223 			KSTAT_IO_PTR(iostatp)->reads++;
5224 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5225 		} else {
5226 			KSTAT_IO_PTR(iostatp)->writes++;
5227 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5228 		}
5229 	}
5230 }
5231 
5232 /*
5233  * disable the path to a particular pHCI (pHCI specified in the phci_path
5234  * argument) for a particular client (specified in the client_path argument).
5235  * Disabling a path means that MPxIO will not select the disabled path for
5236  * routing any new I/O requests.
5237  */
5238 int
5239 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5240 {
5241 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5242 }
5243 
5244 /*
5245  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5246  * argument) for a particular client (specified in the client_path argument).
5247  * Enabling a path means that MPxIO may select the enabled path for routing
5248  * future I/O requests, subject to other path state constraints.
5249  */
5250 
5251 int
5252 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5253 {
5254 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5255 }
5256 
5257 
5258 /*
5259  * Common routine for doing enable/disable.
5260  */
5261 int
5262 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5263 {
5264 
5265 	mdi_phci_t	*ph;
5266 	mdi_vhci_t	*vh = NULL;
5267 	mdi_client_t	*ct;
5268 	mdi_pathinfo_t	*next, *pip;
5269 	int		found_it;
5270 	int		(*f)() = NULL;
5271 	int		rv;
5272 	int		sync_flag = 0;
5273 
5274 	ph = i_devi_get_phci(pdip);
5275 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5276 		" Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip));
5277 	if (ph == NULL) {
5278 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5279 			" failed. ph = NULL operation = %d\n", op));
5280 		return (MDI_FAILURE);
5281 	}
5282 
5283 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5284 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5285 			" Invalid operation = %d\n", op));
5286 		return (MDI_FAILURE);
5287 	}
5288 
5289 	sync_flag = (flags << 8) & 0xf00;
5290 
5291 	vh = ph->ph_vhci;
5292 	f = vh->vh_ops->vo_pi_state_change;
5293 
5294 	if (cdip == NULL) {
5295 		/*
5296 		 * Need to mark the Phci as enabled/disabled.
5297 		 */
5298 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5299 		"Operation %d for the phci\n", op));
5300 		MDI_PHCI_LOCK(ph);
5301 		switch (flags) {
5302 			case USER_DISABLE:
5303 				if (op == MDI_DISABLE_OP)
5304 					MDI_PHCI_SET_USER_DISABLE(ph);
5305 				else
5306 					MDI_PHCI_SET_USER_ENABLE(ph);
5307 				break;
5308 			case DRIVER_DISABLE:
5309 				if (op == MDI_DISABLE_OP)
5310 					MDI_PHCI_SET_DRV_DISABLE(ph);
5311 				else
5312 					MDI_PHCI_SET_DRV_ENABLE(ph);
5313 				break;
5314 			case DRIVER_DISABLE_TRANSIENT:
5315 				if (op == MDI_DISABLE_OP)
5316 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5317 				else
5318 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
5319 				break;
5320 			default:
5321 				MDI_PHCI_UNLOCK(ph);
5322 				MDI_DEBUG(1, (CE_NOTE, NULL,
5323 				"!i_mdi_pi_enable_disable:"
5324 				" Invalid flag argument= %d\n", flags));
5325 		}
5326 
5327 		/*
5328 		 * Phci has been disabled. Now try to enable/disable
5329 		 * path info's to each client.
5330 		 */
5331 		pip = ph->ph_path_head;
5332 		while (pip != NULL) {
5333 			/*
5334 			 * Do a callback into the mdi consumer to let it
5335 			 * know that path is about to be enabled/disabled.
5336 			 */
5337 			if (f != NULL) {
5338 				rv = (*f)(vh->vh_dip, pip, 0,
5339 					MDI_PI_EXT_STATE(pip),
5340 					MDI_EXT_STATE_CHANGE | sync_flag |
5341 					op | MDI_BEFORE_STATE_CHANGE);
5342 				if (rv != MDI_SUCCESS) {
5343 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5344 				"!vo_pi_state_change: failed rv = %x", rv));
5345 				}
5346 			}
5347 
5348 			MDI_PI_LOCK(pip);
5349 			next =
5350 				(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5351 			switch (flags) {
5352 			case USER_DISABLE:
5353 				if (op == MDI_DISABLE_OP)
5354 					MDI_PI_SET_USER_DISABLE(pip);
5355 				else
5356 					MDI_PI_SET_USER_ENABLE(pip);
5357 				break;
5358 			case DRIVER_DISABLE:
5359 				if (op == MDI_DISABLE_OP)
5360 					MDI_PI_SET_DRV_DISABLE(pip);
5361 				else
5362 					MDI_PI_SET_DRV_ENABLE(pip);
5363 				break;
5364 			case DRIVER_DISABLE_TRANSIENT:
5365 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5366 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5367 				else
5368 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5369 				break;
5370 			}
5371 			MDI_PI_UNLOCK(pip);
5372 			/*
5373 			 * Do a callback into the mdi consumer to let it
5374 			 * know that path is now enabled/disabled.
5375 			 */
5376 			if (f != NULL) {
5377 				rv = (*f)(vh->vh_dip, pip, 0,
5378 					MDI_PI_EXT_STATE(pip),
5379 					MDI_EXT_STATE_CHANGE | sync_flag |
5380 					op | MDI_AFTER_STATE_CHANGE);
5381 				if (rv != MDI_SUCCESS) {
5382 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5383 				"!vo_pi_state_change: failed rv = %x", rv));
5384 				}
5385 			}
5386 			pip = next;
5387 		}
5388 		MDI_PHCI_UNLOCK(ph);
5389 	} else {
5390 
5391 		/*
5392 		 * Disable a specific client.
5393 		 */
5394 		ct = i_devi_get_client(cdip);
5395 		if (ct == NULL) {
5396 			MDI_DEBUG(1, (CE_NOTE, NULL,
5397 			"!i_mdi_pi_enable_disable:"
5398 			" failed. ct = NULL operation = %d\n", op));
5399 			return (MDI_FAILURE);
5400 		}
5401 
5402 		MDI_CLIENT_LOCK(ct);
5403 		pip = ct->ct_path_head;
5404 		found_it = 0;
5405 		while (pip != NULL) {
5406 			MDI_PI_LOCK(pip);
5407 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5408 			if (MDI_PI(pip)->pi_phci == ph) {
5409 				MDI_PI_UNLOCK(pip);
5410 				found_it = 1;
5411 				break;
5412 			}
5413 			MDI_PI_UNLOCK(pip);
5414 			pip = next;
5415 		}
5416 
5417 		MDI_CLIENT_UNLOCK(ct);
5418 		if (found_it == 0) {
5419 			MDI_DEBUG(1, (CE_NOTE, NULL,
5420 			"!i_mdi_pi_enable_disable:"
5421 			" failed. Could not find corresponding pip\n"));
5422 			return (MDI_FAILURE);
5423 		}
5424 		/*
5425 		 * Do a callback into the mdi consumer to let it
5426 		 * know that path is about to get enabled/disabled.
5427 		 */
5428 		if (f != NULL) {
5429 			rv = (*f)(vh->vh_dip, pip, 0,
5430 				MDI_PI_EXT_STATE(pip),
5431 				MDI_EXT_STATE_CHANGE | sync_flag |
5432 				op | MDI_BEFORE_STATE_CHANGE);
5433 			if (rv != MDI_SUCCESS) {
5434 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5435 				"!vo_pi_state_change: failed rv = %x", rv));
5436 			}
5437 		}
5438 		MDI_PI_LOCK(pip);
5439 		switch (flags) {
5440 			case USER_DISABLE:
5441 				if (op == MDI_DISABLE_OP)
5442 					MDI_PI_SET_USER_DISABLE(pip);
5443 				else
5444 					MDI_PI_SET_USER_ENABLE(pip);
5445 				break;
5446 			case DRIVER_DISABLE:
5447 				if (op == MDI_DISABLE_OP)
5448 					MDI_PI_SET_DRV_DISABLE(pip);
5449 				else
5450 					MDI_PI_SET_DRV_ENABLE(pip);
5451 				break;
5452 			case DRIVER_DISABLE_TRANSIENT:
5453 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5454 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5455 				else
5456 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5457 				break;
5458 		}
5459 		MDI_PI_UNLOCK(pip);
5460 		/*
5461 		 * Do a callback into the mdi consumer to let it
5462 		 * know that path is now enabled/disabled.
5463 		 */
5464 		if (f != NULL) {
5465 			rv = (*f)(vh->vh_dip, pip, 0,
5466 				MDI_PI_EXT_STATE(pip),
5467 				MDI_EXT_STATE_CHANGE | sync_flag |
5468 				op | MDI_AFTER_STATE_CHANGE);
5469 			if (rv != MDI_SUCCESS) {
5470 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5471 				"!vo_pi_state_change: failed rv = %x", rv));
5472 			}
5473 		}
5474 	}
5475 
5476 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5477 		" Returning success pdip = %p cdip = %p\n", op, pdip, cdip));
5478 	return (MDI_SUCCESS);
5479 }
5480 
5481 /*ARGSUSED3*/
5482 int
5483 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
5484     int flags, clock_t timeout)
5485 {
5486 	mdi_pathinfo_t *pip;
5487 	dev_info_t *dip;
5488 	clock_t interval = drv_usectohz(100000);	/* 0.1 sec */
5489 	char *paddr;
5490 
5491 	MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm));
5492 
5493 	if (!MDI_PHCI(pdip))
5494 		return (MDI_FAILURE);
5495 
5496 	paddr = strchr(devnm, '@');
5497 	if (paddr == NULL)
5498 		return (MDI_FAILURE);
5499 
5500 	paddr++;	/* skip '@' */
5501 	pip = mdi_pi_find(pdip, NULL, paddr);
5502 	while (pip == NULL && timeout > 0) {
5503 		if (interval > timeout)
5504 			interval = timeout;
5505 		if (flags & NDI_DEVI_DEBUG) {
5506 			cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n",
5507 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
5508 			    paddr, interval, timeout);
5509 		}
5510 		delay(interval);
5511 		timeout -= interval;
5512 		interval += interval;
5513 		pip = mdi_pi_find(pdip, NULL, paddr);
5514 	}
5515 
5516 	if (pip == NULL)
5517 		return (MDI_FAILURE);
5518 	dip = mdi_pi_get_client(pip);
5519 	if (ndi_devi_online(dip, flags) != NDI_SUCCESS)
5520 		return (MDI_FAILURE);
5521 	*cdipp = dip;
5522 
5523 	/* TODO: holding should happen inside search functions */
5524 	ndi_hold_devi(dip);
5525 	return (MDI_SUCCESS);
5526 }
5527 
5528 /*
5529  * Ensure phci powered up
5530  */
5531 static void
5532 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
5533 {
5534 	dev_info_t	*ph_dip;
5535 
5536 	ASSERT(pip != NULL);
5537 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5538 
5539 	if (MDI_PI(pip)->pi_pm_held) {
5540 		return;
5541 	}
5542 
5543 	ph_dip = mdi_pi_get_phci(pip);
5544 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n",
5545 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5546 	if (ph_dip == NULL) {
5547 		return;
5548 	}
5549 
5550 	MDI_PI_UNLOCK(pip);
5551 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5552 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5553 	pm_hold_power(ph_dip);
5554 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5555 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5556 	MDI_PI_LOCK(pip);
5557 
5558 	MDI_PI(pip)->pi_pm_held = 1;
5559 }
5560 
5561 /*
5562  * Allow phci powered down
5563  */
5564 static void
5565 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
5566 {
5567 	dev_info_t	*ph_dip = NULL;
5568 
5569 	ASSERT(pip != NULL);
5570 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5571 
5572 	if (MDI_PI(pip)->pi_pm_held == 0) {
5573 		return;
5574 	}
5575 
5576 	ph_dip = mdi_pi_get_phci(pip);
5577 	ASSERT(ph_dip != NULL);
5578 
5579 	MDI_PI_UNLOCK(pip);
5580 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n",
5581 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5582 
5583 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5584 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5585 	pm_rele_power(ph_dip);
5586 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5587 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5588 
5589 	MDI_PI_LOCK(pip);
5590 	MDI_PI(pip)->pi_pm_held = 0;
5591 }
5592 
5593 static void
5594 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
5595 {
5596 	ASSERT(ct);
5597 
5598 	ct->ct_power_cnt += incr;
5599 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client "
5600 	    "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr));
5601 	ASSERT(ct->ct_power_cnt >= 0);
5602 }
5603 
5604 static void
5605 i_mdi_rele_all_phci(mdi_client_t *ct)
5606 {
5607 	mdi_pathinfo_t  *pip;
5608 
5609 	ASSERT(mutex_owned(&ct->ct_mutex));
5610 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5611 	while (pip != NULL) {
5612 		mdi_hold_path(pip);
5613 		MDI_PI_LOCK(pip);
5614 		i_mdi_pm_rele_pip(pip);
5615 		MDI_PI_UNLOCK(pip);
5616 		mdi_rele_path(pip);
5617 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5618 	}
5619 }
5620 
5621 static void
5622 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
5623 {
5624 	ASSERT(ct);
5625 
5626 	if (i_ddi_devi_attached(ct->ct_dip)) {
5627 		ct->ct_power_cnt -= decr;
5628 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
5629 		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
5630 	}
5631 
5632 	ASSERT(ct->ct_power_cnt >= 0);
5633 	if (ct->ct_power_cnt == 0) {
5634 		i_mdi_rele_all_phci(ct);
5635 		return;
5636 	}
5637 }
5638 
5639 static void
5640 i_mdi_pm_reset_client(mdi_client_t *ct)
5641 {
5642 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
5643 	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
5644 	ct->ct_power_cnt = 0;
5645 	i_mdi_rele_all_phci(ct);
5646 	ct->ct_powercnt_config = 0;
5647 	ct->ct_powercnt_unconfig = 0;
5648 	ct->ct_powercnt_reset = 1;
5649 }
5650 
5651 static void
5652 i_mdi_pm_hold_all_phci(mdi_client_t *ct)
5653 {
5654 	mdi_pathinfo_t  *pip;
5655 	ASSERT(mutex_owned(&ct->ct_mutex));
5656 
5657 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5658 	while (pip != NULL) {
5659 		mdi_hold_path(pip);
5660 		MDI_PI_LOCK(pip);
5661 		i_mdi_pm_hold_pip(pip);
5662 		MDI_PI_UNLOCK(pip);
5663 		mdi_rele_path(pip);
5664 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5665 	}
5666 }
5667 
5668 static int
5669 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
5670 {
5671 	int		ret;
5672 	dev_info_t	*ph_dip;
5673 
5674 	MDI_PI_LOCK(pip);
5675 	i_mdi_pm_hold_pip(pip);
5676 
5677 	ph_dip = mdi_pi_get_phci(pip);
5678 	MDI_PI_UNLOCK(pip);
5679 
5680 	/* bring all components of phci to full power */
5681 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5682 	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
5683 	    ddi_get_instance(ph_dip)));
5684 
5685 	ret = pm_powerup(ph_dip);
5686 
5687 	if (ret == DDI_FAILURE) {
5688 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5689 		    "pm_powerup FAILED for %s%d\n",
5690 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5691 
5692 		MDI_PI_LOCK(pip);
5693 		i_mdi_pm_rele_pip(pip);
5694 		MDI_PI_UNLOCK(pip);
5695 		return (MDI_FAILURE);
5696 	}
5697 
5698 	return (MDI_SUCCESS);
5699 }
5700 
5701 static int
5702 i_mdi_power_all_phci(mdi_client_t *ct)
5703 {
5704 	mdi_pathinfo_t  *pip;
5705 	int		succeeded = 0;
5706 
5707 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5708 	while (pip != NULL) {
5709 		mdi_hold_path(pip);
5710 		MDI_CLIENT_UNLOCK(ct);
5711 		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
5712 			succeeded = 1;
5713 
5714 		ASSERT(ct == MDI_PI(pip)->pi_client);
5715 		MDI_CLIENT_LOCK(ct);
5716 		mdi_rele_path(pip);
5717 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5718 	}
5719 
5720 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
5721 }
5722 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are processed here.
 *		BUS_POWER_NOINVOL is rejected and every other op is handed to
 *		pm_busop_bus_power(), whose return value is passed back as is.
 *
 *		For the notification ops, arg is a pm_bp_child_pwrchg_t *;
 *		for BUS_POWER_HAS_CHANGED it is a pm_bp_has_changed_t *.
 *		For BUS_POWER_POST_NOTIFICATION, result is read as an int *
 *		holding the outcome of the power level change.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/* cleared again in the POST_NOTIFICATION case below */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* no power holds on this client yet: power up the phci(s) */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new client power state on a successful change */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* successful power down */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* end of the transition: wake threads waiting on the cv */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level was raised to a non-zero level: power up and hold */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* level dropped to 0 from an old level other than -1 */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
5880 
5881 static int
5882 i_mdi_pm_pre_config_one(dev_info_t *child)
5883 {
5884 	int		ret = MDI_SUCCESS;
5885 	mdi_client_t	*ct;
5886 
5887 	ct = i_devi_get_client(child);
5888 	if (ct == NULL)
5889 		return (MDI_FAILURE);
5890 
5891 	MDI_CLIENT_LOCK(ct);
5892 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5893 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5894 
5895 	if (!MDI_CLIENT_IS_FAILED(ct)) {
5896 		MDI_CLIENT_UNLOCK(ct);
5897 		MDI_DEBUG(4, (CE_NOTE, child,
5898 		    "i_mdi_pm_pre_config_one already configured\n"));
5899 		return (MDI_SUCCESS);
5900 	}
5901 
5902 	if (ct->ct_powercnt_config) {
5903 		MDI_CLIENT_UNLOCK(ct);
5904 		MDI_DEBUG(4, (CE_NOTE, child,
5905 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
5906 		return (MDI_SUCCESS);
5907 	}
5908 
5909 	if (ct->ct_power_cnt == 0) {
5910 		ret = i_mdi_power_all_phci(ct);
5911 	}
5912 	MDI_DEBUG(4, (CE_NOTE, child,
5913 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
5914 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5915 	ct->ct_powercnt_config = 1;
5916 	ct->ct_powercnt_reset = 0;
5917 	MDI_CLIENT_UNLOCK(ct);
5918 	return (ret);
5919 }
5920 
5921 static int
5922 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
5923 {
5924 	int			ret = MDI_SUCCESS;
5925 	dev_info_t		*cdip;
5926 	int			circ;
5927 
5928 	ASSERT(MDI_VHCI(parent));
5929 
5930 	/* ndi_devi_config_one */
5931 	if (child) {
5932 		return (i_mdi_pm_pre_config_one(child));
5933 	}
5934 
5935 	/* devi_config_common */
5936 	ndi_devi_enter(parent, &circ);
5937 	cdip = ddi_get_child(parent);
5938 	while (cdip) {
5939 		dev_info_t *next = ddi_get_next_sibling(cdip);
5940 
5941 		ret = i_mdi_pm_pre_config_one(cdip);
5942 		if (ret != MDI_SUCCESS)
5943 			break;
5944 		cdip = next;
5945 	}
5946 	ndi_devi_exit(parent, circ);
5947 	return (ret);
5948 }
5949 
5950 static int
5951 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
5952 {
5953 	int		ret = MDI_SUCCESS;
5954 	mdi_client_t	*ct;
5955 
5956 	ct = i_devi_get_client(child);
5957 	if (ct == NULL)
5958 		return (MDI_FAILURE);
5959 
5960 	MDI_CLIENT_LOCK(ct);
5961 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5962 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5963 
5964 	if (!i_ddi_devi_attached(ct->ct_dip)) {
5965 		MDI_DEBUG(4, (CE_NOTE, child,
5966 		    "i_mdi_pm_pre_unconfig node detached already\n"));
5967 		MDI_CLIENT_UNLOCK(ct);
5968 		return (MDI_SUCCESS);
5969 	}
5970 
5971 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
5972 	    (flags & NDI_AUTODETACH)) {
5973 		MDI_DEBUG(4, (CE_NOTE, child,
5974 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
5975 		MDI_CLIENT_UNLOCK(ct);
5976 		return (MDI_FAILURE);
5977 	}
5978 
5979 	if (ct->ct_powercnt_unconfig) {
5980 		MDI_DEBUG(4, (CE_NOTE, child,
5981 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
5982 		MDI_CLIENT_UNLOCK(ct);
5983 		*held = 1;
5984 		return (MDI_SUCCESS);
5985 	}
5986 
5987 	if (ct->ct_power_cnt == 0) {
5988 		ret = i_mdi_power_all_phci(ct);
5989 	}
5990 	MDI_DEBUG(4, (CE_NOTE, child,
5991 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
5992 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5993 	ct->ct_powercnt_unconfig = 1;
5994 	ct->ct_powercnt_reset = 0;
5995 	MDI_CLIENT_UNLOCK(ct);
5996 	if (ret == MDI_SUCCESS)
5997 		*held = 1;
5998 	return (ret);
5999 }
6000 
6001 static int
6002 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
6003     int flags)
6004 {
6005 	int			ret = MDI_SUCCESS;
6006 	dev_info_t		*cdip;
6007 	int			circ;
6008 
6009 	ASSERT(MDI_VHCI(parent));
6010 	*held = 0;
6011 
6012 	/* ndi_devi_unconfig_one */
6013 	if (child) {
6014 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6015 	}
6016 
6017 	/* devi_unconfig_common */
6018 	ndi_devi_enter(parent, &circ);
6019 	cdip = ddi_get_child(parent);
6020 	while (cdip) {
6021 		dev_info_t *next = ddi_get_next_sibling(cdip);
6022 
6023 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6024 		cdip = next;
6025 	}
6026 	ndi_devi_exit(parent, circ);
6027 
6028 	if (*held)
6029 		ret = MDI_SUCCESS;
6030 
6031 	return (ret);
6032 }
6033 
6034 static void
6035 i_mdi_pm_post_config_one(dev_info_t *child)
6036 {
6037 	mdi_client_t	*ct;
6038 
6039 	ct = i_devi_get_client(child);
6040 	if (ct == NULL)
6041 		return;
6042 
6043 	MDI_CLIENT_LOCK(ct);
6044 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6045 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6046 
6047 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6048 		MDI_DEBUG(4, (CE_NOTE, child,
6049 		    "i_mdi_pm_post_config_one NOT configured\n"));
6050 		MDI_CLIENT_UNLOCK(ct);
6051 		return;
6052 	}
6053 
6054 	/* client has not been updated */
6055 	if (MDI_CLIENT_IS_FAILED(ct)) {
6056 		MDI_DEBUG(4, (CE_NOTE, child,
6057 		    "i_mdi_pm_post_config_one NOT configured\n"));
6058 		MDI_CLIENT_UNLOCK(ct);
6059 		return;
6060 	}
6061 
6062 	/* another thread might have powered it down or detached it */
6063 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6064 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6065 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6066 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6067 		MDI_DEBUG(4, (CE_NOTE, child,
6068 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6069 		i_mdi_pm_reset_client(ct);
6070 	} else {
6071 		mdi_pathinfo_t  *pip, *next;
6072 		int	valid_path_count = 0;
6073 
6074 		MDI_DEBUG(4, (CE_NOTE, child,
6075 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6076 		pip = ct->ct_path_head;
6077 		while (pip != NULL) {
6078 			MDI_PI_LOCK(pip);
6079 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6080 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6081 				valid_path_count ++;
6082 			MDI_PI_UNLOCK(pip);
6083 			pip = next;
6084 		}
6085 		i_mdi_pm_rele_client(ct, valid_path_count);
6086 	}
6087 	ct->ct_powercnt_config = 0;
6088 	MDI_CLIENT_UNLOCK(ct);
6089 }
6090 
6091 static void
6092 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
6093 {
6094 	int		circ;
6095 	dev_info_t	*cdip;
6096 	ASSERT(MDI_VHCI(parent));
6097 
6098 	/* ndi_devi_config_one */
6099 	if (child) {
6100 		i_mdi_pm_post_config_one(child);
6101 		return;
6102 	}
6103 
6104 	/* devi_config_common */
6105 	ndi_devi_enter(parent, &circ);
6106 	cdip = ddi_get_child(parent);
6107 	while (cdip) {
6108 		dev_info_t *next = ddi_get_next_sibling(cdip);
6109 
6110 		i_mdi_pm_post_config_one(cdip);
6111 		cdip = next;
6112 	}
6113 	ndi_devi_exit(parent, circ);
6114 }
6115 
6116 static void
6117 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6118 {
6119 	mdi_client_t	*ct;
6120 
6121 	ct = i_devi_get_client(child);
6122 	if (ct == NULL)
6123 		return;
6124 
6125 	MDI_CLIENT_LOCK(ct);
6126 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6127 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6128 
6129 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6130 		MDI_DEBUG(4, (CE_NOTE, child,
6131 		    "i_mdi_pm_post_unconfig NOT held\n"));
6132 		MDI_CLIENT_UNLOCK(ct);
6133 		return;
6134 	}
6135 
6136 	/* failure detaching or another thread just attached it */
6137 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6138 	    i_ddi_devi_attached(ct->ct_dip)) ||
6139 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6140 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6141 		MDI_DEBUG(4, (CE_NOTE, child,
6142 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6143 		i_mdi_pm_reset_client(ct);
6144 	} else {
6145 		mdi_pathinfo_t  *pip, *next;
6146 		int	valid_path_count = 0;
6147 
6148 		MDI_DEBUG(4, (CE_NOTE, child,
6149 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6150 		pip = ct->ct_path_head;
6151 		while (pip != NULL) {
6152 			MDI_PI_LOCK(pip);
6153 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6154 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6155 				valid_path_count ++;
6156 			MDI_PI_UNLOCK(pip);
6157 			pip = next;
6158 		}
6159 		i_mdi_pm_rele_client(ct, valid_path_count);
6160 		ct->ct_powercnt_unconfig = 0;
6161 	}
6162 
6163 	MDI_CLIENT_UNLOCK(ct);
6164 }
6165 
6166 static void
6167 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
6168 {
6169 	int			circ;
6170 	dev_info_t		*cdip;
6171 
6172 	ASSERT(MDI_VHCI(parent));
6173 
6174 	if (!held) {
6175 		MDI_DEBUG(4, (CE_NOTE, parent,
6176 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6177 		return;
6178 	}
6179 
6180 	if (child) {
6181 		i_mdi_pm_post_unconfig_one(child);
6182 		return;
6183 	}
6184 
6185 	ndi_devi_enter(parent, &circ);
6186 	cdip = ddi_get_child(parent);
6187 	while (cdip) {
6188 		dev_info_t *next = ddi_get_next_sibling(cdip);
6189 
6190 		i_mdi_pm_post_unconfig_one(cdip);
6191 		cdip = next;
6192 	}
6193 	ndi_devi_exit(parent, circ);
6194 }
6195 
6196 int
6197 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6198 {
6199 	int			circ, ret = MDI_SUCCESS;
6200 	dev_info_t		*client_dip = NULL;
6201 	mdi_client_t		*ct;
6202 
6203 	/*
6204 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6205 	 * Power up pHCI for the named client device.
6206 	 * Note: Before the client is enumerated under vhci by phci,
6207 	 * client_dip can be NULL. Then proceed to power up all the
6208 	 * pHCIs.
6209 	 */
6210 	if (devnm != NULL) {
6211 		ndi_devi_enter(vdip, &circ);
6212 		client_dip = ndi_devi_findchild(vdip, devnm);
6213 		ndi_devi_exit(vdip, circ);
6214 	}
6215 
6216 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));
6217 
6218 	switch (op) {
6219 	case MDI_PM_PRE_CONFIG:
6220 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6221 
6222 		break;
6223 	case MDI_PM_PRE_UNCONFIG:
6224 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6225 		    flags);
6226 
6227 		break;
6228 	case MDI_PM_POST_CONFIG:
6229 		i_mdi_pm_post_config(vdip, client_dip);
6230 
6231 		break;
6232 	case MDI_PM_POST_UNCONFIG:
6233 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6234 
6235 		break;
6236 	case MDI_PM_HOLD_POWER:
6237 	case MDI_PM_RELE_POWER:
6238 		ASSERT(args);
6239 
6240 		client_dip = (dev_info_t *)args;
6241 		ASSERT(MDI_CLIENT(client_dip));
6242 
6243 		ct = i_devi_get_client(client_dip);
6244 		MDI_CLIENT_LOCK(ct);
6245 
6246 		if (op == MDI_PM_HOLD_POWER) {
6247 			if (ct->ct_power_cnt == 0) {
6248 				(void) i_mdi_power_all_phci(ct);
6249 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6250 				    "mdi_power i_mdi_pm_hold_client\n"));
6251 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6252 			}
6253 		} else {
6254 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6255 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6256 				    "mdi_power i_mdi_pm_rele_client\n"));
6257 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6258 			} else {
6259 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6260 				    "mdi_power i_mdi_pm_reset_client\n"));
6261 				i_mdi_pm_reset_client(ct);
6262 			}
6263 		}
6264 
6265 		MDI_CLIENT_UNLOCK(ct);
6266 		break;
6267 	default:
6268 		break;
6269 	}
6270 
6271 	return (ret);
6272 }
6273 
6274 int
6275 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6276 {
6277 	mdi_vhci_t *vhci;
6278 
6279 	if (!MDI_VHCI(dip))
6280 		return (MDI_FAILURE);
6281 
6282 	if (mdi_class) {
6283 		vhci = DEVI(dip)->devi_mdi_xhci;
6284 		ASSERT(vhci);
6285 		*mdi_class = vhci->vh_class;
6286 	}
6287 
6288 	return (MDI_SUCCESS);
6289 }
6290 
6291 int
6292 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6293 {
6294 	mdi_phci_t *phci;
6295 
6296 	if (!MDI_PHCI(dip))
6297 		return (MDI_FAILURE);
6298 
6299 	if (mdi_class) {
6300 		phci = DEVI(dip)->devi_mdi_xhci;
6301 		ASSERT(phci);
6302 		*mdi_class = phci->ph_vhci->vh_class;
6303 	}
6304 
6305 	return (MDI_SUCCESS);
6306 }
6307 
6308 int
6309 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6310 {
6311 	mdi_client_t *client;
6312 
6313 	if (!MDI_CLIENT(dip))
6314 		return (MDI_FAILURE);
6315 
6316 	if (mdi_class) {
6317 		client = DEVI(dip)->devi_mdi_client;
6318 		ASSERT(client);
6319 		*mdi_class = client->ct_vhci->vh_class;
6320 	}
6321 
6322 	return (MDI_SUCCESS);
6323 }
6324 
6325 void *
6326 mdi_client_get_vhci_private(dev_info_t *dip)
6327 {
6328 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6329 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6330 		mdi_client_t	*ct;
6331 		ct = i_devi_get_client(dip);
6332 		return (ct->ct_vprivate);
6333 	}
6334 	return (NULL);
6335 }
6336 
6337 void
6338 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6339 {
6340 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6341 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6342 		mdi_client_t	*ct;
6343 		ct = i_devi_get_client(dip);
6344 		ct->ct_vprivate = data;
6345 	}
6346 }
6347 /*
6348  * mdi_pi_get_vhci_private():
6349  *		Get the vhci private information associated with the
6350  *		mdi_pathinfo node
6351  */
6352 void *
6353 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6354 {
6355 	caddr_t	vprivate = NULL;
6356 	if (pip) {
6357 		vprivate = MDI_PI(pip)->pi_vprivate;
6358 	}
6359 	return (vprivate);
6360 }
6361 
6362 /*
6363  * mdi_pi_set_vhci_private():
6364  *		Set the vhci private information in the mdi_pathinfo node
6365  */
6366 void
6367 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6368 {
6369 	if (pip) {
6370 		MDI_PI(pip)->pi_vprivate = priv;
6371 	}
6372 }
6373 
6374 /*
6375  * mdi_phci_get_vhci_private():
6376  *		Get the vhci private information associated with the
6377  *		mdi_phci node
6378  */
6379 void *
6380 mdi_phci_get_vhci_private(dev_info_t *dip)
6381 {
6382 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6383 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6384 		mdi_phci_t	*ph;
6385 		ph = i_devi_get_phci(dip);
6386 		return (ph->ph_vprivate);
6387 	}
6388 	return (NULL);
6389 }
6390 
6391 /*
6392  * mdi_phci_set_vhci_private():
6393  *		Set the vhci private information in the mdi_phci node
6394  */
6395 void
6396 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6397 {
6398 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6399 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6400 		mdi_phci_t	*ph;
6401 		ph = i_devi_get_phci(dip);
6402 		ph->ph_vprivate = priv;
6403 	}
6404 }
6405 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * Built-in list of phci drivers for every vhci class.
 * Each entry is a driver name followed by its root device support flag.
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
	};

/* single-entry list for the IB class */
static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * The array is indexed in the same order as vhci_class_list[].
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
6439 /*
6440  * Given vhci class name, return its on-disk vhci cache filename.
6441  * Memory for the returned filename which includes the full path is allocated
6442  * by this function.
6443  */
6444 static char *
6445 vhclass2vhcache_filename(char *vhclass)
6446 {
6447 	char *filename;
6448 	int len;
6449 	static char *fmt = "/etc/devices/mdi_%s_cache";
6450 
6451 	/*
6452 	 * fmt contains the on-disk vhci cache file name format;
6453 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
6454 	 */
6455 
6456 	/* the -1 below is to account for "%s" in the format string */
6457 	len = strlen(fmt) + strlen(vhclass) - 1;
6458 	filename = kmem_alloc(len, KM_SLEEP);
6459 	(void) snprintf(filename, len, fmt, vhclass);
6460 	ASSERT(len == (strlen(filename) + 1));
6461 	return (filename);
6462 }
6463 
/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 *
 * A new mdi_vhci_config_t is allocated and attached to vh->vh_config.
 * The cached nvlist is taken from vhcache_nvl[] when one was stored for
 * this vhci class, otherwise (once modrootloaded is set) it is read from
 * disk.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	setup_phci_driver_list(vh);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			/* take over the nvlist; it is freed below */
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		/* populate the in-core cache from the nvlist */
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else  {
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate\n",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* the callback id is used by callb_delete() at teardown */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}
6536 
6537 /*
6538  * free all vhci cache related resources
6539  */
6540 static int
6541 destroy_vhci_cache(mdi_vhci_t *vh)
6542 {
6543 	mdi_vhci_config_t *vhc = vh->vh_config;
6544 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
6545 	mdi_vhcache_phci_t *cphci, *cphci_next;
6546 	mdi_vhcache_client_t *cct, *cct_next;
6547 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
6548 
6549 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
6550 		return (MDI_FAILURE);
6551 
6552 	kmem_free(vhc->vhc_vhcache_filename,
6553 	    strlen(vhc->vhc_vhcache_filename) + 1);
6554 
6555 	if (vhc->vhc_phci_driver_list)
6556 		free_phci_driver_list(vhc);
6557 
6558 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
6559 
6560 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
6561 	    cphci = cphci_next) {
6562 		cphci_next = cphci->cphci_next;
6563 		free_vhcache_phci(cphci);
6564 	}
6565 
6566 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
6567 		cct_next = cct->cct_next;
6568 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
6569 			cpi_next = cpi->cpi_next;
6570 			free_vhcache_pathinfo(cpi);
6571 		}
6572 		free_vhcache_client(cct);
6573 	}
6574 
6575 	rw_destroy(&vhcache->vhcache_lock);
6576 
6577 	mutex_destroy(&vhc->vhc_lock);
6578 	cv_destroy(&vhc->vhc_cv);
6579 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
6580 	return (MDI_SUCCESS);
6581 }
6582 
6583 /*
6584  * Setup the list of phci drivers associated with the specified vhci class.
6585  * MDI uses this information to rebuild bus config cache if in case the
6586  * cache is not available or corrupted.
6587  */
6588 static void
6589 setup_phci_driver_list(mdi_vhci_t *vh)
6590 {
6591 	mdi_vhci_config_t *vhc = vh->vh_config;
6592 	mdi_phci_driver_info_t *driver_list;
6593 	char **driver_list1;
6594 	uint_t ndrivers, ndrivers1;
6595 	int i, j;
6596 
6597 	if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
6598 		driver_list = scsi_phci_driver_list;
6599 		ndrivers = sizeof (scsi_phci_driver_list) /
6600 		    sizeof (mdi_phci_driver_info_t);
6601 	} else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) {
6602 		driver_list = ib_phci_driver_list;
6603 		ndrivers = sizeof (ib_phci_driver_list) /
6604 		    sizeof (mdi_phci_driver_info_t);
6605 	} else {
6606 		driver_list = NULL;
6607 		ndrivers = 0;
6608 	}
6609 
6610 	/*
6611 	 * The driver.conf file of a vhci driver can specify additional
6612 	 * phci drivers using a project private "phci-drivers" property.
6613 	 */
6614 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip,
6615 	    DDI_PROP_DONTPASS, "phci-drivers", &driver_list1,
6616 	    &ndrivers1) != DDI_PROP_SUCCESS)
6617 		ndrivers1 = 0;
6618 
6619 	vhc->vhc_nphci_drivers = ndrivers + ndrivers1;
6620 	if (vhc->vhc_nphci_drivers == 0)
6621 		return;
6622 
6623 	vhc->vhc_phci_driver_list = kmem_alloc(
6624 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP);
6625 
6626 	for (i = 0; i < ndrivers; i++) {
6627 		vhc->vhc_phci_driver_list[i].phdriver_name =
6628 		    i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP);
6629 		vhc->vhc_phci_driver_list[i].phdriver_root_support =
6630 		    driver_list[i].phdriver_root_support;
6631 	}
6632 
6633 	for (j = 0; j < ndrivers1; j++, i++) {
6634 		vhc->vhc_phci_driver_list[i].phdriver_name =
6635 		    i_ddi_strdup(driver_list1[j], KM_SLEEP);
6636 		vhc->vhc_phci_driver_list[i].phdriver_root_support = 1;
6637 	}
6638 
6639 	if (ndrivers1)
6640 		ddi_prop_free(driver_list1);
6641 }
6642 
6643 /*
6644  * Free the memory allocated for the phci driver list
6645  */
6646 static void
6647 free_phci_driver_list(mdi_vhci_config_t *vhc)
6648 {
6649 	int i;
6650 
6651 	if (vhc->vhc_phci_driver_list == NULL)
6652 		return;
6653 
6654 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
6655 		kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name,
6656 		    strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1);
6657 	}
6658 
6659 	kmem_free(vhc->vhc_phci_driver_list,
6660 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers);
6661 }
6662 
/*
 * Stop all vhci cache related async threads and free their resources.
 *
 * Sets MDI_VHC_EXIT and polls until the flush thread flag is clear and
 * the async client config thread count is zero, frees the queued async
 * client config requests, flushes the cache if it is dirty and finally
 * deletes the callback registered under vhc_cbid.
 *
 * Returns MDI_SUCCESS, or MDI_FAILURE when the flush or the callback
 * deletion fails.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/* poll with the lock dropped until the threads are gone */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	/* discard the async client config requests still queued */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		/* flush with the lock dropped; mark dirty again on failure */
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}
6708 
6709 /*
6710  * Stop vhci cache flush thread
6711  */
6712 /* ARGSUSED */
6713 static boolean_t
6714 stop_vhcache_flush_thread(void *arg, int code)
6715 {
6716 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
6717 
6718 	mutex_enter(&vhc->vhc_lock);
6719 	vhc->vhc_flags |= MDI_VHC_EXIT;
6720 	cv_broadcast(&vhc->vhc_cv);
6721 
6722 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
6723 		mutex_exit(&vhc->vhc_lock);
6724 		delay(1);
6725 		mutex_enter(&vhc->vhc_lock);
6726 	}
6727 
6728 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6729 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6730 		mutex_exit(&vhc->vhc_lock);
6731 		(void) flush_vhcache(vhc, 1);
6732 	} else
6733 		mutex_exit(&vhc->vhc_lock);
6734 
6735 	return (B_TRUE);
6736 }
6737 
6738 /*
6739  * Enqueue the vhcache phci (cphci) at the tail of the list
6740  */
6741 static void
6742 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
6743 {
6744 	cphci->cphci_next = NULL;
6745 	if (vhcache->vhcache_phci_head == NULL)
6746 		vhcache->vhcache_phci_head = cphci;
6747 	else
6748 		vhcache->vhcache_phci_tail->cphci_next = cphci;
6749 	vhcache->vhcache_phci_tail = cphci;
6750 }
6751 
6752 /*
6753  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
6754  */
6755 static void
6756 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6757     mdi_vhcache_pathinfo_t *cpi)
6758 {
6759 	cpi->cpi_next = NULL;
6760 	if (cct->cct_cpi_head == NULL)
6761 		cct->cct_cpi_head = cpi;
6762 	else
6763 		cct->cct_cpi_tail->cpi_next = cpi;
6764 	cct->cct_cpi_tail = cpi;
6765 }
6766 
6767 /*
6768  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
6769  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
6770  * flag set come at the beginning of the list. All cpis which have this
6771  * flag set come at the end of the list.
6772  */
6773 static void
6774 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6775     mdi_vhcache_pathinfo_t *newcpi)
6776 {
6777 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
6778 
6779 	if (cct->cct_cpi_head == NULL ||
6780 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
6781 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
6782 	else {
6783 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
6784 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
6785 		    prev_cpi = cpi, cpi = cpi->cpi_next)
6786 			;
6787 
6788 		if (prev_cpi == NULL)
6789 			cct->cct_cpi_head = newcpi;
6790 		else
6791 			prev_cpi->cpi_next = newcpi;
6792 
6793 		newcpi->cpi_next = cpi;
6794 
6795 		if (cpi == NULL)
6796 			cct->cct_cpi_tail = newcpi;
6797 	}
6798 }
6799 
6800 /*
6801  * Enqueue the vhcache client (cct) at the tail of the list
6802  */
6803 static void
6804 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
6805     mdi_vhcache_client_t *cct)
6806 {
6807 	cct->cct_next = NULL;
6808 	if (vhcache->vhcache_client_head == NULL)
6809 		vhcache->vhcache_client_head = cct;
6810 	else
6811 		vhcache->vhcache_client_tail->cct_next = cct;
6812 	vhcache->vhcache_client_tail = cct;
6813 }
6814 
/*
 * Free an array of nelem strings: each non-NULL string (sized as
 * strlen + 1) and then the pointer array. A NULL array is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
	char *elem;
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		elem = str[idx];
		if (elem != NULL)
			kmem_free(elem, strlen(elem) + 1);
	}
	kmem_free(str, sizeof (char *) * nelem);
}
6828 
6829 static void
6830 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
6831 {
6832 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
6833 	kmem_free(cphci, sizeof (*cphci));
6834 }
6835 
6836 static void
6837 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
6838 {
6839 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
6840 	kmem_free(cpi, sizeof (*cpi));
6841 }
6842 
6843 static void
6844 free_vhcache_client(mdi_vhcache_client_t *cct)
6845 {
6846 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
6847 	kmem_free(cct, sizeof (*cct));
6848 }
6849 
6850 static char *
6851 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
6852 {
6853 	char *name_addr;
6854 	int len;
6855 
6856 	len = strlen(ct_name) + strlen(ct_addr) + 2;
6857 	name_addr = kmem_alloc(len, KM_SLEEP);
6858 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
6859 
6860 	if (ret_len)
6861 		*ret_len = len;
6862 	return (name_addr);
6863 }
6864 
6865 /*
6866  * Copy the contents of paddrnvl to vhci cache.
6867  * paddrnvl nvlist contains path information for a vhci client.
6868  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
6869  */
6870 static void
6871 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
6872     mdi_vhcache_client_t *cct)
6873 {
6874 	nvpair_t *nvp = NULL;
6875 	mdi_vhcache_pathinfo_t *cpi;
6876 	uint_t nelem;
6877 	uint32_t *val;
6878 
6879 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6880 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
6881 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
6882 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6883 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
6884 		ASSERT(nelem == 2);
6885 		cpi->cpi_cphci = cphci_list[val[0]];
6886 		cpi->cpi_flags = val[1];
6887 		enqueue_tail_vhcache_pathinfo(cct, cpi);
6888 	}
6889 }
6890 
6891 /*
6892  * Copy the contents of caddrmapnvl to vhci cache.
6893  * caddrmapnvl nvlist contains vhci client address to phci client address
6894  * mappings. See the comment in mainnvl_to_vhcache() for the format of
6895  * this nvlist.
6896  */
6897 static void
6898 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
6899     mdi_vhcache_phci_t *cphci_list[])
6900 {
6901 	nvpair_t *nvp = NULL;
6902 	nvlist_t *paddrnvl;
6903 	mdi_vhcache_client_t *cct;
6904 
6905 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6906 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
6907 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
6908 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6909 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
6910 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
6911 		/* the client must contain at least one path */
6912 		ASSERT(cct->cct_cpi_head != NULL);
6913 
6914 		enqueue_vhcache_client(vhcache, cct);
6915 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
6916 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
6917 	}
6918 }
6919 
6920 /*
6921  * Copy the contents of the main nvlist to vhci cache.
6922  *
6923  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
6924  * The nvlist contains the mappings between the vhci client addresses and
6925  * their corresponding phci client addresses.
6926  *
6927  * The structure of the nvlist is as follows:
6928  *
6929  * Main nvlist:
6930  *	NAME		TYPE		DATA
6931  *	version		int32		version number
6932  *	phcis		string array	array of phci paths
6933  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
6934  *
6935  * structure of c2paddrs_nvl:
6936  *	NAME		TYPE		DATA
6937  *	caddr1		nvlist_t	paddrs_nvl1
6938  *	caddr2		nvlist_t	paddrs_nvl2
6939  *	...
6940  * where caddr1, caddr2, ... are vhci client name and addresses in the
6941  * form of "<clientname>@<clientaddress>".
6942  * (for example: "ssd@2000002037cd9f72");
6943  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
6944  *
6945  * structure of paddrs_nvl:
6946  *	NAME		TYPE		DATA
6947  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
6948  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
6949  *	...
6950  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
6951  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
6952  * phci-ids are integers that identify PHCIs to which the
6953  * the bus specific address belongs to. These integers are used as an index
6954  * into to the phcis string array in the main nvlist to get the PHCI path.
6955  */
6956 static int
6957 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
6958 {
6959 	char **phcis, **phci_namep;
6960 	uint_t nphcis;
6961 	mdi_vhcache_phci_t *cphci, **cphci_list;
6962 	nvlist_t *caddrmapnvl;
6963 	int32_t ver;
6964 	int i;
6965 	size_t cphci_list_size;
6966 
6967 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
6968 
6969 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
6970 	    ver != MDI_VHCI_CACHE_VERSION)
6971 		return (MDI_FAILURE);
6972 
6973 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
6974 	    &nphcis) != 0)
6975 		return (MDI_SUCCESS);
6976 
6977 	ASSERT(nphcis > 0);
6978 
6979 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
6980 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
6981 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
6982 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
6983 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
6984 		enqueue_vhcache_phci(vhcache, cphci);
6985 		cphci_list[i] = cphci;
6986 	}
6987 
6988 	ASSERT(vhcache->vhcache_phci_head != NULL);
6989 
6990 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
6991 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
6992 
6993 	kmem_free(cphci_list, cphci_list_size);
6994 	return (MDI_SUCCESS);
6995 }
6996 
6997 /*
6998  * Build paddrnvl for the specified client using the information in the
6999  * vhci cache and add it to the caddrmapnnvl.
7000  * Returns 0 on success, errno on failure.
7001  */
7002 static int
7003 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7004     nvlist_t *caddrmapnvl)
7005 {
7006 	mdi_vhcache_pathinfo_t *cpi;
7007 	nvlist_t *nvl;
7008 	int err;
7009 	uint32_t val[2];
7010 
7011 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7012 
7013 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7014 		return (err);
7015 
7016 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7017 		val[0] = cpi->cpi_cphci->cphci_id;
7018 		val[1] = cpi->cpi_flags;
7019 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7020 		    != 0)
7021 			goto out;
7022 	}
7023 
7024 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7025 out:
7026 	nvlist_free(nvl);
7027 	return (err);
7028 }
7029 
7030 /*
7031  * Build caddrmapnvl using the information in the vhci cache
7032  * and add it to the mainnvl.
7033  * Returns 0 on success, errno on failure.
7034  */
7035 static int
7036 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7037 {
7038 	mdi_vhcache_client_t *cct;
7039 	nvlist_t *nvl;
7040 	int err;
7041 
7042 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7043 
7044 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7045 		return (err);
7046 
7047 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7048 	    cct = cct->cct_next) {
7049 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7050 			goto out;
7051 	}
7052 
7053 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7054 out:
7055 	nvlist_free(nvl);
7056 	return (err);
7057 }
7058 
7059 /*
7060  * Build nvlist using the information in the vhci cache.
7061  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7062  * Returns nvl on success, NULL on failure.
7063  */
7064 static nvlist_t *
7065 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7066 {
7067 	mdi_vhcache_phci_t *cphci;
7068 	uint_t phci_count;
7069 	char **phcis;
7070 	nvlist_t *nvl;
7071 	int err, i;
7072 
7073 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7074 		nvl = NULL;
7075 		goto out;
7076 	}
7077 
7078 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7079 	    MDI_VHCI_CACHE_VERSION)) != 0)
7080 		goto out;
7081 
7082 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7083 	if (vhcache->vhcache_phci_head == NULL) {
7084 		rw_exit(&vhcache->vhcache_lock);
7085 		return (nvl);
7086 	}
7087 
7088 	phci_count = 0;
7089 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7090 	    cphci = cphci->cphci_next)
7091 		cphci->cphci_id = phci_count++;
7092 
7093 	/* build phci pathname list */
7094 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7095 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7096 	    cphci = cphci->cphci_next, i++)
7097 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7098 
7099 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7100 	    phci_count);
7101 	free_string_array(phcis, phci_count);
7102 
7103 	if (err == 0 &&
7104 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7105 		rw_exit(&vhcache->vhcache_lock);
7106 		return (nvl);
7107 	}
7108 
7109 	rw_exit(&vhcache->vhcache_lock);
7110 out:
7111 	if (nvl)
7112 		nvlist_free(nvl);
7113 	return (NULL);
7114 }
7115 
7116 /*
7117  * Lookup vhcache phci structure for the specified phci path.
7118  */
7119 static mdi_vhcache_phci_t *
7120 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7121 {
7122 	mdi_vhcache_phci_t *cphci;
7123 
7124 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7125 
7126 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7127 	    cphci = cphci->cphci_next) {
7128 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7129 			return (cphci);
7130 	}
7131 
7132 	return (NULL);
7133 }
7134 
7135 /*
7136  * Lookup vhcache phci structure for the specified phci.
7137  */
7138 static mdi_vhcache_phci_t *
7139 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7140 {
7141 	mdi_vhcache_phci_t *cphci;
7142 
7143 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7144 
7145 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7146 	    cphci = cphci->cphci_next) {
7147 		if (cphci->cphci_phci == ph)
7148 			return (cphci);
7149 	}
7150 
7151 	return (NULL);
7152 }
7153 
7154 /*
7155  * Add the specified phci to the vhci cache if not already present.
7156  */
7157 static void
7158 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7159 {
7160 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7161 	mdi_vhcache_phci_t *cphci;
7162 	char *pathname;
7163 	int cache_updated;
7164 
7165 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7166 
7167 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7168 	(void) ddi_pathname(ph->ph_dip, pathname);
7169 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7170 	    != NULL) {
7171 		cphci->cphci_phci = ph;
7172 		cache_updated = 0;
7173 	} else {
7174 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7175 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7176 		cphci->cphci_phci = ph;
7177 		enqueue_vhcache_phci(vhcache, cphci);
7178 		cache_updated = 1;
7179 	}
7180 
7181 	rw_exit(&vhcache->vhcache_lock);
7182 
7183 	/*
7184 	 * Since a new phci has been added, reset
7185 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7186 	 * during next vhcache_discover_paths().
7187 	 */
7188 	mutex_enter(&vhc->vhc_lock);
7189 	vhc->vhc_path_discovery_cutoff_time = 0;
7190 	mutex_exit(&vhc->vhc_lock);
7191 
7192 	kmem_free(pathname, MAXPATHLEN);
7193 	if (cache_updated)
7194 		vhcache_dirty(vhc);
7195 }
7196 
7197 /*
7198  * Remove the reference to the specified phci from the vhci cache.
7199  */
7200 static void
7201 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7202 {
7203 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7204 	mdi_vhcache_phci_t *cphci;
7205 
7206 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7207 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7208 		/* do not remove the actual mdi_vhcache_phci structure */
7209 		cphci->cphci_phci = NULL;
7210 	}
7211 	rw_exit(&vhcache->vhcache_lock);
7212 }
7213 
7214 static void
7215 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7216     mdi_vhcache_lookup_token_t *src)
7217 {
7218 	if (src == NULL) {
7219 		dst->lt_cct = NULL;
7220 		dst->lt_cct_lookup_time = 0;
7221 	} else {
7222 		dst->lt_cct = src->lt_cct;
7223 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7224 	}
7225 }
7226 
7227 /*
7228  * Look up vhcache client for the specified client.
7229  */
7230 static mdi_vhcache_client_t *
7231 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7232     mdi_vhcache_lookup_token_t *token)
7233 {
7234 	mod_hash_val_t hv;
7235 	char *name_addr;
7236 	int len;
7237 
7238 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7239 
7240 	/*
7241 	 * If no vhcache clean occurred since the last lookup, we can
7242 	 * simply return the cct from the last lookup operation.
7243 	 * It works because ccts are never freed except during the vhcache
7244 	 * cleanup operation.
7245 	 */
7246 	if (token != NULL &&
7247 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7248 		return (token->lt_cct);
7249 
7250 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7251 	if (mod_hash_find(vhcache->vhcache_client_hash,
7252 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7253 		if (token) {
7254 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7255 			token->lt_cct_lookup_time = lbolt64;
7256 		}
7257 	} else {
7258 		if (token) {
7259 			token->lt_cct = NULL;
7260 			token->lt_cct_lookup_time = 0;
7261 		}
7262 		hv = NULL;
7263 	}
7264 	kmem_free(name_addr, len);
7265 	return ((mdi_vhcache_client_t *)hv);
7266 }
7267 
7268 /*
7269  * Add the specified path to the vhci cache if not already present.
7270  * Also add the vhcache client for the client corresponding to this path
7271  * if it doesn't already exist.
7272  */
7273 static void
7274 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7275 {
7276 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7277 	mdi_vhcache_client_t *cct;
7278 	mdi_vhcache_pathinfo_t *cpi;
7279 	mdi_phci_t *ph = pip->pi_phci;
7280 	mdi_client_t *ct = pip->pi_client;
7281 	int cache_updated = 0;
7282 
7283 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7284 
7285 	/* if vhcache client for this pip doesn't already exist, add it */
7286 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7287 	    NULL)) == NULL) {
7288 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7289 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7290 		    ct->ct_guid, NULL);
7291 		enqueue_vhcache_client(vhcache, cct);
7292 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7293 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7294 		cache_updated = 1;
7295 	}
7296 
7297 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7298 		if (cpi->cpi_cphci->cphci_phci == ph &&
7299 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7300 			cpi->cpi_pip = pip;
7301 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7302 				cpi->cpi_flags &=
7303 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7304 				sort_vhcache_paths(cct);
7305 				cache_updated = 1;
7306 			}
7307 			break;
7308 		}
7309 	}
7310 
7311 	if (cpi == NULL) {
7312 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7313 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7314 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7315 		ASSERT(cpi->cpi_cphci != NULL);
7316 		cpi->cpi_pip = pip;
7317 		enqueue_vhcache_pathinfo(cct, cpi);
7318 		cache_updated = 1;
7319 	}
7320 
7321 	rw_exit(&vhcache->vhcache_lock);
7322 
7323 	if (cache_updated)
7324 		vhcache_dirty(vhc);
7325 }
7326 
7327 /*
7328  * Remove the reference to the specified path from the vhci cache.
7329  */
7330 static void
7331 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7332 {
7333 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7334 	mdi_client_t *ct = pip->pi_client;
7335 	mdi_vhcache_client_t *cct;
7336 	mdi_vhcache_pathinfo_t *cpi;
7337 
7338 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7339 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7340 	    NULL)) != NULL) {
7341 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7342 		    cpi = cpi->cpi_next) {
7343 			if (cpi->cpi_pip == pip) {
7344 				cpi->cpi_pip = NULL;
7345 				break;
7346 			}
7347 		}
7348 	}
7349 	rw_exit(&vhcache->vhcache_lock);
7350 }
7351 
7352 /*
7353  * Flush the vhci cache to disk.
7354  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7355  */
7356 static int
7357 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7358 {
7359 	nvlist_t *nvl;
7360 	int err;
7361 	int rv;
7362 
7363 	/*
7364 	 * It is possible that the system may shutdown before
7365 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7366 	 * flushing the cache in this case do not check for
7367 	 * i_ddi_io_initialized when force flag is set.
7368 	 */
7369 	if (force_flag == 0 && !i_ddi_io_initialized())
7370 		return (MDI_FAILURE);
7371 
7372 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7373 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7374 		nvlist_free(nvl);
7375 	} else
7376 		err = EFAULT;
7377 
7378 	rv = MDI_SUCCESS;
7379 	mutex_enter(&vhc->vhc_lock);
7380 	if (err != 0) {
7381 		if (err == EROFS) {
7382 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7383 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7384 			    MDI_VHC_VHCACHE_DIRTY);
7385 		} else {
7386 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7387 				cmn_err(CE_CONT, "%s: update failed\n",
7388 				    vhc->vhc_vhcache_filename);
7389 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7390 			}
7391 			rv = MDI_FAILURE;
7392 		}
7393 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7394 		cmn_err(CE_CONT,
7395 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7396 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7397 	}
7398 	mutex_exit(&vhc->vhc_lock);
7399 
7400 	return (rv);
7401 }
7402 
7403 /*
7404  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7405  * Exits itself if left idle for the idle timeout period.
7406  */
7407 static void
7408 vhcache_flush_thread(void *arg)
7409 {
7410 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7411 	clock_t idle_time, quit_at_ticks;
7412 	callb_cpr_t cprinfo;
7413 
7414 	/* number of seconds to sleep idle before exiting */
7415 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7416 
7417 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7418 	    "mdi_vhcache_flush");
7419 	mutex_enter(&vhc->vhc_lock);
7420 	for (; ; ) {
7421 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7422 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7423 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7424 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7425 				(void) cv_timedwait(&vhc->vhc_cv,
7426 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7427 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7428 			} else {
7429 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7430 				mutex_exit(&vhc->vhc_lock);
7431 
7432 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7433 					vhcache_dirty(vhc);
7434 
7435 				mutex_enter(&vhc->vhc_lock);
7436 			}
7437 		}
7438 
7439 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7440 
7441 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7442 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7443 		    ddi_get_lbolt() < quit_at_ticks) {
7444 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7445 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7446 			    quit_at_ticks);
7447 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7448 		}
7449 
7450 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7451 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7452 			goto out;
7453 	}
7454 
7455 out:
7456 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7457 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7458 	CALLB_CPR_EXIT(&cprinfo);
7459 }
7460 
7461 /*
7462  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7463  */
7464 static void
7465 vhcache_dirty(mdi_vhci_config_t *vhc)
7466 {
7467 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7468 	int create_thread;
7469 
7470 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7471 	/* do not flush cache until the cache is fully built */
7472 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7473 		rw_exit(&vhcache->vhcache_lock);
7474 		return;
7475 	}
7476 	rw_exit(&vhcache->vhcache_lock);
7477 
7478 	mutex_enter(&vhc->vhc_lock);
7479 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7480 		mutex_exit(&vhc->vhc_lock);
7481 		return;
7482 	}
7483 
7484 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7485 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
7486 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
7487 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7488 		cv_broadcast(&vhc->vhc_cv);
7489 		create_thread = 0;
7490 	} else {
7491 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
7492 		create_thread = 1;
7493 	}
7494 	mutex_exit(&vhc->vhc_lock);
7495 
7496 	if (create_thread)
7497 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
7498 		    0, &p0, TS_RUN, minclsyspri);
7499 }
7500 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
struct mdi_phci_bus_config_s {
	/* device path of the phci to configure */
	char *phbc_phci_path;
	/* vhci bus config */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;
	/* next phci bus config structure in the list */
	struct mdi_phci_bus_config_s *phbc_next;
};
typedef struct mdi_phci_bus_config_s mdi_phci_bus_config_t;
7510 
7511 /* vhci bus config structure - one for each vhci bus config operation */
7512 typedef struct mdi_vhci_bus_config_s {
7513 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
7514 	major_t vhbc_op_major;		/* bus config op major */
7515 	uint_t vhbc_op_flags;		/* bus config op flags */
7516 	kmutex_t vhbc_lock;
7517 	kcondvar_t vhbc_cv;
7518 	int vhbc_thr_count;
7519 } mdi_vhci_bus_config_t;
7520 
7521 /*
7522  * bus config the specified phci
7523  */
7524 static void
7525 bus_config_phci(void *arg)
7526 {
7527 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
7528 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
7529 	dev_info_t *ph_dip;
7530 
7531 	/*
7532 	 * first configure all path components upto phci and then configure
7533 	 * the phci children.
7534 	 */
7535 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
7536 	    != NULL) {
7537 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
7538 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
7539 			(void) ndi_devi_config_driver(ph_dip,
7540 			    vhbc->vhbc_op_flags,
7541 			    vhbc->vhbc_op_major);
7542 		} else
7543 			(void) ndi_devi_config(ph_dip,
7544 			    vhbc->vhbc_op_flags);
7545 
7546 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7547 		ndi_rele_devi(ph_dip);
7548 	}
7549 
7550 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
7551 	kmem_free(phbc, sizeof (*phbc));
7552 
7553 	mutex_enter(&vhbc->vhbc_lock);
7554 	vhbc->vhbc_thr_count--;
7555 	if (vhbc->vhbc_thr_count == 0)
7556 		cv_broadcast(&vhbc->vhbc_cv);
7557 	mutex_exit(&vhbc->vhbc_lock);
7558 }
7559 
7560 /*
7561  * Bus config all phcis associated with the vhci in parallel.
7562  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
7563  */
7564 static void
7565 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
7566     ddi_bus_config_op_t op, major_t maj)
7567 {
7568 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
7569 	mdi_vhci_bus_config_t *vhbc;
7570 	mdi_vhcache_phci_t *cphci;
7571 
7572 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7573 	if (vhcache->vhcache_phci_head == NULL) {
7574 		rw_exit(&vhcache->vhcache_lock);
7575 		return;
7576 	}
7577 
7578 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
7579 
7580 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7581 	    cphci = cphci->cphci_next) {
7582 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
7583 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
7584 		    KM_SLEEP);
7585 		phbc->phbc_vhbusconfig = vhbc;
7586 		phbc->phbc_next = phbc_head;
7587 		phbc_head = phbc;
7588 		vhbc->vhbc_thr_count++;
7589 	}
7590 	rw_exit(&vhcache->vhcache_lock);
7591 
7592 	vhbc->vhbc_op = op;
7593 	vhbc->vhbc_op_major = maj;
7594 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
7595 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
7596 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
7597 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
7598 
7599 	/* now create threads to initiate bus config on all phcis in parallel */
7600 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
7601 		phbc_next = phbc->phbc_next;
7602 		if (mdi_mtc_off)
7603 			bus_config_phci((void *)phbc);
7604 		else
7605 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
7606 			    0, &p0, TS_RUN, minclsyspri);
7607 	}
7608 
7609 	mutex_enter(&vhbc->vhbc_lock);
7610 	/* wait until all threads exit */
7611 	while (vhbc->vhbc_thr_count > 0)
7612 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
7613 	mutex_exit(&vhbc->vhbc_lock);
7614 
7615 	mutex_destroy(&vhbc->vhbc_lock);
7616 	cv_destroy(&vhbc->vhbc_cv);
7617 	kmem_free(vhbc, sizeof (*vhbc));
7618 }
7619 
7620 /*
7621  * Single threaded version of bus_config_all_phcis()
7622  */
7623 static void
7624 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
7625     ddi_bus_config_op_t op, major_t maj)
7626 {
7627 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7628 
7629 	single_threaded_vhconfig_enter(vhc);
7630 	bus_config_all_phcis(vhcache, flags, op, maj);
7631 	single_threaded_vhconfig_exit(vhc);
7632 }
7633 
7634 /*
7635  * Perform BUS_CONFIG_ONE on the specified child of the phci.
7636  * The path includes the child component in addition to the phci path.
7637  */
7638 static int
7639 bus_config_one_phci_child(char *path)
7640 {
7641 	dev_info_t *ph_dip, *child;
7642 	char *devnm;
7643 	int rv = MDI_FAILURE;
7644 
7645 	/* extract the child component of the phci */
7646 	devnm = strrchr(path, '/');
7647 	*devnm++ = '\0';
7648 
7649 	/*
7650 	 * first configure all path components upto phci and then
7651 	 * configure the phci child.
7652 	 */
7653 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
7654 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
7655 		    NDI_SUCCESS) {
7656 			/*
7657 			 * release the hold that ndi_devi_config_one() placed
7658 			 */
7659 			ndi_rele_devi(child);
7660 			rv = MDI_SUCCESS;
7661 		}
7662 
7663 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7664 		ndi_rele_devi(ph_dip);
7665 	}
7666 
7667 	devnm--;
7668 	*devnm = '/';
7669 	return (rv);
7670 }
7671 
7672 /*
7673  * Build a list of phci client paths for the specified vhci client.
7674  * The list includes only those phci client paths which aren't configured yet.
7675  */
7676 static mdi_phys_path_t *
7677 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
7678 {
7679 	mdi_vhcache_pathinfo_t *cpi;
7680 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
7681 	int config_path, len;
7682 
7683 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7684 		/*
7685 		 * include only those paths that aren't configured.
7686 		 */
7687 		config_path = 0;
7688 		if (cpi->cpi_pip == NULL)
7689 			config_path = 1;
7690 		else {
7691 			MDI_PI_LOCK(cpi->cpi_pip);
7692 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
7693 				config_path = 1;
7694 			MDI_PI_UNLOCK(cpi->cpi_pip);
7695 		}
7696 
7697 		if (config_path) {
7698 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
7699 			len = strlen(cpi->cpi_cphci->cphci_path) +
7700 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
7701 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
7702 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
7703 			    cpi->cpi_cphci->cphci_path, ct_name,
7704 			    cpi->cpi_addr);
7705 			pp->phys_path_next = NULL;
7706 
7707 			if (pp_head == NULL)
7708 				pp_head = pp;
7709 			else
7710 				pp_tail->phys_path_next = pp;
7711 			pp_tail = pp;
7712 		}
7713 	}
7714 
7715 	return (pp_head);
7716 }
7717 
7718 /*
7719  * Free the memory allocated for phci client path list.
7720  */
7721 static void
7722 free_phclient_path_list(mdi_phys_path_t *pp_head)
7723 {
7724 	mdi_phys_path_t *pp, *pp_next;
7725 
7726 	for (pp = pp_head; pp != NULL; pp = pp_next) {
7727 		pp_next = pp->phys_path_next;
7728 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
7729 		kmem_free(pp, sizeof (*pp));
7730 	}
7731 }
7732 
7733 /*
7734  * Allocated async client structure and initialize with the specified values.
7735  */
7736 static mdi_async_client_config_t *
7737 alloc_async_client_config(char *ct_name, char *ct_addr,
7738     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7739 {
7740 	mdi_async_client_config_t *acc;
7741 
7742 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
7743 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
7744 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
7745 	acc->acc_phclient_path_list_head = pp_head;
7746 	init_vhcache_lookup_token(&acc->acc_token, tok);
7747 	acc->acc_next = NULL;
7748 	return (acc);
7749 }
7750 
7751 /*
7752  * Free the memory allocated for the async client structure and their members.
7753  */
7754 static void
7755 free_async_client_config(mdi_async_client_config_t *acc)
7756 {
7757 	if (acc->acc_phclient_path_list_head)
7758 		free_phclient_path_list(acc->acc_phclient_path_list_head);
7759 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
7760 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
7761 	kmem_free(acc, sizeof (*acc));
7762 }
7763 
7764 /*
7765  * Sort vhcache pathinfos (cpis) of the specified client.
7766  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7767  * flag set come at the beginning of the list. All cpis which have this
7768  * flag set come at the end of the list.
7769  */
7770 static void
7771 sort_vhcache_paths(mdi_vhcache_client_t *cct)
7772 {
7773 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
7774 
7775 	cpi_head = cct->cct_cpi_head;
7776 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
7777 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
7778 		cpi_next = cpi->cpi_next;
7779 		enqueue_vhcache_pathinfo(cct, cpi);
7780 	}
7781 }
7782 
7783 /*
7784  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
7785  * every vhcache pathinfo of the specified client. If not adjust the flag
7786  * setting appropriately.
7787  *
7788  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
7789  * on-disk vhci cache. So every time this flag is updated the cache must be
7790  * flushed.
7791  */
7792 static void
7793 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7794     mdi_vhcache_lookup_token_t *tok)
7795 {
7796 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7797 	mdi_vhcache_client_t *cct;
7798 	mdi_vhcache_pathinfo_t *cpi;
7799 
7800 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7801 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
7802 	    == NULL) {
7803 		rw_exit(&vhcache->vhcache_lock);
7804 		return;
7805 	}
7806 
7807 	/*
7808 	 * to avoid unnecessary on-disk cache updates, first check if an
7809 	 * update is really needed. If no update is needed simply return.
7810 	 */
7811 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7812 		if ((cpi->cpi_pip != NULL &&
7813 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
7814 		    (cpi->cpi_pip == NULL &&
7815 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
7816 			break;
7817 		}
7818 	}
7819 	if (cpi == NULL) {
7820 		rw_exit(&vhcache->vhcache_lock);
7821 		return;
7822 	}
7823 
7824 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
7825 		rw_exit(&vhcache->vhcache_lock);
7826 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7827 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
7828 		    tok)) == NULL) {
7829 			rw_exit(&vhcache->vhcache_lock);
7830 			return;
7831 		}
7832 	}
7833 
7834 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7835 		if (cpi->cpi_pip != NULL)
7836 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7837 		else
7838 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7839 	}
7840 	sort_vhcache_paths(cct);
7841 
7842 	rw_exit(&vhcache->vhcache_lock);
7843 	vhcache_dirty(vhc);
7844 }
7845 
7846 /*
7847  * Configure all specified paths of the client.
7848  */
7849 static void
7850 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7851     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7852 {
7853 	mdi_phys_path_t *pp;
7854 
7855 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
7856 		(void) bus_config_one_phci_child(pp->phys_path);
7857 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
7858 }
7859 
7860 /*
7861  * Dequeue elements from vhci async client config list and bus configure
7862  * their corresponding phci clients.
7863  */
7864 static void
7865 config_client_paths_thread(void *arg)
7866 {
7867 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7868 	mdi_async_client_config_t *acc;
7869 	clock_t quit_at_ticks;
7870 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
7871 	callb_cpr_t cprinfo;
7872 
7873 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7874 	    "mdi_config_client_paths");
7875 
7876 	for (; ; ) {
7877 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7878 
7879 		mutex_enter(&vhc->vhc_lock);
7880 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7881 		    vhc->vhc_acc_list_head == NULL &&
7882 		    ddi_get_lbolt() < quit_at_ticks) {
7883 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7884 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7885 			    quit_at_ticks);
7886 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7887 		}
7888 
7889 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7890 		    vhc->vhc_acc_list_head == NULL)
7891 			goto out;
7892 
7893 		acc = vhc->vhc_acc_list_head;
7894 		vhc->vhc_acc_list_head = acc->acc_next;
7895 		if (vhc->vhc_acc_list_head == NULL)
7896 			vhc->vhc_acc_list_tail = NULL;
7897 		vhc->vhc_acc_count--;
7898 		mutex_exit(&vhc->vhc_lock);
7899 
7900 		config_client_paths_sync(vhc, acc->acc_ct_name,
7901 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
7902 		    &acc->acc_token);
7903 
7904 		free_async_client_config(acc);
7905 	}
7906 
7907 out:
7908 	vhc->vhc_acc_thrcount--;
7909 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7910 	CALLB_CPR_EXIT(&cprinfo);
7911 }
7912 
7913 /*
7914  * Arrange for all the phci client paths (pp_head) for the specified client
7915  * to be bus configured asynchronously by a thread.
7916  */
7917 static void
7918 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7919     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7920 {
7921 	mdi_async_client_config_t *acc, *newacc;
7922 	int create_thread;
7923 
7924 	if (pp_head == NULL)
7925 		return;
7926 
7927 	if (mdi_mtc_off) {
7928 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
7929 		free_phclient_path_list(pp_head);
7930 		return;
7931 	}
7932 
7933 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
7934 	ASSERT(newacc);
7935 
7936 	mutex_enter(&vhc->vhc_lock);
7937 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
7938 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
7939 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
7940 			free_async_client_config(newacc);
7941 			mutex_exit(&vhc->vhc_lock);
7942 			return;
7943 		}
7944 	}
7945 
7946 	if (vhc->vhc_acc_list_head == NULL)
7947 		vhc->vhc_acc_list_head = newacc;
7948 	else
7949 		vhc->vhc_acc_list_tail->acc_next = newacc;
7950 	vhc->vhc_acc_list_tail = newacc;
7951 	vhc->vhc_acc_count++;
7952 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
7953 		cv_broadcast(&vhc->vhc_cv);
7954 		create_thread = 0;
7955 	} else {
7956 		vhc->vhc_acc_thrcount++;
7957 		create_thread = 1;
7958 	}
7959 	mutex_exit(&vhc->vhc_lock);
7960 
7961 	if (create_thread)
7962 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
7963 		    0, &p0, TS_RUN, minclsyspri);
7964 }
7965 
7966 /*
7967  * Return number of online paths for the specified client.
7968  */
7969 static int
7970 nonline_paths(mdi_vhcache_client_t *cct)
7971 {
7972 	mdi_vhcache_pathinfo_t *cpi;
7973 	int online_count = 0;
7974 
7975 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7976 		if (cpi->cpi_pip != NULL) {
7977 			MDI_PI_LOCK(cpi->cpi_pip);
7978 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
7979 				online_count++;
7980 			MDI_PI_UNLOCK(cpi->cpi_pip);
7981 		}
7982 	}
7983 
7984 	return (online_count);
7985 }
7986 
7987 /*
7988  * Bus configure all paths for the specified vhci client.
7989  * If at least one path for the client is already online, the remaining paths
7990  * will be configured asynchronously. Otherwise, it synchronously configures
7991  * the paths until at least one path is online and then rest of the paths
7992  * will be configured asynchronously.
7993  */
7994 static void
7995 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
7996 {
7997 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7998 	mdi_phys_path_t *pp_head, *pp;
7999 	mdi_vhcache_client_t *cct;
8000 	mdi_vhcache_lookup_token_t tok;
8001 
8002 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8003 
8004 	init_vhcache_lookup_token(&tok, NULL);
8005 
8006 	if (ct_name == NULL || ct_addr == NULL ||
8007 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8008 	    == NULL ||
8009 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8010 		rw_exit(&vhcache->vhcache_lock);
8011 		return;
8012 	}
8013 
8014 	/* if at least one path is online, configure the rest asynchronously */
8015 	if (nonline_paths(cct) > 0) {
8016 		rw_exit(&vhcache->vhcache_lock);
8017 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8018 		return;
8019 	}
8020 
8021 	rw_exit(&vhcache->vhcache_lock);
8022 
8023 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8024 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8025 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8026 
8027 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8028 			    ct_addr, &tok)) == NULL) {
8029 				rw_exit(&vhcache->vhcache_lock);
8030 				goto out;
8031 			}
8032 
8033 			if (nonline_paths(cct) > 0 &&
8034 			    pp->phys_path_next != NULL) {
8035 				rw_exit(&vhcache->vhcache_lock);
8036 				config_client_paths_async(vhc, ct_name, ct_addr,
8037 				    pp->phys_path_next, &tok);
8038 				pp->phys_path_next = NULL;
8039 				goto out;
8040 			}
8041 
8042 			rw_exit(&vhcache->vhcache_lock);
8043 		}
8044 	}
8045 
8046 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8047 out:
8048 	free_phclient_path_list(pp_head);
8049 }
8050 
8051 static void
8052 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8053 {
8054 	mutex_enter(&vhc->vhc_lock);
8055 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8056 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8057 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8058 	mutex_exit(&vhc->vhc_lock);
8059 }
8060 
8061 static void
8062 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8063 {
8064 	mutex_enter(&vhc->vhc_lock);
8065 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8066 	cv_broadcast(&vhc->vhc_cv);
8067 	mutex_exit(&vhc->vhc_lock);
8068 }
8069 
8070 /*
8071  * Attach the phci driver instances associated with the vhci:
8072  * If root is mounted attach all phci driver instances.
8073  * If root is not mounted, attach the instances of only those phci
8074  * drivers that have the root support.
8075  */
8076 static void
8077 attach_phci_drivers(mdi_vhci_config_t *vhc)
8078 {
8079 	int  i;
8080 	major_t m;
8081 
8082 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
8083 		if (modrootloaded == 0 &&
8084 		    vhc->vhc_phci_driver_list[i].phdriver_root_support == 0)
8085 			continue;
8086 
8087 		m = ddi_name_to_major(
8088 		    vhc->vhc_phci_driver_list[i].phdriver_name);
8089 		if (m != (major_t)-1) {
8090 			if (ddi_hold_installed_driver(m) != NULL)
8091 				ddi_rele_driver(m);
8092 		}
8093 	}
8094 }
8095 
8096 /*
8097  * Build vhci cache:
8098  *
8099  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8100  * the phci driver instances. During this process the cache gets built.
8101  *
8102  * Cache is built fully if the root is mounted.
8103  * If the root is not mounted, phci drivers that do not have root support
8104  * are not attached. As a result the cache is built partially. The entries
8105  * in the cache reflect only those phci drivers that have root support.
8106  */
8107 static int
8108 build_vhci_cache(mdi_vhci_config_t *vhc)
8109 {
8110 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8111 
8112 	single_threaded_vhconfig_enter(vhc);
8113 
8114 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8115 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8116 		rw_exit(&vhcache->vhcache_lock);
8117 		single_threaded_vhconfig_exit(vhc);
8118 		return (0);
8119 	}
8120 	rw_exit(&vhcache->vhcache_lock);
8121 
8122 	attach_phci_drivers(vhc);
8123 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8124 	    BUS_CONFIG_ALL, (major_t)-1);
8125 
8126 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8127 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8128 	rw_exit(&vhcache->vhcache_lock);
8129 
8130 	single_threaded_vhconfig_exit(vhc);
8131 	vhcache_dirty(vhc);
8132 	return (1);
8133 }
8134 
8135 /*
8136  * Determine if discovery of paths is needed.
8137  */
8138 static int
8139 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8140 {
8141 	int rv = 1;
8142 
8143 	mutex_enter(&vhc->vhc_lock);
8144 	if (i_ddi_io_initialized() == 0) {
8145 		if (vhc->vhc_path_discovery_boot > 0) {
8146 			vhc->vhc_path_discovery_boot--;
8147 			goto out;
8148 		}
8149 	} else {
8150 		if (vhc->vhc_path_discovery_postboot > 0) {
8151 			vhc->vhc_path_discovery_postboot--;
8152 			goto out;
8153 		}
8154 	}
8155 
8156 	/*
8157 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8158 	 * This is to avoid a series of full path discoveries when opening
8159 	 * stale /dev/[r]dsk links.
8160 	 */
8161 	if (mdi_path_discovery_interval != -1 &&
8162 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8163 		goto out;
8164 
8165 	rv = 0;
8166 out:
8167 	mutex_exit(&vhc->vhc_lock);
8168 	return (rv);
8169 }
8170 
8171 /*
8172  * Discover all paths:
8173  *
8174  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8175  * driver instances. During this process all paths will be discovered.
8176  */
8177 static int
8178 vhcache_discover_paths(mdi_vhci_config_t *vhc)
8179 {
8180 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8181 	int rv = 0;
8182 
8183 	single_threaded_vhconfig_enter(vhc);
8184 
8185 	if (vhcache_do_discovery(vhc)) {
8186 		attach_phci_drivers(vhc);
8187 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8188 		    NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1);
8189 
8190 		mutex_enter(&vhc->vhc_lock);
8191 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8192 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8193 		mutex_exit(&vhc->vhc_lock);
8194 		rv = 1;
8195 	}
8196 
8197 	single_threaded_vhconfig_exit(vhc);
8198 	return (rv);
8199 }
8200 
8201 /*
8202  * Generic vhci bus config implementation:
8203  *
8204  * Parameters
8205  *	vdip	vhci dip
8206  *	flags	bus config flags
8207  *	op	bus config operation
8208  *	The remaining parameters are bus config operation specific
8209  *
8210  * for BUS_CONFIG_ONE
8211  *	arg	pointer to name@addr
8212  *	child	upon successful return from this function, *child will be
8213  *		set to the configured and held devinfo child node of vdip.
8214  *	ct_addr	pointer to client address (i.e. GUID)
8215  *
8216  * for BUS_CONFIG_DRIVER
8217  *	arg	major number of the driver
8218  *	child and ct_addr parameters are ignored
8219  *
8220  * for BUS_CONFIG_ALL
8221  *	arg, child, and ct_addr parameters are ignored
8222  *
8223  * Note that for the rest of the bus config operations, this function simply
8224  * calls the framework provided default bus config routine.
8225  */
8226 int
8227 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8228     void *arg, dev_info_t **child, char *ct_addr)
8229 {
8230 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8231 	mdi_vhci_config_t *vhc = vh->vh_config;
8232 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8233 	int rv = 0;
8234 	int params_valid = 0;
8235 	char *cp;
8236 
8237 	/*
8238 	 * While bus configuring phcis, the phci driver interactions with MDI
8239 	 * cause child nodes to be enumerated under the vhci node for which
8240 	 * they need to ndi_devi_enter the vhci node.
8241 	 *
8242 	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
8243 	 * for the bus config operations on phcis to finish while holding the
8244 	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
8245 	 * phcis and call the default framework provided bus config function
8246 	 * if we are called with ndi_devi_enter lock held.
8247 	 */
8248 	if (DEVI_BUSY_OWNED(vdip)) {
8249 		MDI_DEBUG(2, (CE_NOTE, vdip,
8250 		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
8251 		goto default_bus_config;
8252 	}
8253 
8254 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8255 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8256 		rw_exit(&vhcache->vhcache_lock);
8257 		rv = build_vhci_cache(vhc);
8258 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8259 	}
8260 
8261 	switch (op) {
8262 	case BUS_CONFIG_ONE:
8263 		if (arg != NULL && ct_addr != NULL) {
8264 			/* extract node name */
8265 			cp = (char *)arg;
8266 			while (*cp != '\0' && *cp != '@')
8267 				cp++;
8268 			if (*cp == '@') {
8269 				params_valid = 1;
8270 				*cp = '\0';
8271 				config_client_paths(vhc, (char *)arg, ct_addr);
8272 				/* config_client_paths() releases cache_lock */
8273 				*cp = '@';
8274 				break;
8275 			}
8276 		}
8277 
8278 		rw_exit(&vhcache->vhcache_lock);
8279 		break;
8280 
8281 	case BUS_CONFIG_DRIVER:
8282 		rw_exit(&vhcache->vhcache_lock);
8283 		if (rv == 0)
8284 			st_bus_config_all_phcis(vhc, flags, op,
8285 			    (major_t)(uintptr_t)arg);
8286 		break;
8287 
8288 	case BUS_CONFIG_ALL:
8289 		rw_exit(&vhcache->vhcache_lock);
8290 		if (rv == 0)
8291 			st_bus_config_all_phcis(vhc, flags, op, -1);
8292 		break;
8293 
8294 	default:
8295 		rw_exit(&vhcache->vhcache_lock);
8296 		break;
8297 	}
8298 
8299 
8300 default_bus_config:
8301 	/*
8302 	 * All requested child nodes are enumerated under the vhci.
8303 	 * Now configure them.
8304 	 */
8305 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8306 	    NDI_SUCCESS) {
8307 		return (MDI_SUCCESS);
8308 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8309 		/* discover all paths and try configuring again */
8310 		if (vhcache_discover_paths(vhc) &&
8311 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8312 		    NDI_SUCCESS)
8313 			return (MDI_SUCCESS);
8314 	}
8315 
8316 	return (MDI_FAILURE);
8317 }
8318 
8319 /*
8320  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8321  */
8322 static nvlist_t *
8323 read_on_disk_vhci_cache(char *vhci_class)
8324 {
8325 	nvlist_t *nvl;
8326 	int err;
8327 	char *filename;
8328 
8329 	filename = vhclass2vhcache_filename(vhci_class);
8330 
8331 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8332 		kmem_free(filename, strlen(filename) + 1);
8333 		return (nvl);
8334 	} else if (err == EIO)
8335 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8336 	else if (err == EINVAL)
8337 		cmn_err(CE_WARN,
8338 		    "%s: data file corrupted, will recreate\n", filename);
8339 
8340 	kmem_free(filename, strlen(filename) + 1);
8341 	return (NULL);
8342 }
8343 
8344 /*
8345  * Read on-disk vhci cache into nvlists for all vhci classes.
8346  * Called during booting by i_ddi_read_devices_files().
8347  */
8348 void
8349 mdi_read_devices_files(void)
8350 {
8351 	int i;
8352 
8353 	for (i = 0; i < N_VHCI_CLASSES; i++)
8354 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
8355 }
8356 
8357 /*
8358  * Remove all stale entries from vhci cache.
8359  */
8360 static void
8361 clean_vhcache(mdi_vhci_config_t *vhc)
8362 {
8363 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8364 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
8365 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
8366 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
8367 
8368 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8369 
8370 	cct_head = vhcache->vhcache_client_head;
8371 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
8372 	for (cct = cct_head; cct != NULL; cct = cct_next) {
8373 		cct_next = cct->cct_next;
8374 
8375 		cpi_head = cct->cct_cpi_head;
8376 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8377 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8378 			cpi_next = cpi->cpi_next;
8379 			if (cpi->cpi_pip != NULL) {
8380 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
8381 				enqueue_tail_vhcache_pathinfo(cct, cpi);
8382 			} else
8383 				free_vhcache_pathinfo(cpi);
8384 		}
8385 
8386 		if (cct->cct_cpi_head != NULL)
8387 			enqueue_vhcache_client(vhcache, cct);
8388 		else {
8389 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
8390 			    (mod_hash_key_t)cct->cct_name_addr);
8391 			free_vhcache_client(cct);
8392 		}
8393 	}
8394 
8395 	cphci_head = vhcache->vhcache_phci_head;
8396 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
8397 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
8398 		cphci_next = cphci->cphci_next;
8399 		if (cphci->cphci_phci != NULL)
8400 			enqueue_vhcache_phci(vhcache, cphci);
8401 		else
8402 			free_vhcache_phci(cphci);
8403 	}
8404 
8405 	vhcache->vhcache_clean_time = lbolt64;
8406 	rw_exit(&vhcache->vhcache_lock);
8407 	vhcache_dirty(vhc);
8408 }
8409 
8410 /*
8411  * Remove all stale entries from vhci cache.
8412  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
8413  */
8414 void
8415 mdi_clean_vhcache(void)
8416 {
8417 	mdi_vhci_t *vh;
8418 
8419 	mutex_enter(&mdi_mutex);
8420 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8421 		vh->vh_refcnt++;
8422 		mutex_exit(&mdi_mutex);
8423 		clean_vhcache(vh->vh_config);
8424 		mutex_enter(&mdi_mutex);
8425 		vh->vh_refcnt--;
8426 	}
8427 	mutex_exit(&mdi_mutex);
8428 }
8429 
8430 /*
8431  * mdi_vhci_walk_clients():
8432  *		Walker routine to traverse client dev_info nodes
8433  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
8434  * below the client, including nexus devices, which we dont want.
8435  * So we just traverse the immediate siblings, starting from 1st client.
8436  */
8437 void
8438 mdi_vhci_walk_clients(dev_info_t *vdip,
8439     int (*f)(dev_info_t *, void *), void *arg)
8440 {
8441 	dev_info_t	*cdip;
8442 	mdi_client_t	*ct;
8443 
8444 	mutex_enter(&mdi_mutex);
8445 
8446 	cdip = ddi_get_child(vdip);
8447 
8448 	while (cdip) {
8449 		ct = i_devi_get_client(cdip);
8450 		MDI_CLIENT_LOCK(ct);
8451 
8452 		switch ((*f)(cdip, arg)) {
8453 		case DDI_WALK_CONTINUE:
8454 			cdip = ddi_get_next_sibling(cdip);
8455 			MDI_CLIENT_UNLOCK(ct);
8456 			break;
8457 
8458 		default:
8459 			MDI_CLIENT_UNLOCK(ct);
8460 			mutex_exit(&mdi_mutex);
8461 			return;
8462 		}
8463 	}
8464 
8465 	mutex_exit(&mdi_mutex);
8466 }
8467 
8468 /*
8469  * mdi_vhci_walk_phcis():
8470  *		Walker routine to traverse phci dev_info nodes
8471  */
8472 void
8473 mdi_vhci_walk_phcis(dev_info_t *vdip,
8474     int (*f)(dev_info_t *, void *), void *arg)
8475 {
8476 	mdi_vhci_t	*vh = NULL;
8477 	mdi_phci_t	*ph = NULL;
8478 
8479 	mutex_enter(&mdi_mutex);
8480 
8481 	vh = i_devi_get_vhci(vdip);
8482 	ph = vh->vh_phci_head;
8483 
8484 	while (ph) {
8485 		MDI_PHCI_LOCK(ph);
8486 
8487 		switch ((*f)(ph->ph_dip, arg)) {
8488 		case DDI_WALK_CONTINUE:
8489 			MDI_PHCI_UNLOCK(ph);
8490 			ph = ph->ph_next;
8491 			break;
8492 
8493 		default:
8494 			MDI_PHCI_UNLOCK(ph);
8495 			mutex_exit(&mdi_mutex);
8496 			return;
8497 		}
8498 	}
8499 
8500 	mutex_exit(&mdi_mutex);
8501 }
8502 
8503 
8504 /*
8505  * mdi_walk_vhcis():
8506  *		Walker routine to traverse vhci dev_info nodes
8507  */
8508 void
8509 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
8510 {
8511 	mdi_vhci_t	*vh = NULL;
8512 
8513 	mutex_enter(&mdi_mutex);
8514 	/*
8515 	 * Scan for already registered vhci
8516 	 */
8517 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8518 		vh->vh_refcnt++;
8519 		mutex_exit(&mdi_mutex);
8520 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
8521 			mutex_enter(&mdi_mutex);
8522 			vh->vh_refcnt--;
8523 			break;
8524 		} else {
8525 			mutex_enter(&mdi_mutex);
8526 			vh->vh_refcnt--;
8527 		}
8528 	}
8529 
8530 	mutex_exit(&mdi_mutex);
8531 }
8532 
8533 /*
8534  * i_mdi_log_sysevent():
8535  *		Logs events for pickup by syseventd
8536  */
8537 static void
8538 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
8539 {
8540 	char		*path_name;
8541 	nvlist_t	*attr_list;
8542 
8543 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
8544 	    KM_SLEEP) != DDI_SUCCESS) {
8545 		goto alloc_failed;
8546 	}
8547 
8548 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
8549 	(void) ddi_pathname(dip, path_name);
8550 
8551 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
8552 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
8553 		goto error;
8554 	}
8555 
8556 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
8557 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
8558 		goto error;
8559 	}
8560 
8561 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
8562 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
8563 		goto error;
8564 	}
8565 
8566 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
8567 	    path_name) != DDI_SUCCESS) {
8568 		goto error;
8569 	}
8570 
8571 	if (nvlist_add_string(attr_list, DDI_CLASS,
8572 	    ph_vh_class) != DDI_SUCCESS) {
8573 		goto error;
8574 	}
8575 
8576 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
8577 	    attr_list, NULL, DDI_SLEEP);
8578 
8579 error:
8580 	kmem_free(path_name, MAXPATHLEN);
8581 	nvlist_free(attr_list);
8582 	return;
8583 
8584 alloc_failed:
8585 	MDI_DEBUG(1, (CE_WARN, dip,
8586 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
8587 }
8588