xref: /titanic_50/usr/src/uts/common/os/sunmdi.c (revision 35551380472894a564e057962b701af78f719377)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
30  * detailed discussion of the overall mpxio architecture.
31  *
32  * Default locking order:
33  *
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
35  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
39  */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 #include <sys/modhash.h>
68 #include <sys/disp.h>
69 #include <sys/autoconf.h>
70 
#ifdef	DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
/*
 * MDI_DEBUG(level, (i_mdi_log arguments)):
 *		Emit a debug message through i_mdi_log() when the mdi_debug
 *		tunable is at least 'level'.  'stmnt' is the complete,
 *		parenthesized argument list for i_mdi_log().
 *
 *		The expansion is wrapped in do { } while (0) so that it is a
 *		single statement: a bare "if" expansion would capture the
 *		"else" of an enclosing unbraced if/else at the call site.
 *		Call sites must terminate the invocation with a semicolon.
 */
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
	_NOTE(CONSTCOND) } while (0)
#else	/* !DEBUG */
/* Debug logging compiles away entirely in non-DEBUG kernels. */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
80 
81 extern pri_t	minclsyspri;
82 extern int	modrootloaded;
83 
84 /*
85  * Global mutex:
86  * Protects vHCI list and structure members, pHCI and Client lists.
87  */
88 kmutex_t	mdi_mutex;
89 
90 /*
91  * Registered vHCI class driver lists
92  */
93 int		mdi_vhci_count;
94 mdi_vhci_t	*mdi_vhci_head;
95 mdi_vhci_t	*mdi_vhci_tail;
96 
97 /*
98  * Client Hash Table size
99  */
100 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
101 
102 /*
103  * taskq interface definitions
104  */
105 #define	MDI_TASKQ_N_THREADS	8
106 #define	MDI_TASKQ_PRI		minclsyspri
107 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
108 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
109 
110 taskq_t				*mdi_taskq;
111 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
112 
113 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
114 
115 /*
116  * The data should be "quiet" for this interval (in seconds) before the
117  * vhci cached data is flushed to the disk.
118  */
119 static int mdi_vhcache_flush_delay = 10;
120 
121 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
122 static int mdi_vhcache_flush_daemon_idle_time = 60;
123 
124 /*
125  * MDI falls back to discovery of all paths when a bus_config_one fails.
126  * The following parameters can be used to tune this operation.
127  *
128  * mdi_path_discovery_boot
129  *	Number of times path discovery will be attempted during early boot.
130  *	Probably there is no reason to ever set this value to greater than one.
131  *
132  * mdi_path_discovery_postboot
133  *	Number of times path discovery will be attempted after early boot.
134  *	Set it to a minimum of two to allow for discovery of iscsi paths which
135  *	may happen very late during booting.
136  *
137  * mdi_path_discovery_interval
138  *	Minimum number of seconds MDI will wait between successive discovery
139  *	of all paths. Set it to -1 to disable discovery of all paths.
140  */
141 static int mdi_path_discovery_boot = 1;
142 static int mdi_path_discovery_postboot = 2;
143 static int mdi_path_discovery_interval = 10;
144 
145 /*
146  * number of seconds the asynchronous configuration thread will sleep idle
147  * before exiting.
148  */
149 static int mdi_async_config_idle_time = 600;
150 
151 static int mdi_bus_config_cache_hash_size = 256;
152 
153 /* turns off multithreaded configuration for certain operations */
154 static int mdi_mtc_off = 0;
155 
/*
 * Name of the MDI component property and the values it can take.
 */
const char		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * Name of the property that carries an MDI client's global unique
 * identifier.
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * Name of the MDI client load balancing property and the values it can
 * take.
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";
176 
177 /*
178  * Obsolete vHCI class definition; to be removed after Leadville update
179  */
180 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
181 
/*
 * cmn_err() format string (one %s: the vHCI class name) reported when a
 * second vHCI driver tries to register for a class that already has one.
 * The text is never modified, so it is const.
 */
static const char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";
185 
186 /*
187  * Static function prototypes
188  */
189 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
190 static int		i_mdi_client_offline(dev_info_t *, uint_t);
191 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
192 static void		i_mdi_phci_post_detach(dev_info_t *,
193 			    ddi_detach_cmd_t, int);
194 static int		i_mdi_client_pre_detach(dev_info_t *,
195 			    ddi_detach_cmd_t);
196 static void		i_mdi_client_post_detach(dev_info_t *,
197 			    ddi_detach_cmd_t, int);
198 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
199 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
200 static int 		i_mdi_lba_lb(mdi_client_t *ct,
201 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
202 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
203 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
204 static void		i_mdi_pm_reset_client(mdi_client_t *);
205 static void		i_mdi_pm_hold_all_phci(mdi_client_t *);
206 static int		i_mdi_power_all_phci(mdi_client_t *);
207 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
208 
209 
210 /*
211  * Internal mdi_pathinfo node functions
212  */
213 static int		i_mdi_pi_kstat_create(mdi_pathinfo_t *);
214 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
215 
216 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
217 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
218 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
219 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
220 static void		i_mdi_phci_get_client_lock(mdi_phci_t *,
221 			    mdi_client_t *);
222 static void		i_mdi_phci_unlock(mdi_phci_t *);
223 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
224 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
225 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
226 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
227 			    mdi_client_t *);
228 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
229 static void		i_mdi_client_remove_path(mdi_client_t *,
230 			    mdi_pathinfo_t *);
231 
232 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
233 			    mdi_pathinfo_state_t, int);
234 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
235 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
236 			    char **, int);
237 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
238 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
239 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
240 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
241 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
242 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
243 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
244 static void		i_mdi_client_update_state(mdi_client_t *);
245 static int		i_mdi_client_compute_state(mdi_client_t *,
246 			    mdi_phci_t *);
247 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
248 static void		i_mdi_client_unlock(mdi_client_t *);
249 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
250 static mdi_client_t	*i_devi_get_client(dev_info_t *);
251 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int,
252 			int);
253 /*
254  * Failover related function prototypes
255  */
256 static int		i_mdi_failover(void *);
257 
258 /*
259  * misc internal functions
260  */
261 static int		i_mdi_get_hash_key(char *);
262 static int		i_map_nvlist_error_to_mdi(int);
263 static void		i_mdi_report_path_state(mdi_client_t *,
264 			    mdi_pathinfo_t *);
265 
266 static void		setup_vhci_cache(mdi_vhci_t *);
267 static int		destroy_vhci_cache(mdi_vhci_t *);
268 static void		setup_phci_driver_list(mdi_vhci_t *);
269 static void		free_phci_driver_list(mdi_vhci_config_t *);
270 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
271 static boolean_t	stop_vhcache_flush_thread(void *, int);
272 static void		free_string_array(char **, int);
273 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
274 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
275 static void		free_vhcache_client(mdi_vhcache_client_t *);
276 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
277 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
278 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
279 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
280 static void		vhcache_pi_add(mdi_vhci_config_t *,
281 			    struct mdi_pathinfo *);
282 static void		vhcache_pi_remove(mdi_vhci_config_t *,
283 			    struct mdi_pathinfo *);
284 static void		free_phclient_path_list(mdi_phys_path_t *);
285 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
286 static int		flush_vhcache(mdi_vhci_config_t *, int);
287 static void		vhcache_dirty(mdi_vhci_config_t *);
288 static void		free_async_client_config(mdi_async_client_config_t *);
289 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
290 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
291 static nvlist_t		*read_on_disk_vhci_cache(char *);
292 extern int		fread_nvlist(char *, nvlist_t **);
293 extern int		fwrite_nvlist(char *, nvlist_t *);
294 
295 /* called once when first vhci registers with mdi */
296 static void
297 i_mdi_init()
298 {
299 	static int initialized = 0;
300 
301 	if (initialized)
302 		return;
303 	initialized = 1;
304 
305 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
306 	/*
307 	 * Create our taskq resources
308 	 */
309 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
310 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
311 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
312 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
313 }
314 
315 /*
316  * mdi_get_component_type():
317  *		Return mpxio component type
318  * Return Values:
319  *		MDI_COMPONENT_NONE
320  *		MDI_COMPONENT_VHCI
321  *		MDI_COMPONENT_PHCI
322  *		MDI_COMPONENT_CLIENT
323  * XXX This doesn't work under multi-level MPxIO and should be
324  *	removed when clients migrate mdi_is_*() interfaces.
325  */
326 int
327 mdi_get_component_type(dev_info_t *dip)
328 {
329 	return (DEVI(dip)->devi_mdi_component);
330 }
331 
332 /*
333  * mdi_vhci_register():
334  *		Register a vHCI module with the mpxio framework
335  *		mdi_vhci_register() is called by vHCI drivers to register the
336  *		'class_driver' vHCI driver and its MDI entrypoints with the
337  *		mpxio framework.  The vHCI driver must call this interface as
338  *		part of its attach(9e) handler.
339  *		Competing threads may try to attach mdi_vhci_register() as
340  *		the vHCI drivers are loaded and attached as a result of pHCI
341  *		driver instance registration (mdi_phci_register()) with the
342  *		framework.
343  * Return Values:
344  *		MDI_SUCCESS
345  *		MDI_FAILURE
346  */
347 
348 /*ARGSUSED*/
349 int
350 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
351     int flags)
352 {
353 	mdi_vhci_t		*vh = NULL;
354 
355 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
356 
357 	i_mdi_init();
358 
359 	mutex_enter(&mdi_mutex);
360 	/*
361 	 * Scan for already registered vhci
362 	 */
363 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
364 		if (strcmp(vh->vh_class, class) == 0) {
365 			/*
366 			 * vHCI has already been created.  Check for valid
367 			 * vHCI ops registration.  We only support one vHCI
368 			 * module per class
369 			 */
370 			if (vh->vh_ops != NULL) {
371 				mutex_exit(&mdi_mutex);
372 				cmn_err(CE_NOTE, vhci_greeting, class);
373 				return (MDI_FAILURE);
374 			}
375 			break;
376 		}
377 	}
378 
379 	/*
380 	 * if not yet created, create the vHCI component
381 	 */
382 	if (vh == NULL) {
383 		struct client_hash	*hash = NULL;
384 		char			*load_balance;
385 
386 		/*
387 		 * Allocate and initialize the mdi extensions
388 		 */
389 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
390 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
391 		    KM_SLEEP);
392 		vh->vh_client_table = hash;
393 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
394 		(void) strcpy(vh->vh_class, class);
395 		vh->vh_lb = LOAD_BALANCE_RR;
396 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
397 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
398 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
399 				vh->vh_lb = LOAD_BALANCE_NONE;
400 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
401 				    == 0) {
402 				vh->vh_lb = LOAD_BALANCE_LBA;
403 			}
404 			ddi_prop_free(load_balance);
405 		}
406 
407 		/*
408 		 * Store the vHCI ops vectors
409 		 */
410 		vh->vh_dip = vdip;
411 		vh->vh_ops = vops;
412 
413 		setup_vhci_cache(vh);
414 
415 		if (mdi_vhci_head == NULL) {
416 			mdi_vhci_head = vh;
417 		}
418 		if (mdi_vhci_tail) {
419 			mdi_vhci_tail->vh_next = vh;
420 		}
421 		mdi_vhci_tail = vh;
422 		mdi_vhci_count++;
423 	}
424 
425 	/*
426 	 * Claim the devfs node as a vhci component
427 	 */
428 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
429 
430 	/*
431 	 * Initialize our back reference from dev_info node
432 	 */
433 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
434 	mutex_exit(&mdi_mutex);
435 	return (MDI_SUCCESS);
436 }
437 
438 /*
439  * mdi_vhci_unregister():
440  *		Unregister a vHCI module from mpxio framework
441  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
442  * 		of a vhci to unregister it from the framework.
443  * Return Values:
444  *		MDI_SUCCESS
445  *		MDI_FAILURE
446  */
447 
448 /*ARGSUSED*/
449 int
450 mdi_vhci_unregister(dev_info_t *vdip, int flags)
451 {
452 	mdi_vhci_t	*found, *vh, *prev = NULL;
453 
454 	/*
455 	 * Check for invalid VHCI
456 	 */
457 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
458 		return (MDI_FAILURE);
459 
460 	mutex_enter(&mdi_mutex);
461 
462 	/*
463 	 * Scan the list of registered vHCIs for a match
464 	 */
465 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
466 		if (found == vh)
467 			break;
468 		prev = found;
469 	}
470 
471 	if (found == NULL) {
472 		mutex_exit(&mdi_mutex);
473 		return (MDI_FAILURE);
474 	}
475 
476 	/*
477 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
478 	 * should have been unregistered, before a vHCI can be
479 	 * unregistered.
480 	 */
481 	if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) {
482 		mutex_exit(&mdi_mutex);
483 		return (MDI_FAILURE);
484 	}
485 
486 	/*
487 	 * Remove the vHCI from the global list
488 	 */
489 	if (vh == mdi_vhci_head) {
490 		mdi_vhci_head = vh->vh_next;
491 	} else {
492 		prev->vh_next = vh->vh_next;
493 	}
494 	if (vh == mdi_vhci_tail) {
495 		mdi_vhci_tail = prev;
496 	}
497 
498 	mdi_vhci_count--;
499 	mutex_exit(&mdi_mutex);
500 
501 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
502 		/* add vhci to the global list */
503 		mutex_enter(&mdi_mutex);
504 		if (mdi_vhci_head == NULL)
505 			mdi_vhci_head = vh;
506 		else
507 			mdi_vhci_tail->vh_next = vh;
508 		mdi_vhci_tail = vh;
509 		mdi_vhci_count++;
510 		mutex_exit(&mdi_mutex);
511 		return (MDI_FAILURE);
512 	}
513 
514 	vh->vh_ops = NULL;
515 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
516 	DEVI(vdip)->devi_mdi_xhci = NULL;
517 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
518 	kmem_free(vh->vh_client_table,
519 	    mdi_client_table_size * sizeof (struct client_hash));
520 	kmem_free(vh, sizeof (mdi_vhci_t));
521 	return (MDI_SUCCESS);
522 }
523 
524 /*
525  * i_mdi_vhci_class2vhci():
526  *		Look for a matching vHCI module given a vHCI class name
527  * Return Values:
528  *		Handle to a vHCI component
529  *		NULL
530  */
531 static mdi_vhci_t *
532 i_mdi_vhci_class2vhci(char *class)
533 {
534 	mdi_vhci_t	*vh = NULL;
535 
536 	ASSERT(!MUTEX_HELD(&mdi_mutex));
537 
538 	mutex_enter(&mdi_mutex);
539 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
540 		if (strcmp(vh->vh_class, class) == 0) {
541 			break;
542 		}
543 	}
544 	mutex_exit(&mdi_mutex);
545 	return (vh);
546 }
547 
548 /*
549  * i_devi_get_vhci():
550  *		Utility function to get the handle to a vHCI component
551  * Return Values:
552  *		Handle to a vHCI component
553  *		NULL
554  */
555 mdi_vhci_t *
556 i_devi_get_vhci(dev_info_t *vdip)
557 {
558 	mdi_vhci_t	*vh = NULL;
559 	if (MDI_VHCI(vdip)) {
560 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
561 	}
562 	return (vh);
563 }
564 
565 /*
566  * mdi_phci_register():
567  *		Register a pHCI module with mpxio framework
568  *		mdi_phci_register() is called by pHCI drivers to register with
569  *		the mpxio framework and a specific 'class_driver' vHCI.  The
570  *		pHCI driver must call this interface as part of its attach(9e)
571  *		handler.
572  * Return Values:
573  *		MDI_SUCCESS
574  *		MDI_FAILURE
575  */
576 
577 /*ARGSUSED*/
578 int
579 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
580 {
581 	mdi_phci_t		*ph;
582 	mdi_vhci_t		*vh;
583 	char			*data;
584 	char			*pathname;
585 
586 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
587 	(void) ddi_pathname(pdip, pathname);
588 
589 	/*
590 	 * Check for mpxio-disable property. Enable mpxio if the property is
591 	 * missing or not set to "yes".
592 	 * If the property is set to "yes" then emit a brief message.
593 	 */
594 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
595 	    &data) == DDI_SUCCESS)) {
596 		if (strcmp(data, "yes") == 0) {
597 			MDI_DEBUG(1, (CE_CONT, pdip,
598 			    "?%s (%s%d) multipath capabilities "
599 			    "disabled via %s.conf.\n", pathname,
600 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
601 			    ddi_driver_name(pdip)));
602 			ddi_prop_free(data);
603 			kmem_free(pathname, MAXPATHLEN);
604 			return (MDI_FAILURE);
605 		}
606 		ddi_prop_free(data);
607 	}
608 
609 	kmem_free(pathname, MAXPATHLEN);
610 
611 	/*
612 	 * Search for a matching vHCI
613 	 */
614 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
615 	if (vh == NULL) {
616 		return (MDI_FAILURE);
617 	}
618 
619 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
620 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
621 	ph->ph_dip = pdip;
622 	ph->ph_vhci = vh;
623 	ph->ph_next = NULL;
624 	ph->ph_unstable = 0;
625 	ph->ph_vprivate = 0;
626 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
627 	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);
628 
629 	MDI_PHCI_SET_POWER_UP(ph);
630 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
631 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
632 
633 	vhcache_phci_add(vh->vh_config, ph);
634 
635 	mutex_enter(&mdi_mutex);
636 	if (vh->vh_phci_head == NULL) {
637 		vh->vh_phci_head = ph;
638 	}
639 	if (vh->vh_phci_tail) {
640 		vh->vh_phci_tail->ph_next = ph;
641 	}
642 	vh->vh_phci_tail = ph;
643 	vh->vh_phci_count++;
644 	mutex_exit(&mdi_mutex);
645 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
646 	return (MDI_SUCCESS);
647 }
648 
649 /*
650  * mdi_phci_unregister():
651  *		Unregister a pHCI module from mpxio framework
652  *		mdi_phci_unregister() is called by the pHCI drivers from their
653  *		detach(9E) handler to unregister their instances from the
654  *		framework.
655  * Return Values:
656  *		MDI_SUCCESS
657  *		MDI_FAILURE
658  */
659 
660 /*ARGSUSED*/
661 int
662 mdi_phci_unregister(dev_info_t *pdip, int flags)
663 {
664 	mdi_vhci_t		*vh;
665 	mdi_phci_t		*ph;
666 	mdi_phci_t		*tmp;
667 	mdi_phci_t		*prev = NULL;
668 
669 	ph = i_devi_get_phci(pdip);
670 	if (ph == NULL) {
671 		MDI_DEBUG(1, (CE_WARN, pdip,
672 		    "!pHCI unregister: Not a valid pHCI"));
673 		return (MDI_FAILURE);
674 	}
675 
676 	vh = ph->ph_vhci;
677 	ASSERT(vh != NULL);
678 	if (vh == NULL) {
679 		MDI_DEBUG(1, (CE_WARN, pdip,
680 		    "!pHCI unregister: Not a valid vHCI"));
681 		return (MDI_FAILURE);
682 	}
683 
684 	mutex_enter(&mdi_mutex);
685 	tmp = vh->vh_phci_head;
686 	while (tmp) {
687 		if (tmp == ph) {
688 			break;
689 		}
690 		prev = tmp;
691 		tmp = tmp->ph_next;
692 	}
693 
694 	if (ph == vh->vh_phci_head) {
695 		vh->vh_phci_head = ph->ph_next;
696 	} else {
697 		prev->ph_next = ph->ph_next;
698 	}
699 
700 	if (ph == vh->vh_phci_tail) {
701 		vh->vh_phci_tail = prev;
702 	}
703 
704 	vh->vh_phci_count--;
705 
706 	mutex_exit(&mdi_mutex);
707 
708 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
709 	    ESC_DDI_INITIATOR_UNREGISTER);
710 	vhcache_phci_remove(vh->vh_config, ph);
711 	cv_destroy(&ph->ph_unstable_cv);
712 	cv_destroy(&ph->ph_powerchange_cv);
713 	mutex_destroy(&ph->ph_mutex);
714 	kmem_free(ph, sizeof (mdi_phci_t));
715 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
716 	DEVI(pdip)->devi_mdi_xhci = NULL;
717 	return (MDI_SUCCESS);
718 }
719 
720 /*
721  * i_devi_get_phci():
722  * 		Utility function to return the phci extensions.
723  */
724 static mdi_phci_t *
725 i_devi_get_phci(dev_info_t *pdip)
726 {
727 	mdi_phci_t	*ph = NULL;
728 	if (MDI_PHCI(pdip)) {
729 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
730 	}
731 	return (ph);
732 }
733 
734 /*
735  * mdi_phci_path2devinfo():
736  * 		Utility function to search for a valid phci device given
737  *		the devfs pathname.
738  */
739 
740 dev_info_t *
741 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
742 {
743 	char		*temp_pathname;
744 	mdi_vhci_t	*vh;
745 	mdi_phci_t	*ph;
746 	dev_info_t 	*pdip = NULL;
747 
748 	vh = i_devi_get_vhci(vdip);
749 	ASSERT(vh != NULL);
750 
751 	if (vh == NULL) {
752 		/*
753 		 * Invalid vHCI component, return failure
754 		 */
755 		return (NULL);
756 	}
757 
758 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
759 	mutex_enter(&mdi_mutex);
760 	ph = vh->vh_phci_head;
761 	while (ph != NULL) {
762 		pdip = ph->ph_dip;
763 		ASSERT(pdip != NULL);
764 		*temp_pathname = '\0';
765 		(void) ddi_pathname(pdip, temp_pathname);
766 		if (strcmp(temp_pathname, pathname) == 0) {
767 			break;
768 		}
769 		ph = ph->ph_next;
770 	}
771 	if (ph == NULL) {
772 		pdip = NULL;
773 	}
774 	mutex_exit(&mdi_mutex);
775 	kmem_free(temp_pathname, MAXPATHLEN);
776 	return (pdip);
777 }
778 
779 /*
780  * mdi_phci_get_path_count():
781  * 		get number of path information nodes associated with a given
782  *		pHCI device.
783  */
784 int
785 mdi_phci_get_path_count(dev_info_t *pdip)
786 {
787 	mdi_phci_t	*ph;
788 	int		count = 0;
789 
790 	ph = i_devi_get_phci(pdip);
791 	if (ph != NULL) {
792 		count = ph->ph_path_count;
793 	}
794 	return (count);
795 }
796 
797 /*
798  * i_mdi_phci_lock():
799  *		Lock a pHCI device
800  * Return Values:
801  *		None
802  * Note:
803  *		The default locking order is:
804  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
805  *		But there are number of situations where locks need to be
806  *		grabbed in reverse order.  This routine implements try and lock
807  *		mechanism depending on the requested parameter option.
808  */
809 static void
810 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
811 {
812 	if (pip) {
813 		/* Reverse locking is requested. */
814 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
815 			/*
816 			 * tryenter failed. Try to grab again
817 			 * after a small delay
818 			 */
819 			MDI_PI_HOLD(pip);
820 			MDI_PI_UNLOCK(pip);
821 			delay(1);
822 			MDI_PI_LOCK(pip);
823 			MDI_PI_RELE(pip);
824 		}
825 	} else {
826 		MDI_PHCI_LOCK(ph);
827 	}
828 }
829 
830 /*
831  * i_mdi_phci_get_client_lock():
832  *		Lock a pHCI device
833  * Return Values:
834  *		None
835  * Note:
836  *		The default locking order is:
837  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
838  *		But there are number of situations where locks need to be
839  *		grabbed in reverse order.  This routine implements try and lock
840  *		mechanism depending on the requested parameter option.
841  */
842 static void
843 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
844 {
845 	if (ct) {
846 		/* Reverse locking is requested. */
847 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
848 			/*
849 			 * tryenter failed. Try to grab again
850 			 * after a small delay
851 			 */
852 			MDI_CLIENT_UNLOCK(ct);
853 			delay(1);
854 			MDI_CLIENT_LOCK(ct);
855 		}
856 	} else {
857 		MDI_PHCI_LOCK(ph);
858 	}
859 }
860 
861 /*
862  * i_mdi_phci_unlock():
863  *		Unlock the pHCI component
864  */
865 static void
866 i_mdi_phci_unlock(mdi_phci_t *ph)
867 {
868 	MDI_PHCI_UNLOCK(ph);
869 }
870 
871 /*
872  * i_mdi_devinfo_create():
873  *		create client device's devinfo node
874  * Return Values:
875  *		dev_info
876  *		NULL
877  * Notes:
878  */
879 static dev_info_t *
880 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
881 	char **compatible, int ncompatible)
882 {
883 	dev_info_t *cdip = NULL;
884 
885 	ASSERT(MUTEX_HELD(&mdi_mutex));
886 
887 	/* Verify for duplicate entry */
888 	cdip = i_mdi_devinfo_find(vh, name, guid);
889 	ASSERT(cdip == NULL);
890 	if (cdip) {
891 		cmn_err(CE_WARN,
892 		    "i_mdi_devinfo_create: client dip %p already exists",
893 			(void *)cdip);
894 	}
895 
896 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
897 	if (cdip == NULL)
898 		goto fail;
899 
900 	/*
901 	 * Create component type and Global unique identifier
902 	 * properties
903 	 */
904 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
905 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
906 		goto fail;
907 	}
908 
909 	/* Decorate the node with compatible property */
910 	if (compatible &&
911 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
912 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
913 		goto fail;
914 	}
915 
916 	return (cdip);
917 
918 fail:
919 	if (cdip) {
920 		(void) ndi_prop_remove_all(cdip);
921 		(void) ndi_devi_free(cdip);
922 	}
923 	return (NULL);
924 }
925 
926 /*
927  * i_mdi_devinfo_find():
928  *		Find a matching devinfo node for given client node name
929  *		and its guid.
930  * Return Values:
931  *		Handle to a dev_info node or NULL
932  */
933 
934 static dev_info_t *
935 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
936 {
937 	char			*data;
938 	dev_info_t 		*cdip = NULL;
939 	dev_info_t 		*ndip = NULL;
940 	int			circular;
941 
942 	ndi_devi_enter(vh->vh_dip, &circular);
943 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
944 	while ((cdip = ndip) != NULL) {
945 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
946 
947 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
948 			continue;
949 		}
950 
951 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
952 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
953 		    &data) != DDI_PROP_SUCCESS) {
954 			continue;
955 		}
956 
957 		if (strcmp(data, guid) != 0) {
958 			ddi_prop_free(data);
959 			continue;
960 		}
961 		ddi_prop_free(data);
962 		break;
963 	}
964 	ndi_devi_exit(vh->vh_dip, circular);
965 	return (cdip);
966 }
967 
968 /*
969  * i_mdi_devinfo_remove():
970  *		Remove a client device node
971  */
972 static int
973 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
974 {
975 	int	rv = MDI_SUCCESS;
976 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
977 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
978 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
979 		if (rv != NDI_SUCCESS) {
980 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
981 			    " failed. cdip = %p\n", cdip));
982 		}
983 		/*
984 		 * Convert to MDI error code
985 		 */
986 		switch (rv) {
987 		case NDI_SUCCESS:
988 			rv = MDI_SUCCESS;
989 			break;
990 		case NDI_BUSY:
991 			rv = MDI_BUSY;
992 			break;
993 		default:
994 			rv = MDI_FAILURE;
995 			break;
996 		}
997 	}
998 	return (rv);
999 }
1000 
1001 /*
1002  * i_devi_get_client()
1003  *		Utility function to get mpxio component extensions
1004  */
1005 static mdi_client_t *
1006 i_devi_get_client(dev_info_t *cdip)
1007 {
1008 	mdi_client_t	*ct = NULL;
1009 	if (MDI_CLIENT(cdip)) {
1010 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1011 	}
1012 	return (ct);
1013 }
1014 
1015 /*
1016  * i_mdi_is_child_present():
1017  *		Search for the presence of client device dev_info node
1018  */
1019 
1020 static int
1021 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1022 {
1023 	int		rv = MDI_FAILURE;
1024 	struct dev_info	*dip;
1025 	int		circular;
1026 
1027 	ndi_devi_enter(vdip, &circular);
1028 	dip = DEVI(vdip)->devi_child;
1029 	while (dip) {
1030 		if (dip == DEVI(cdip)) {
1031 			rv = MDI_SUCCESS;
1032 			break;
1033 		}
1034 		dip = dip->devi_sibling;
1035 	}
1036 	ndi_devi_exit(vdip, circular);
1037 	return (rv);
1038 }
1039 
1040 
1041 /*
1042  * i_mdi_client_lock():
1043  *		Grab client component lock
1044  * Return Values:
1045  *		None
1046  * Note:
1047  *		The default locking order is:
1048  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1049  *		But there are number of situations where locks need to be
1050  *		grabbed in reverse order.  This routine implements try and lock
1051  *		mechanism depending on the requested parameter option.
1052  */
1053 
1054 static void
1055 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1056 {
1057 	if (pip) {
1058 		/*
1059 		 * Reverse locking is requested.
1060 		 */
1061 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1062 			/*
1063 			 * tryenter failed. Try to grab again
1064 			 * after a small delay
1065 			 */
1066 			MDI_PI_HOLD(pip);
1067 			MDI_PI_UNLOCK(pip);
1068 			delay(1);
1069 			MDI_PI_LOCK(pip);
1070 			MDI_PI_RELE(pip);
1071 		}
1072 	} else {
1073 		MDI_CLIENT_LOCK(ct);
1074 	}
1075 }
1076 
1077 /*
1078  * i_mdi_client_unlock():
1079  *		Unlock a client component
1080  */
1081 
1082 static void
1083 i_mdi_client_unlock(mdi_client_t *ct)
1084 {
1085 	MDI_CLIENT_UNLOCK(ct);
1086 }
1087 
1088 /*
1089  * i_mdi_client_alloc():
1090  * 		Allocate and initialize a client structure.  Caller should
1091  *		hold the global mdi_mutex.
1092  * Return Values:
1093  *		Handle to a client component
1094  */
1095 /*ARGSUSED*/
1096 static mdi_client_t *
1097 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1098 {
1099 	mdi_client_t	*ct;
1100 
1101 	ASSERT(MUTEX_HELD(&mdi_mutex));
1102 
1103 	/*
1104 	 * Allocate and initialize a component structure.
1105 	 */
1106 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1107 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1108 	ct->ct_hnext = NULL;
1109 	ct->ct_hprev = NULL;
1110 	ct->ct_dip = NULL;
1111 	ct->ct_vhci = vh;
1112 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1113 	(void) strcpy(ct->ct_drvname, name);
1114 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1115 	(void) strcpy(ct->ct_guid, lguid);
1116 	ct->ct_cprivate = NULL;
1117 	ct->ct_vprivate = NULL;
1118 	ct->ct_flags = 0;
1119 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1120 	MDI_CLIENT_SET_OFFLINE(ct);
1121 	MDI_CLIENT_SET_DETACH(ct);
1122 	MDI_CLIENT_SET_POWER_UP(ct);
1123 	ct->ct_failover_flags = 0;
1124 	ct->ct_failover_status = 0;
1125 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1126 	ct->ct_unstable = 0;
1127 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1128 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1129 	ct->ct_lb = vh->vh_lb;
1130 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1131 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1132 	ct->ct_path_count = 0;
1133 	ct->ct_path_head = NULL;
1134 	ct->ct_path_tail = NULL;
1135 	ct->ct_path_last = NULL;
1136 
1137 	/*
1138 	 * Add this client component to our client hash queue
1139 	 */
1140 	i_mdi_client_enlist_table(vh, ct);
1141 	return (ct);
1142 }
1143 
1144 /*
1145  * i_mdi_client_enlist_table():
1146  *		Attach the client device to the client hash table. Caller
1147  *		should hold the mdi_mutex
1148  */
1149 
1150 static void
1151 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1152 {
1153 	int 			index;
1154 	struct client_hash	*head;
1155 
1156 	ASSERT(MUTEX_HELD(&mdi_mutex));
1157 	index = i_mdi_get_hash_key(ct->ct_guid);
1158 	head = &vh->vh_client_table[index];
1159 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1160 	head->ct_hash_head = ct;
1161 	head->ct_hash_count++;
1162 	vh->vh_client_count++;
1163 }
1164 
1165 /*
1166  * i_mdi_client_delist_table():
1167  *		Attach the client device to the client hash table.
1168  *		Caller should hold the mdi_mutex
1169  */
1170 
1171 static void
1172 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1173 {
1174 	int			index;
1175 	char			*guid;
1176 	struct client_hash 	*head;
1177 	mdi_client_t		*next;
1178 	mdi_client_t		*last;
1179 
1180 	ASSERT(MUTEX_HELD(&mdi_mutex));
1181 	guid = ct->ct_guid;
1182 	index = i_mdi_get_hash_key(guid);
1183 	head = &vh->vh_client_table[index];
1184 
1185 	last = NULL;
1186 	next = (mdi_client_t *)head->ct_hash_head;
1187 	while (next != NULL) {
1188 		if (next == ct) {
1189 			break;
1190 		}
1191 		last = next;
1192 		next = next->ct_hnext;
1193 	}
1194 
1195 	if (next) {
1196 		head->ct_hash_count--;
1197 		if (last == NULL) {
1198 			head->ct_hash_head = ct->ct_hnext;
1199 		} else {
1200 			last->ct_hnext = ct->ct_hnext;
1201 		}
1202 		ct->ct_hnext = NULL;
1203 		vh->vh_client_count--;
1204 	}
1205 }
1206 
1207 
1208 /*
1209  * i_mdi_client_free():
1210  *		Free a client component
1211  */
1212 static int
1213 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1214 {
1215 	int		rv = MDI_SUCCESS;
1216 	int		flags = ct->ct_flags;
1217 	dev_info_t	*cdip;
1218 	dev_info_t	*vdip;
1219 
1220 	ASSERT(MUTEX_HELD(&mdi_mutex));
1221 	vdip = vh->vh_dip;
1222 	cdip = ct->ct_dip;
1223 
1224 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1225 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1226 	DEVI(cdip)->devi_mdi_client = NULL;
1227 
1228 	/*
1229 	 * Clear out back ref. to dev_info_t node
1230 	 */
1231 	ct->ct_dip = NULL;
1232 
1233 	/*
1234 	 * Remove this client from our hash queue
1235 	 */
1236 	i_mdi_client_delist_table(vh, ct);
1237 
1238 	/*
1239 	 * Uninitialize and free the component
1240 	 */
1241 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1242 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1243 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1244 	cv_destroy(&ct->ct_failover_cv);
1245 	cv_destroy(&ct->ct_unstable_cv);
1246 	cv_destroy(&ct->ct_powerchange_cv);
1247 	mutex_destroy(&ct->ct_mutex);
1248 	kmem_free(ct, sizeof (*ct));
1249 
1250 	if (cdip != NULL) {
1251 		mutex_exit(&mdi_mutex);
1252 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1253 		mutex_enter(&mdi_mutex);
1254 	}
1255 	return (rv);
1256 }
1257 
1258 /*
1259  * i_mdi_client_find():
1260  * 		Find the client structure corresponding to a given guid
1261  *		Caller should hold the mdi_mutex
1262  */
1263 static mdi_client_t *
1264 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1265 {
1266 	int			index;
1267 	struct client_hash	*head;
1268 	mdi_client_t		*ct;
1269 
1270 	ASSERT(MUTEX_HELD(&mdi_mutex));
1271 	index = i_mdi_get_hash_key(guid);
1272 	head = &vh->vh_client_table[index];
1273 
1274 	ct = head->ct_hash_head;
1275 	while (ct != NULL) {
1276 		if (strcmp(ct->ct_guid, guid) == 0 &&
1277 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1278 			break;
1279 		}
1280 		ct = ct->ct_hnext;
1281 	}
1282 	return (ct);
1283 }
1284 
1285 
1286 
1287 /*
1288  * i_mdi_client_update_state():
1289  *		Compute and update client device state
1290  * Notes:
1291  *		A client device can be in any of three possible states:
1292  *
1293  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1294  *		one online/standby paths. Can tolerate failures.
1295  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1296  *		no alternate paths available as standby. A failure on the online
1297  *		would result in loss of access to device data.
1298  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1299  *		no paths available to access the device.
1300  */
1301 static void
1302 i_mdi_client_update_state(mdi_client_t *ct)
1303 {
1304 	int state;
1305 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1306 	state = i_mdi_client_compute_state(ct, NULL);
1307 	MDI_CLIENT_SET_STATE(ct, state);
1308 }
1309 
1310 /*
1311  * i_mdi_client_compute_state():
1312  *		Compute client device state
1313  *
1314  *		mdi_phci_t *	Pointer to pHCI structure which should
1315  *				while computing the new value.  Used by
1316  *				i_mdi_phci_offline() to find the new
1317  *				client state after DR of a pHCI.
1318  */
1319 static int
1320 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1321 {
1322 	int		state;
1323 	int		online_count = 0;
1324 	int		standby_count = 0;
1325 	mdi_pathinfo_t	*pip, *next;
1326 
1327 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1328 	pip = ct->ct_path_head;
1329 	while (pip != NULL) {
1330 		MDI_PI_LOCK(pip);
1331 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1332 		if (MDI_PI(pip)->pi_phci == ph) {
1333 			MDI_PI_UNLOCK(pip);
1334 			pip = next;
1335 			continue;
1336 		}
1337 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1338 				== MDI_PATHINFO_STATE_ONLINE)
1339 			online_count++;
1340 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1341 				== MDI_PATHINFO_STATE_STANDBY)
1342 			standby_count++;
1343 		MDI_PI_UNLOCK(pip);
1344 		pip = next;
1345 	}
1346 
1347 	if (online_count == 0) {
1348 		if (standby_count == 0) {
1349 			state = MDI_CLIENT_STATE_FAILED;
1350 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1351 			    " ct = %p\n", ct));
1352 		} else if (standby_count == 1) {
1353 			state = MDI_CLIENT_STATE_DEGRADED;
1354 		} else {
1355 			state = MDI_CLIENT_STATE_OPTIMAL;
1356 		}
1357 	} else if (online_count == 1) {
1358 		if (standby_count == 0) {
1359 			state = MDI_CLIENT_STATE_DEGRADED;
1360 		} else {
1361 			state = MDI_CLIENT_STATE_OPTIMAL;
1362 		}
1363 	} else {
1364 		state = MDI_CLIENT_STATE_OPTIMAL;
1365 	}
1366 	return (state);
1367 }
1368 
1369 /*
1370  * i_mdi_client2devinfo():
1371  *		Utility function
1372  */
1373 dev_info_t *
1374 i_mdi_client2devinfo(mdi_client_t *ct)
1375 {
1376 	return (ct->ct_dip);
1377 }
1378 
1379 /*
1380  * mdi_client_path2_devinfo():
1381  * 		Given the parent devinfo and child devfs pathname, search for
1382  *		a valid devfs node handle.
1383  */
1384 dev_info_t *
1385 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1386 {
1387 	dev_info_t 	*cdip = NULL;
1388 	dev_info_t 	*ndip = NULL;
1389 	char		*temp_pathname;
1390 	int		circular;
1391 
1392 	/*
1393 	 * Allocate temp buffer
1394 	 */
1395 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1396 
1397 	/*
1398 	 * Lock parent against changes
1399 	 */
1400 	ndi_devi_enter(vdip, &circular);
1401 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1402 	while ((cdip = ndip) != NULL) {
1403 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1404 
1405 		*temp_pathname = '\0';
1406 		(void) ddi_pathname(cdip, temp_pathname);
1407 		if (strcmp(temp_pathname, pathname) == 0) {
1408 			break;
1409 		}
1410 	}
1411 	/*
1412 	 * Release devinfo lock
1413 	 */
1414 	ndi_devi_exit(vdip, circular);
1415 
1416 	/*
1417 	 * Free the temp buffer
1418 	 */
1419 	kmem_free(temp_pathname, MAXPATHLEN);
1420 	return (cdip);
1421 }
1422 
1423 
1424 /*
1425  * mdi_client_get_path_count():
1426  * 		Utility function to get number of path information nodes
1427  *		associated with a given client device.
1428  */
1429 int
1430 mdi_client_get_path_count(dev_info_t *cdip)
1431 {
1432 	mdi_client_t	*ct;
1433 	int		count = 0;
1434 
1435 	ct = i_devi_get_client(cdip);
1436 	if (ct != NULL) {
1437 		count = ct->ct_path_count;
1438 	}
1439 	return (count);
1440 }
1441 
1442 
1443 /*
1444  * i_mdi_get_hash_key():
1445  * 		Create a hash using strings as keys
1446  *
1447  */
1448 static int
1449 i_mdi_get_hash_key(char *str)
1450 {
1451 	uint32_t	g, hash = 0;
1452 	char		*p;
1453 
1454 	for (p = str; *p != '\0'; p++) {
1455 		g = *p;
1456 		hash += g;
1457 	}
1458 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1459 }
1460 
1461 /*
1462  * mdi_get_lb_policy():
1463  * 		Get current load balancing policy for a given client device
1464  */
1465 client_lb_t
1466 mdi_get_lb_policy(dev_info_t *cdip)
1467 {
1468 	client_lb_t	lb = LOAD_BALANCE_NONE;
1469 	mdi_client_t	*ct;
1470 
1471 	ct = i_devi_get_client(cdip);
1472 	if (ct != NULL) {
1473 		lb = ct->ct_lb;
1474 	}
1475 	return (lb);
1476 }
1477 
1478 /*
1479  * mdi_set_lb_region_size():
1480  * 		Set current region size for the load-balance
1481  */
1482 int
1483 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1484 {
1485 	mdi_client_t	*ct;
1486 	int		rv = MDI_FAILURE;
1487 
1488 	ct = i_devi_get_client(cdip);
1489 	if (ct != NULL && ct->ct_lb_args != NULL) {
1490 		ct->ct_lb_args->region_size = region_size;
1491 		rv = MDI_SUCCESS;
1492 	}
1493 	return (rv);
1494 }
1495 
1496 /*
1497  * mdi_Set_lb_policy():
1498  * 		Set current load balancing policy for a given client device
1499  */
1500 int
1501 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1502 {
1503 	mdi_client_t	*ct;
1504 	int		rv = MDI_FAILURE;
1505 
1506 	ct = i_devi_get_client(cdip);
1507 	if (ct != NULL) {
1508 		ct->ct_lb = lb;
1509 		rv = MDI_SUCCESS;
1510 	}
1511 	return (rv);
1512 }
1513 
1514 /*
1515  * mdi_failover():
1516  *		failover function called by the vHCI drivers to initiate
1517  *		a failover operation.  This is typically due to non-availability
1518  *		of online paths to route I/O requests.  Failover can be
1519  *		triggered through user application also.
1520  *
1521  *		The vHCI driver calls mdi_failover() to initiate a failover
1522  *		operation. mdi_failover() calls back into the vHCI driver's
1523  *		vo_failover() entry point to perform the actual failover
1524  *		operation.  The reason for requiring the vHCI driver to
1525  *		initiate failover by calling mdi_failover(), instead of directly
1526  *		executing vo_failover() itself, is to ensure that the mdi
1527  *		framework can keep track of the client state properly.
1528  *		Additionally, mdi_failover() provides as a convenience the
1529  *		option of performing the failover operation synchronously or
1530  *		asynchronously
1531  *
1532  *		Upon successful completion of the failover operation, the
1533  *		paths that were previously ONLINE will be in the STANDBY state,
1534  *		and the newly activated paths will be in the ONLINE state.
1535  *
1536  *		The flags modifier determines whether the activation is done
1537  *		synchronously: MDI_FAILOVER_SYNC
1538  * Return Values:
1539  *		MDI_SUCCESS
1540  *		MDI_FAILURE
1541  *		MDI_BUSY
1542  */
1543 /*ARGSUSED*/
1544 int
1545 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1546 {
1547 	int			rv;
1548 	mdi_client_t		*ct;
1549 
1550 	ct = i_devi_get_client(cdip);
1551 	ASSERT(ct != NULL);
1552 	if (ct == NULL) {
1553 		/* cdip is not a valid client device. Nothing more to do. */
1554 		return (MDI_FAILURE);
1555 	}
1556 
1557 	MDI_CLIENT_LOCK(ct);
1558 
1559 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1560 		/* A path to the client is being freed */
1561 		MDI_CLIENT_UNLOCK(ct);
1562 		return (MDI_BUSY);
1563 	}
1564 
1565 
1566 	if (MDI_CLIENT_IS_FAILED(ct)) {
1567 		/*
1568 		 * Client is in failed state. Nothing more to do.
1569 		 */
1570 		MDI_CLIENT_UNLOCK(ct);
1571 		return (MDI_FAILURE);
1572 	}
1573 
1574 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1575 		/*
1576 		 * Failover is already in progress; return BUSY
1577 		 */
1578 		MDI_CLIENT_UNLOCK(ct);
1579 		return (MDI_BUSY);
1580 	}
1581 	/*
1582 	 * Make sure that mdi_pathinfo node state changes are processed.
1583 	 * We do not allow failovers to progress while client path state
1584 	 * changes are in progress
1585 	 */
1586 	if (ct->ct_unstable) {
1587 		if (flags == MDI_FAILOVER_ASYNC) {
1588 			MDI_CLIENT_UNLOCK(ct);
1589 			return (MDI_BUSY);
1590 		} else {
1591 			while (ct->ct_unstable)
1592 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1593 		}
1594 	}
1595 
1596 	/*
1597 	 * Client device is in stable state. Before proceeding, perform sanity
1598 	 * checks again.
1599 	 */
1600 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1601 	    (i_ddi_node_state(ct->ct_dip) < DS_READY)) {
1602 		/*
1603 		 * Client is in failed state. Nothing more to do.
1604 		 */
1605 		MDI_CLIENT_UNLOCK(ct);
1606 		return (MDI_FAILURE);
1607 	}
1608 
1609 	/*
1610 	 * Set the client state as failover in progress.
1611 	 */
1612 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1613 	ct->ct_failover_flags = flags;
1614 	MDI_CLIENT_UNLOCK(ct);
1615 
1616 	if (flags == MDI_FAILOVER_ASYNC) {
1617 		/*
1618 		 * Submit the initiate failover request via CPR safe
1619 		 * taskq threads.
1620 		 */
1621 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1622 		    ct, KM_SLEEP);
1623 		return (MDI_ACCEPT);
1624 	} else {
1625 		/*
1626 		 * Synchronous failover mode.  Typically invoked from the user
1627 		 * land.
1628 		 */
1629 		rv = i_mdi_failover(ct);
1630 	}
1631 	return (rv);
1632 }
1633 
1634 /*
1635  * i_mdi_failover():
1636  *		internal failover function. Invokes vHCI drivers failover
1637  *		callback function and process the failover status
1638  * Return Values:
1639  *		None
1640  *
1641  * Note: A client device in failover state can not be detached or freed.
1642  */
1643 static int
1644 i_mdi_failover(void *arg)
1645 {
1646 	int		rv = MDI_SUCCESS;
1647 	mdi_client_t	*ct = (mdi_client_t *)arg;
1648 	mdi_vhci_t	*vh = ct->ct_vhci;
1649 
1650 	ASSERT(!MUTEX_HELD(&ct->ct_mutex));
1651 
1652 	if (vh->vh_ops->vo_failover != NULL) {
1653 		/*
1654 		 * Call vHCI drivers callback routine
1655 		 */
1656 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1657 		    ct->ct_failover_flags);
1658 	}
1659 
1660 	MDI_CLIENT_LOCK(ct);
1661 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1662 
1663 	/*
1664 	 * Save the failover return status
1665 	 */
1666 	ct->ct_failover_status = rv;
1667 
1668 	/*
1669 	 * As a result of failover, client status would have been changed.
1670 	 * Update the client state and wake up anyone waiting on this client
1671 	 * device.
1672 	 */
1673 	i_mdi_client_update_state(ct);
1674 
1675 	cv_broadcast(&ct->ct_failover_cv);
1676 	MDI_CLIENT_UNLOCK(ct);
1677 	return (rv);
1678 }
1679 
1680 /*
1681  * Load balancing is logical block.
1682  * IOs within the range described by region_size
1683  * would go on the same path. This would improve the
1684  * performance by cache-hit on some of the RAID devices.
1685  * Search only for online paths(At some point we
1686  * may want to balance across target ports).
1687  * If no paths are found then default to round-robin.
1688  */
1689 static int
1690 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1691 {
1692 	int		path_index = -1;
1693 	int		online_path_count = 0;
1694 	int		online_nonpref_path_count = 0;
1695 	int 		region_size = ct->ct_lb_args->region_size;
1696 	mdi_pathinfo_t	*pip;
1697 	mdi_pathinfo_t	*next;
1698 	int		preferred, path_cnt;
1699 
1700 	pip = ct->ct_path_head;
1701 	while (pip) {
1702 		MDI_PI_LOCK(pip);
1703 		if (MDI_PI(pip)->pi_state ==
1704 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1705 			online_path_count++;
1706 		} else if (MDI_PI(pip)->pi_state ==
1707 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1708 			online_nonpref_path_count++;
1709 		}
1710 		next = (mdi_pathinfo_t *)
1711 		    MDI_PI(pip)->pi_client_link;
1712 		MDI_PI_UNLOCK(pip);
1713 		pip = next;
1714 	}
1715 	/* if found any online/preferred then use this type */
1716 	if (online_path_count > 0) {
1717 		path_cnt = online_path_count;
1718 		preferred = 1;
1719 	} else if (online_nonpref_path_count > 0) {
1720 		path_cnt = online_nonpref_path_count;
1721 		preferred = 0;
1722 	} else {
1723 		path_cnt = 0;
1724 	}
1725 	if (path_cnt) {
1726 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1727 		pip = ct->ct_path_head;
1728 		while (pip && path_index != -1) {
1729 			MDI_PI_LOCK(pip);
1730 			if (path_index == 0 &&
1731 			    (MDI_PI(pip)->pi_state ==
1732 			    MDI_PATHINFO_STATE_ONLINE) &&
1733 				MDI_PI(pip)->pi_preferred == preferred) {
1734 				MDI_PI_HOLD(pip);
1735 				MDI_PI_UNLOCK(pip);
1736 				*ret_pip = pip;
1737 				return (MDI_SUCCESS);
1738 			}
1739 			path_index --;
1740 			next = (mdi_pathinfo_t *)
1741 			    MDI_PI(pip)->pi_client_link;
1742 			MDI_PI_UNLOCK(pip);
1743 			pip = next;
1744 		}
1745 		if (pip == NULL) {
1746 			MDI_DEBUG(4, (CE_NOTE, NULL,
1747 			    "!lba %p, no pip !!\n",
1748 				bp->b_blkno));
1749 		} else {
1750 			MDI_DEBUG(4, (CE_NOTE, NULL,
1751 			    "!lba %p, no pip for path_index, "
1752 			    "pip %p\n", pip));
1753 		}
1754 	}
1755 	return (MDI_FAILURE);
1756 }
1757 
1758 /*
1759  * mdi_select_path():
1760  *		select a path to access a client device.
1761  *
1762  *		mdi_select_path() function is called by the vHCI drivers to
1763  *		select a path to route the I/O request to.  The caller passes
1764  *		the block I/O data transfer structure ("buf") as one of the
1765  *		parameters.  The mpxio framework uses the buf structure
1766  *		contents to maintain per path statistics (total I/O size /
1767  *		count pending).  If more than one online paths are available to
1768  *		select, the framework automatically selects a suitable path
1769  *		for routing I/O request. If a failover operation is active for
1770  *		this client device the call shall be failed with MDI_BUSY error
1771  *		code.
1772  *
1773  *		By default this function returns a suitable path in online
1774  *		state based on the current load balancing policy.  Currently
1775  *		we support LOAD_BALANCE_NONE (Previously selected online path
1776  *		will continue to be used till the path is usable) and
1777  *		LOAD_BALANCE_RR (Online paths will be selected in a round
1778  *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
1779  *		based on the logical block).  The load balancing policy is set
1780  *		through the vHCI driver's configuration file (driver.conf).
1781  *
1782  *		vHCI drivers may override this default behavior by specifying
1783  *		appropriate flags.  If start_pip is specified (non NULL), it is
1784  *		used as the start point to find the next appropriate path.
1785  *		The following values are currently defined:
1786  *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
1787  *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
1788  *
1789  *		The non-standard behavior is used by the scsi_vhci driver,
1790  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1791  *		attach of client devices (to avoid an unnecessary failover
1792  *		when the STANDBY path comes up first), during failover
1793  *		(to activate a STANDBY path as ONLINE).
1794  *
1795  *		The selected path is returned in a held state (ref_cnt).
1796  *		Caller should release the hold by calling mdi_rele_path().
1797  *
1798  * Return Values:
1799  *		MDI_SUCCESS	- Completed successfully
1800  *		MDI_BUSY 	- Client device is busy failing over
1801  *		MDI_NOPATH	- Client device is online, but no valid path are
1802  *				  available to access this client device
1803  *		MDI_FAILURE	- Invalid client device or state
1804  *		MDI_DEVI_ONLINING
1805  *				- Client device (struct dev_info state) is in
1806  *				  onlining state.
1807  */
1808 
1809 /*ARGSUSED*/
1810 int
1811 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1812     mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
1813 {
	/*
	 * Arguments, as used by the code below:
	 *	cdip		client devinfo node; its mdi_client_t is looked
	 *			up with i_devi_get_client()
	 *	bp		only consulted by the LOAD_BALANCE_LBA policy,
	 *			which skips its block based selection when bp
	 *			is NULL
	 *	flags		0 selects the standard behavior; otherwise
	 *			MDI_SELECT_ONLINE_PATH, MDI_SELECT_STANDBY_PATH
	 *			or both.  Any other non-zero value matches no
	 *			path.
	 *	start_pip	only consulted by the round robin walk: it
	 *			supplies the initial path preference and, for
	 *			non-standard behavior, the path after which
	 *			the walk starts
	 *	ret_pip		cleared on entry; set to the selected (held)
	 *			path when MDI_SUCCESS is returned
	 */
1814 	mdi_client_t	*ct;
1815 	mdi_pathinfo_t	*pip;
1816 	mdi_pathinfo_t	*next;
1817 	mdi_pathinfo_t	*head;
1818 	mdi_pathinfo_t	*start;
1819 	client_lb_t	lbp;	/* load balancing policy */
1820 	int		sb = 1;	/* standard behavior */
1821 	int		preferred = 1;	/* preferred path */
	/* cond: current path qualifies; cont: keep walking the list */
1822 	int		cond, cont = 1;
	/* retry: set when a transient path was seen during the walk */
1823 	int		retry = 0;
1824 
1825 	if (flags != 0) {
1826 		/*
1827 		 * disable default behavior
1828 		 */
1829 		sb = 0;
1830 	}
1831 
1832 	*ret_pip = NULL;
1833 	ct = i_devi_get_client(cdip);
1834 	if (ct == NULL) {
1835 		/* mdi extensions are NULL, Nothing more to do */
1836 		return (MDI_FAILURE);
1837 	}
1838 
	/*
	 * The client lock is held from here until the function returns;
	 * every return path below releases it first.
	 */
1839 	MDI_CLIENT_LOCK(ct);
1840 
	/*
	 * The client state checks are only made for the standard behavior;
	 * callers passing flags bypass them.
	 */
1841 	if (sb) {
1842 		if (MDI_CLIENT_IS_FAILED(ct)) {
1843 			/*
1844 			 * Client is not ready to accept any I/O requests.
1845 			 * Fail this request.
1846 			 */
1847 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1848 			    "client state offline ct = %p\n", ct));
1849 			MDI_CLIENT_UNLOCK(ct);
1850 			return (MDI_FAILURE);
1851 		}
1852 
1853 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1854 			/*
1855 			 * Check for Failover is in progress. If so tell the
1856 			 * caller that this device is busy.
1857 			 */
1858 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1859 			    "client failover in progress ct = %p\n", ct));
1860 			MDI_CLIENT_UNLOCK(ct);
1861 			return (MDI_BUSY);
1862 		}
1863 
1864 		/*
1865 		 * Check to see whether the client device is attached.
1866 		 * If not so, let the vHCI driver manually select a path
1867 		 * (standby) and let the probe/attach process to continue.
1868 		 */
1869 		if ((MDI_CLIENT_IS_DETACHED(ct)) ||
1870 		    i_ddi_node_state(cdip) < DS_READY) {
1871 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
1872 			MDI_CLIENT_UNLOCK(ct);
1873 			return (MDI_DEVI_ONLINING);
1874 		}
1875 	}
1876 
1877 	/*
1878 	 * Cache in the client list head.  If head of the list is NULL
1879 	 * return MDI_NOPATH
1880 	 */
1881 	head = ct->ct_path_head;
1882 	if (head == NULL) {
1883 		MDI_CLIENT_UNLOCK(ct);
1884 		return (MDI_NOPATH);
1885 	}
1886 
1887 	/*
1888 	 * for non default behavior, bypass current
1889 	 * load balancing policy and always use LOAD_BALANCE_RR
1890 	 * except that the start point will be adjusted based
1891 	 * on the provided start_pip
1892 	 */
1893 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
1894 
	/*
	 * There is no default case: a policy value other than the three
	 * handled here falls straight through to the common exit below.
	 */
1895 	switch (lbp) {
1896 	case LOAD_BALANCE_NONE:
1897 		/*
1898 		 * Load balancing is None  or Alternate path mode
1899 		 * Start looking for a online mdi_pathinfo node starting from
1900 		 * last known selected path
1901 		 */
1902 		preferred = 1;
1903 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
1904 		if (pip == NULL) {
1905 			pip = head;
1906 		}
1907 		start = pip;
		/*
		 * The walk wraps from the tail of the list back to head.
		 * Arriving back at 'start' the first time switches the search
		 * to non-preferred paths; arriving there a second time ends
		 * it.
		 */
1908 		do {
1909 			MDI_PI_LOCK(pip);
1910 			/*
1911 			 * No need to explicitly check if the path is disabled.
1912 			 * Since we are checking for state == ONLINE and the
1913 			 * same variable is used for DISABLE/ENABLE information.
1914 			 */
1915 			if (MDI_PI(pip)->pi_state  ==
1916 				MDI_PATHINFO_STATE_ONLINE &&
1917 				preferred == MDI_PI(pip)->pi_preferred) {
1918 				/*
1919 				 * Return the path in hold state. Caller should
1920 				 * release the lock by calling mdi_rele_path()
1921 				 */
1922 				MDI_PI_HOLD(pip);
1923 				MDI_PI_UNLOCK(pip);
1924 				ct->ct_path_last = pip;
1925 				*ret_pip = pip;
1926 				MDI_CLIENT_UNLOCK(ct);
1927 				return (MDI_SUCCESS);
1928 			}
1929 
1930 			/*
1931 			 * Path is busy.
1932 			 */
1933 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
1934 			    MDI_PI_IS_TRANSIENT(pip))
1935 				retry = 1;
1936 			/*
1937 			 * Keep looking for a next available online path
1938 			 */
1939 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1940 			if (next == NULL) {
1941 				next = head;
1942 			}
1943 			MDI_PI_UNLOCK(pip);
1944 			pip = next;
1945 			if (start == pip && preferred) {
1946 				preferred = 0;
1947 			} else if (start == pip && !preferred) {
1948 				cont = 0;
1949 			}
1950 		} while (cont);
1951 		break;
1952 
1953 	case LOAD_BALANCE_LBA:
1954 		/*
1955 		 * Make sure we are looking
1956 		 * for an online path. Otherwise, if it is for a STANDBY
1957 		 * path request, it will go through and fetch an ONLINE
1958 		 * path which is not desirable.
1959 		 */
1960 		if ((ct->ct_lb_args != NULL) &&
1961 			    (ct->ct_lb_args->region_size) && bp &&
1962 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			/*
			 * On success *ret_pip was set by i_mdi_lba_lb();
			 * ct_path_last is not updated on this path.
			 */
1963 			if (i_mdi_lba_lb(ct, ret_pip, bp)
1964 				    == MDI_SUCCESS) {
1965 				MDI_CLIENT_UNLOCK(ct);
1966 				return (MDI_SUCCESS);
1967 			}
1968 		}
1969 		/*  FALLTHROUGH */
1970 	case LOAD_BALANCE_RR:
1971 		/*
1972 		 * Load balancing is Round Robin. Start looking for a online
1973 		 * mdi_pathinfo node starting from last known selected path
1974 		 * as the start point.  If override flags are specified,
1975 		 * process accordingly.
1976 		 * If the search is already in effect(start_pip not null),
1977 		 * then lets just use the same path preference to continue the
1978 		 * traversal.
1979 		 */
1980 
		/*
		 * NOTE(review): pi_preferred of start_pip is read here
		 * without taking the path lock -- confirm this is intended.
		 */
1981 		if (start_pip != NULL) {
1982 			preferred = MDI_PI(start_pip)->pi_preferred;
1983 		} else {
1984 			preferred = 1;
1985 		}
1986 
		/*
		 * The walk begins at the successor of the reference path:
		 * the last selected path for the standard behavior, the
		 * caller supplied start_pip otherwise.
		 */
1987 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
1988 		if (start == NULL) {
1989 			pip = head;
1990 		} else {
1991 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
1992 			if (pip == NULL) {
1993 				if (!sb) {
1994 					if (preferred == 0) {
1995 						/*
1996 						 * Looks like we have completed
1997 						 * the traversal as preferred
1998 						 * value is 0. Time to bail out.
1999 						 */
2000 						*ret_pip = NULL;
2001 						MDI_CLIENT_UNLOCK(ct);
2002 						return (MDI_NOPATH);
2003 					} else {
2004 						/*
2005 						 * Looks like we reached the
2006 						 * end of the list. Lets enable
2007 						 * traversal of non preferred
2008 						 * paths.
2009 						 */
2010 						preferred = 0;
2011 					}
2012 				}
2013 				pip = head;
2014 			}
2015 		}
		/* From here on 'start' is the first path that is examined */
2016 		start = pip;
2017 		do {
2018 			MDI_PI_LOCK(pip);
			/*
			 * Work out whether this path qualifies.  The standard
			 * behavior accepts ONLINE paths only; otherwise the
			 * acceptable states are given by flags.  In all cases
			 * the path preference must equal 'preferred'.
			 */
2019 			if (sb) {
2020 				cond = ((MDI_PI(pip)->pi_state ==
2021 				    MDI_PATHINFO_STATE_ONLINE &&
2022 					MDI_PI(pip)->pi_preferred ==
2023 						preferred) ? 1 : 0);
2024 			} else {
2025 				if (flags == MDI_SELECT_ONLINE_PATH) {
2026 					cond = ((MDI_PI(pip)->pi_state ==
2027 					    MDI_PATHINFO_STATE_ONLINE &&
2028 						MDI_PI(pip)->pi_preferred ==
2029 						preferred) ? 1 : 0);
2030 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2031 					cond = ((MDI_PI(pip)->pi_state ==
2032 					    MDI_PATHINFO_STATE_STANDBY &&
2033 						MDI_PI(pip)->pi_preferred ==
2034 						preferred) ? 1 : 0);
2035 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2036 				    MDI_SELECT_STANDBY_PATH)) {
2037 					cond = (((MDI_PI(pip)->pi_state ==
2038 					    MDI_PATHINFO_STATE_ONLINE ||
2039 					    (MDI_PI(pip)->pi_state ==
2040 					    MDI_PATHINFO_STATE_STANDBY)) &&
2041 						MDI_PI(pip)->pi_preferred ==
2042 						preferred) ? 1 : 0);
2043 				} else {
2044 					cond = 0;
2045 				}
2046 			}
2047 			/*
2048 			 * No need to explicitly check if the path is disabled.
2049 			 * Since we are checking for state == ONLINE and the
2050 			 * same variable is used for DISABLE/ENABLE information.
2051 			 */
2052 			if (cond) {
2053 				/*
2054 				 * Return the path in hold state. Caller should
2055 				 * release the lock by calling mdi_rele_path()
2056 				 */
2057 				MDI_PI_HOLD(pip);
2058 				MDI_PI_UNLOCK(pip);
				/*
				 * Only the standard behavior records the
				 * selection as the next round robin start.
				 */
2059 				if (sb)
2060 					ct->ct_path_last = pip;
2061 				*ret_pip = pip;
2062 				MDI_CLIENT_UNLOCK(ct);
2063 				return (MDI_SUCCESS);
2064 			}
2065 			/*
2066 			 * Path is busy.
2067 			 */
2068 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2069 			    MDI_PI_IS_TRANSIENT(pip))
2070 				retry = 1;
2071 
2072 			/*
2073 			 * Keep looking for a next available online path
2074 			 */
			/*
			 * The code at do_again runs with pip locked: either
			 * by the MDI_PI_LOCK() at the top of the loop, or by
			 * the one taken just before the goto further down.
			 */
2075 do_again:
2076 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2077 			if (next == NULL) {
2078 				if (!sb) {
2079 					if (preferred == 1) {
2080 						/*
2081 						 * Looks like we reached the
2082 						 * end of the list. Lets enable
2083 						 * traversal of non preferred
2084 						 * paths.
2085 						 */
2086 						preferred = 0;
2087 						next = head;
2088 					} else {
2089 						/*
2090 						 * We have done both the passes
2091 						 * Preferred as well as for
2092 						 * Non-preferred. Bail out now.
2093 						 */
2094 						cont = 0;
2095 					}
2096 				} else {
2097 					/*
2098 					 * Standard behavior case.
2099 					 */
2100 					next = head;
2101 				}
2102 			}
2103 			MDI_PI_UNLOCK(pip);
2104 			if (cont == 0) {
2105 				break;
2106 			}
2107 			pip = next;
2108 
2109 			if (!sb) {
2110 				/*
2111 				 * We need to handle the selection of
2112 				 * non-preferred path in the following
2113 				 * case:
2114 				 *
2115 				 * +------+   +------+   +------+   +-----+
2116 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2117 				 * +------+   +------+   +------+   +-----+
2118 				 *
2119 				 * If we start the search with B, we need to
2120 				 * skip beyond B to pick C which is non -
2121 				 * preferred in the second pass. The following
2122 				 * test, if true, will allow us to skip over
2123 				 * the 'start'(B in the example) to select
2124 				 * other non preferred elements.
2125 				 */
2126 				if ((start_pip != NULL) && (start_pip == pip) &&
2127 				    (MDI_PI(start_pip)->pi_preferred
2128 				    != preferred)) {
2129 					/*
2130 					 * try again after going past the start
2131 					 * pip
2132 					 */
2133 					MDI_PI_LOCK(pip);
2134 					goto do_again;
2135 				}
2136 			} else {
2137 				/*
2138 				 * Standard behavior case
2139 				 */
2140 				if (start == pip && preferred) {
2141 					/* look for nonpreferred paths */
2142 					preferred = 0;
2143 				} else if (start == pip && !preferred) {
2144 					/*
2145 					 * Exit condition
2146 					 */
2147 					cont = 0;
2148 				}
2149 			}
2150 		} while (cont);
2151 		break;
2152 	}
2153 
	/*
	 * No path was selected.  If a transient path was seen during the
	 * walk MDI_BUSY is returned instead of MDI_NOPATH.
	 */
2154 	MDI_CLIENT_UNLOCK(ct);
2155 	if (retry == 1) {
2156 		return (MDI_BUSY);
2157 	} else {
2158 		return (MDI_NOPATH);
2159 	}
2160 }
2161 
2162 /*
2163  * For a client, return the next available path to any phci
2164  *
2165  * Note:
2166  *		Caller should hold the branch's devinfo node to get a consistent
2167  *		snap shot of the mdi_pathinfo nodes.
2168  *
2169  *		Please note that even the list is stable the mdi_pathinfo
2170  *		node state and properties are volatile.  The caller should lock
2171  *		and unlock the nodes by calling mdi_pi_lock() and
2172  *		mdi_pi_unlock() functions to get a stable properties.
2173  *
2174  *		If there is a need to use the nodes beyond the hold of the
2175  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2176  *		need to be held against unexpected removal by calling
2177  *		mdi_hold_path() and should be released by calling
2178  *		mdi_rele_path() on completion.
2179  */
2180 mdi_pathinfo_t *
2181 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2182 {
2183 	mdi_client_t *ct;
2184 
2185 	if (!MDI_CLIENT(ct_dip))
2186 		return (NULL);
2187 
2188 	/*
2189 	 * Walk through client link
2190 	 */
2191 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2192 	ASSERT(ct != NULL);
2193 
2194 	if (pip == NULL)
2195 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2196 
2197 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2198 }
2199 
2200 /*
2201  * For a phci, return the next available path to any client
2202  * Note: ditto mdi_get_next_phci_path()
2203  */
2204 mdi_pathinfo_t *
2205 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2206 {
2207 	mdi_phci_t *ph;
2208 
2209 	if (!MDI_PHCI(ph_dip))
2210 		return (NULL);
2211 
2212 	/*
2213 	 * Walk through pHCI link
2214 	 */
2215 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2216 	ASSERT(ph != NULL);
2217 
2218 	if (pip == NULL)
2219 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2220 
2221 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2222 }
2223 
2224 /*
2225  * mdi_get_nextpath():
2226  *		mdi_pathinfo node walker function.  Get the next node from the
2227  *		client or pHCI device list.
2228  *
2229  * XXX This is wrapper function for compatibility purposes only.
2230  *
2231  *	It doesn't work under Multi-level MPxIO, where a dip
2232  *	is both client and phci (which link should next_path follow?).
2233  *	Once Leadville is modified to call mdi_get_next_phci/client_path,
2234  *	this interface should be removed.
2235  */
2236 void
2237 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip,
2238     mdi_pathinfo_t **ret_pip)
2239 {
2240 	if (MDI_CLIENT(dip)) {
2241 		*ret_pip = mdi_get_next_phci_path(dip, pip);
2242 	} else if (MDI_PHCI(dip)) {
2243 		*ret_pip = mdi_get_next_client_path(dip, pip);
2244 	} else {
2245 		*ret_pip = NULL;
2246 	}
2247 }
2248 
2249 /*
2250  * mdi_hold_path():
2251  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2252  * Return Values:
2253  *		None
2254  */
2255 void
2256 mdi_hold_path(mdi_pathinfo_t *pip)
2257 {
2258 	if (pip) {
2259 		MDI_PI_LOCK(pip);
2260 		MDI_PI_HOLD(pip);
2261 		MDI_PI_UNLOCK(pip);
2262 	}
2263 }
2264 
2265 
2266 /*
2267  * mdi_rele_path():
2268  *		Release the mdi_pathinfo node which was selected
2269  *		through mdi_select_path() mechanism or manually held by
2270  *		calling mdi_hold_path().
2271  * Return Values:
2272  *		None
2273  */
2274 void
2275 mdi_rele_path(mdi_pathinfo_t *pip)
2276 {
2277 	if (pip) {
2278 		MDI_PI_LOCK(pip);
2279 		MDI_PI_RELE(pip);
2280 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2281 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2282 		}
2283 		MDI_PI_UNLOCK(pip);
2284 	}
2285 }
2286 
2287 
2288 /*
2289  * mdi_pi_lock():
2290  * 		Lock the mdi_pathinfo node.
2291  * Note:
2292  *		The caller should release the lock by calling mdi_pi_unlock()
2293  */
2294 void
2295 mdi_pi_lock(mdi_pathinfo_t *pip)
2296 {
2297 	ASSERT(pip != NULL);
2298 	if (pip) {
2299 		MDI_PI_LOCK(pip);
2300 	}
2301 }
2302 
2303 
2304 /*
2305  * mdi_pi_unlock():
2306  * 		Unlock the mdi_pathinfo node.
2307  * Note:
2308  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2309  */
2310 void
2311 mdi_pi_unlock(mdi_pathinfo_t *pip)
2312 {
2313 	ASSERT(pip != NULL);
2314 	if (pip) {
2315 		MDI_PI_UNLOCK(pip);
2316 	}
2317 }
2318 
2319 /*
2320  * mdi_pi_find():
2321  *		Search the list of mdi_pathinfo nodes attached to the
2322  *		pHCI/Client device node whose path address matches "paddr".
2323  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2324  *		found.
2325  * Return Values:
2326  *		mdi_pathinfo node handle
2327  *		NULL
2328  * Notes:
2329  *		Caller need not hold any locks to call this function.
2330  */
2331 mdi_pathinfo_t *
2332 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2333 {
2334 	mdi_phci_t		*ph;
2335 	mdi_vhci_t		*vh;
2336 	mdi_client_t		*ct;
2337 	mdi_pathinfo_t		*pip = NULL;
2338 
2339 	if ((pdip == NULL) || (paddr == NULL)) {
2340 		return (NULL);
2341 	}
2342 	ph = i_devi_get_phci(pdip);
2343 	if (ph == NULL) {
2344 		/*
2345 		 * Invalid pHCI device, Nothing more to do.
2346 		 */
2347 		MDI_DEBUG(2, (CE_WARN, NULL,
2348 		    "!mdi_pi_find: invalid phci"));
2349 		return (NULL);
2350 	}
2351 
2352 	vh = ph->ph_vhci;
2353 	if (vh == NULL) {
2354 		/*
2355 		 * Invalid vHCI device, Nothing more to do.
2356 		 */
2357 		MDI_DEBUG(2, (CE_WARN, NULL,
2358 		    "!mdi_pi_find: invalid phci"));
2359 		return (NULL);
2360 	}
2361 
2362 	/*
2363 	 * Look for client device identified by caddr (guid)
2364 	 */
2365 	if (caddr == NULL) {
2366 		/*
2367 		 * Find a mdi_pathinfo node under pHCI list for a matching
2368 		 * unit address.
2369 		 */
2370 		mutex_enter(&ph->ph_mutex);
2371 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2372 
2373 		while (pip != NULL) {
2374 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2375 				break;
2376 			}
2377 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2378 		}
2379 		mutex_exit(&ph->ph_mutex);
2380 		return (pip);
2381 	}
2382 
2383 	/*
2384 	 * XXX - Is the rest of the code in this function really necessary?
2385 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2386 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2387 	 * whether the search is based on the pathinfo nodes attached to
2388 	 * the pHCI or the client node, the result will be the same.
2389 	 */
2390 
2391 	/*
2392 	 * Find the client device corresponding to 'caddr'
2393 	 */
2394 	mutex_enter(&mdi_mutex);
2395 
2396 	/*
2397 	 * XXX - Passing NULL to the following function works as long as the
2398 	 * the client addresses (caddr) are unique per vhci basis.
2399 	 */
2400 	ct = i_mdi_client_find(vh, NULL, caddr);
2401 	if (ct == NULL) {
2402 		/*
2403 		 * Client not found, Obviously mdi_pathinfo node has not been
2404 		 * created yet.
2405 		 */
2406 		mutex_exit(&mdi_mutex);
2407 		return (pip);
2408 	}
2409 
2410 	/*
2411 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2412 	 * pHCI and paddr
2413 	 */
2414 	MDI_CLIENT_LOCK(ct);
2415 
2416 	/*
2417 	 * Release the global mutex as it is no more needed. Note: We always
2418 	 * respect the locking order while acquiring.
2419 	 */
2420 	mutex_exit(&mdi_mutex);
2421 
2422 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2423 	while (pip != NULL) {
2424 		/*
2425 		 * Compare the unit address
2426 		 */
2427 		if ((MDI_PI(pip)->pi_phci == ph) &&
2428 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2429 			break;
2430 		}
2431 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2432 	}
2433 	MDI_CLIENT_UNLOCK(ct);
2434 	return (pip);
2435 }
2436 
2437 /*
2438  * mdi_pi_alloc():
2439  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2440  *		The mdi_pathinfo node returned by this function identifies a
2441  *		unique device path is capable of having properties attached
2442  *		and passed to mdi_pi_online() to fully attach and online the
2443  *		path and client device node.
2444  *		The mdi_pathinfo node returned by this function must be
2445  *		destroyed using mdi_pi_free() if the path is no longer
2446  *		operational or if the caller fails to attach a client device
2447  *		node when calling mdi_pi_online(). The framework will not free
2448  *		the resources allocated.
2449  *		This function can be called from both interrupt and kernel
2450  *		contexts.  DDI_NOSLEEP flag should be used while calling
2451  *		from interrupt contexts.
2452  * Return Values:
2453  *		MDI_SUCCESS
2454  *		MDI_FAILURE
2455  *		MDI_NOMEM
2456  */
2457 /*ARGSUSED*/
2458 int
2459 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2460     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2461 {
2462 	mdi_vhci_t	*vh;
2463 	mdi_phci_t	*ph;
2464 	mdi_client_t	*ct;
2465 	mdi_pathinfo_t	*pip = NULL;
2466 	dev_info_t	*cdip;
2467 	int		rv = MDI_NOMEM;
2468 	int		path_allocated = 0;
2469 
2470 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2471 	    ret_pip == NULL) {
2472 		/* Nothing more to do */
2473 		return (MDI_FAILURE);
2474 	}
2475 
2476 	*ret_pip = NULL;
2477 	ph = i_devi_get_phci(pdip);
2478 	ASSERT(ph != NULL);
2479 	if (ph == NULL) {
2480 		/* Invalid pHCI device, return failure */
2481 		MDI_DEBUG(1, (CE_WARN, NULL,
2482 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2483 		return (MDI_FAILURE);
2484 	}
2485 
2486 	MDI_PHCI_LOCK(ph);
2487 	vh = ph->ph_vhci;
2488 	if (vh == NULL) {
2489 		/* Invalid vHCI device, return failure */
2490 		MDI_DEBUG(1, (CE_WARN, NULL,
2491 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2492 		MDI_PHCI_UNLOCK(ph);
2493 		return (MDI_FAILURE);
2494 	}
2495 
2496 	if (MDI_PHCI_IS_READY(ph) == 0) {
2497 		/*
2498 		 * Do not allow new node creation when pHCI is in
2499 		 * offline/suspended states
2500 		 */
2501 		MDI_DEBUG(1, (CE_WARN, NULL,
2502 		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
2503 		MDI_PHCI_UNLOCK(ph);
2504 		return (MDI_BUSY);
2505 	}
2506 	MDI_PHCI_UNSTABLE(ph);
2507 	MDI_PHCI_UNLOCK(ph);
2508 
2509 	/* look for a matching client, create one if not found */
2510 	mutex_enter(&mdi_mutex);
2511 	ct = i_mdi_client_find(vh, cname, caddr);
2512 	if (ct == NULL) {
2513 		ct = i_mdi_client_alloc(vh, cname, caddr);
2514 		ASSERT(ct != NULL);
2515 	}
2516 
2517 	if (ct->ct_dip == NULL) {
2518 		/*
2519 		 * Allocate a devinfo node
2520 		 */
2521 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2522 		    compatible, ncompatible);
2523 		if (ct->ct_dip == NULL) {
2524 			(void) i_mdi_client_free(vh, ct);
2525 			goto fail;
2526 		}
2527 	}
2528 	cdip = ct->ct_dip;
2529 
2530 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2531 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2532 
2533 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2534 	while (pip != NULL) {
2535 		/*
2536 		 * Compare the unit address
2537 		 */
2538 		if ((MDI_PI(pip)->pi_phci == ph) &&
2539 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2540 			break;
2541 		}
2542 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2543 	}
2544 
2545 	if (pip == NULL) {
2546 		/*
2547 		 * This is a new path for this client device.  Allocate and
2548 		 * initialize a new pathinfo node
2549 		 */
2550 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2551 		ASSERT(pip != NULL);
2552 		path_allocated = 1;
2553 	}
2554 	rv = MDI_SUCCESS;
2555 
2556 fail:
2557 	/*
2558 	 * Release the global mutex.
2559 	 */
2560 	mutex_exit(&mdi_mutex);
2561 
2562 	/*
2563 	 * Mark the pHCI as stable
2564 	 */
2565 	MDI_PHCI_LOCK(ph);
2566 	MDI_PHCI_STABLE(ph);
2567 	MDI_PHCI_UNLOCK(ph);
2568 	*ret_pip = pip;
2569 
2570 	if (path_allocated)
2571 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2572 
2573 	return (rv);
2574 }
2575 
2576 /*ARGSUSED*/
2577 int
2578 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2579     int flags, mdi_pathinfo_t **ret_pip)
2580 {
2581 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2582 	    flags, ret_pip));
2583 }
2584 
2585 /*
2586  * i_mdi_pi_alloc():
2587  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2588  * Return Values:
2589  *		mdi_pathinfo
2590  */
2591 
2592 /*ARGSUSED*/
2593 static mdi_pathinfo_t *
2594 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2595 {
2596 	mdi_pathinfo_t	*pip;
2597 	int		ct_circular;
2598 	int		ph_circular;
2599 	int		se_flag;
2600 	int		kmem_flag;
2601 
2602 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2603 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2604 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2605 	    MDI_PATHINFO_STATE_TRANSIENT;
2606 
2607 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2608 		MDI_PI_SET_USER_DISABLE(pip);
2609 
2610 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2611 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2612 
2613 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2614 		MDI_PI_SET_DRV_DISABLE(pip);
2615 
2616 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2617 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2618 	MDI_PI(pip)->pi_client = ct;
2619 	MDI_PI(pip)->pi_phci = ph;
2620 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2621 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2622 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2623 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2624 	MDI_PI(pip)->pi_pprivate = NULL;
2625 	MDI_PI(pip)->pi_cprivate = NULL;
2626 	MDI_PI(pip)->pi_vprivate = NULL;
2627 	MDI_PI(pip)->pi_client_link = NULL;
2628 	MDI_PI(pip)->pi_phci_link = NULL;
2629 	MDI_PI(pip)->pi_ref_cnt = 0;
2630 	MDI_PI(pip)->pi_kstats = NULL;
2631 	MDI_PI(pip)->pi_preferred = 1;
2632 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2633 
2634 	/*
2635 	 * Lock both dev_info nodes against changes in parallel.
2636 	 */
2637 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2638 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2639 
2640 	i_mdi_phci_add_path(ph, pip);
2641 	i_mdi_client_add_path(ct, pip);
2642 
2643 	ndi_devi_exit(ph->ph_dip, ph_circular);
2644 	ndi_devi_exit(ct->ct_dip, ct_circular);
2645 
2646 	/* determine interrupt context */
2647 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2648 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2649 
2650 	i_ddi_di_cache_invalidate(kmem_flag);
2651 
2652 	return (pip);
2653 }
2654 
2655 /*
2656  * i_mdi_phci_add_path():
2657  * 		Add a mdi_pathinfo node to pHCI list.
2658  * Notes:
2659  *		Caller should per-pHCI mutex
2660  */
2661 
2662 static void
2663 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2664 {
2665 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2666 
2667 	if (ph->ph_path_head == NULL) {
2668 		ph->ph_path_head = pip;
2669 	} else {
2670 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2671 	}
2672 	ph->ph_path_tail = pip;
2673 	ph->ph_path_count++;
2674 }
2675 
2676 /*
2677  * i_mdi_client_add_path():
2678  *		Add mdi_pathinfo node to client list
2679  */
2680 
2681 static void
2682 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2683 {
2684 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2685 
2686 	if (ct->ct_path_head == NULL) {
2687 		ct->ct_path_head = pip;
2688 	} else {
2689 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2690 	}
2691 	ct->ct_path_tail = pip;
2692 	ct->ct_path_count++;
2693 }
2694 
2695 /*
2696  * mdi_pi_free():
2697  *		Free the mdi_pathinfo node and also client device node if this
2698  *		is the last path to the device
2699  * Return Values:
2700  *		MDI_SUCCESS
2701  *		MDI_FAILURE
2702  *		MDI_BUSY
2703  */
2704 
2705 /*ARGSUSED*/
2706 int
2707 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
2708 {
2709 	int		rv = MDI_SUCCESS;
2710 	mdi_vhci_t	*vh;
2711 	mdi_phci_t	*ph;
2712 	mdi_client_t	*ct;
2713 	int		(*f)();
2714 	int		client_held = 0;
2715 
2716 	MDI_PI_LOCK(pip);
2717 	ph = MDI_PI(pip)->pi_phci;
2718 	ASSERT(ph != NULL);
2719 	if (ph == NULL) {
2720 		/*
2721 		 * Invalid pHCI device, return failure
2722 		 */
2723 		MDI_DEBUG(1, (CE_WARN, NULL,
2724 		    "!mdi_pi_free: invalid pHCI"));
2725 		MDI_PI_UNLOCK(pip);
2726 		return (MDI_FAILURE);
2727 	}
2728 
2729 	vh = ph->ph_vhci;
2730 	ASSERT(vh != NULL);
2731 	if (vh == NULL) {
2732 		/* Invalid pHCI device, return failure */
2733 		MDI_DEBUG(1, (CE_WARN, NULL,
2734 		    "!mdi_pi_free: invalid vHCI"));
2735 		MDI_PI_UNLOCK(pip);
2736 		return (MDI_FAILURE);
2737 	}
2738 
2739 	ct = MDI_PI(pip)->pi_client;
2740 	ASSERT(ct != NULL);
2741 	if (ct == NULL) {
2742 		/*
2743 		 * Invalid Client device, return failure
2744 		 */
2745 		MDI_DEBUG(1, (CE_WARN, NULL,
2746 		    "!mdi_pi_free: invalid client"));
2747 		MDI_PI_UNLOCK(pip);
2748 		return (MDI_FAILURE);
2749 	}
2750 
2751 	/*
2752 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
2753 	 * if the node state is either offline or init and the reference count
2754 	 * is zero.
2755 	 */
2756 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
2757 	    MDI_PI_IS_INITING(pip))) {
2758 		/*
2759 		 * Node is busy
2760 		 */
2761 		MDI_DEBUG(1, (CE_WARN, NULL,
2762 		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
2763 		MDI_PI_UNLOCK(pip);
2764 		return (MDI_BUSY);
2765 	}
2766 
2767 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
2768 		/*
2769 		 * Give a chance for pending I/Os to complete.
2770 		 */
2771 		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
2772 		    "%d cmds still pending on path: %p\n",
2773 		    MDI_PI(pip)->pi_ref_cnt, pip));
2774 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
2775 		    &MDI_PI(pip)->pi_mutex,
2776 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
2777 			/*
2778 			 * The timeout time reached without ref_cnt being zero
2779 			 * being signaled.
2780 			 */
2781 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2782 			    "!mdi_pi_free: "
2783 			    "Timeout reached on path %p without the cond\n",
2784 			    pip));
2785 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2786 			    "!mdi_pi_free: "
2787 			    "%d cmds still pending on path: %p\n",
2788 			    MDI_PI(pip)->pi_ref_cnt, pip));
2789 			MDI_PI_UNLOCK(pip);
2790 			return (MDI_BUSY);
2791 		}
2792 	}
2793 	if (MDI_PI(pip)->pi_pm_held) {
2794 		client_held = 1;
2795 	}
2796 	MDI_PI_UNLOCK(pip);
2797 
2798 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
2799 
2800 	MDI_CLIENT_LOCK(ct);
2801 
2802 	/* Prevent further failovers till mdi_mutex is held */
2803 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
2804 
2805 	/*
2806 	 * Wait till failover is complete before removing this node.
2807 	 */
2808 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
2809 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
2810 
2811 	MDI_CLIENT_UNLOCK(ct);
2812 	mutex_enter(&mdi_mutex);
2813 	MDI_CLIENT_LOCK(ct);
2814 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
2815 
2816 	if (!MDI_PI_IS_INITING(pip)) {
2817 		f = vh->vh_ops->vo_pi_uninit;
2818 		if (f != NULL) {
2819 			rv = (*f)(vh->vh_dip, pip, 0);
2820 		}
2821 	}
2822 	/*
2823 	 * If vo_pi_uninit() completed successfully.
2824 	 */
2825 	if (rv == MDI_SUCCESS) {
2826 		if (client_held) {
2827 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
2828 			    "i_mdi_pm_rele_client\n"));
2829 			i_mdi_pm_rele_client(ct, 1);
2830 		}
2831 		i_mdi_pi_free(ph, pip, ct);
2832 		if (ct->ct_path_count == 0) {
2833 			/*
2834 			 * Client lost its last path.
2835 			 * Clean up the client device
2836 			 */
2837 			MDI_CLIENT_UNLOCK(ct);
2838 			(void) i_mdi_client_free(ct->ct_vhci, ct);
2839 			mutex_exit(&mdi_mutex);
2840 			return (rv);
2841 		}
2842 	}
2843 	MDI_CLIENT_UNLOCK(ct);
2844 	mutex_exit(&mdi_mutex);
2845 
2846 	if (rv == MDI_FAILURE)
2847 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2848 
2849 	return (rv);
2850 }
2851 
2852 /*
2853  * i_mdi_pi_free():
2854  *		Free the mdi_pathinfo node
2855  */
2856 static void
2857 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
2858 {
2859 	int	ct_circular;
2860 	int	ph_circular;
2861 	int	se_flag;
2862 	int	kmem_flag;
2863 
2864 	/*
2865 	 * remove any per-path kstats
2866 	 */
2867 	i_mdi_pi_kstat_destroy(pip);
2868 
2869 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2870 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2871 
2872 	i_mdi_client_remove_path(ct, pip);
2873 	i_mdi_phci_remove_path(ph, pip);
2874 
2875 	ndi_devi_exit(ph->ph_dip, ph_circular);
2876 	ndi_devi_exit(ct->ct_dip, ct_circular);
2877 
2878 	/* determine interrupt context */
2879 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2880 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2881 
2882 	i_ddi_di_cache_invalidate(kmem_flag);
2883 
2884 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
2885 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
2886 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
2887 	if (MDI_PI(pip)->pi_addr) {
2888 		kmem_free(MDI_PI(pip)->pi_addr,
2889 		    strlen(MDI_PI(pip)->pi_addr) + 1);
2890 		MDI_PI(pip)->pi_addr = NULL;
2891 	}
2892 
2893 	if (MDI_PI(pip)->pi_prop) {
2894 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
2895 		MDI_PI(pip)->pi_prop = NULL;
2896 	}
2897 	kmem_free(pip, sizeof (struct mdi_pathinfo));
2898 }
2899 
2900 
2901 /*
2902  * i_mdi_phci_remove_path():
2903  * 		Remove a mdi_pathinfo node from pHCI list.
2904  * Notes:
2905  *		Caller should hold per-pHCI mutex
2906  */
2907 
2908 static void
2909 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2910 {
2911 	mdi_pathinfo_t	*prev = NULL;
2912 	mdi_pathinfo_t	*path = NULL;
2913 
2914 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2915 
2916 	path = ph->ph_path_head;
2917 	while (path != NULL) {
2918 		if (path == pip) {
2919 			break;
2920 		}
2921 		prev = path;
2922 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2923 	}
2924 
2925 	if (path) {
2926 		ph->ph_path_count--;
2927 		if (prev) {
2928 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
2929 		} else {
2930 			ph->ph_path_head =
2931 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2932 		}
2933 		if (ph->ph_path_tail == path) {
2934 			ph->ph_path_tail = prev;
2935 		}
2936 	}
2937 
2938 	/*
2939 	 * Clear the pHCI link
2940 	 */
2941 	MDI_PI(pip)->pi_phci_link = NULL;
2942 	MDI_PI(pip)->pi_phci = NULL;
2943 }
2944 
2945 /*
2946  * i_mdi_client_remove_path():
2947  * 		Remove a mdi_pathinfo node from client path list.
2948  */
2949 
2950 static void
2951 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2952 {
2953 	mdi_pathinfo_t	*prev = NULL;
2954 	mdi_pathinfo_t	*path;
2955 
2956 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2957 
2958 	path = ct->ct_path_head;
2959 	while (path != NULL) {
2960 		if (path == pip) {
2961 			break;
2962 		}
2963 		prev = path;
2964 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2965 	}
2966 
2967 	if (path) {
2968 		ct->ct_path_count--;
2969 		if (prev) {
2970 			MDI_PI(prev)->pi_client_link =
2971 			    MDI_PI(path)->pi_client_link;
2972 		} else {
2973 			ct->ct_path_head =
2974 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2975 		}
2976 		if (ct->ct_path_tail == path) {
2977 			ct->ct_path_tail = prev;
2978 		}
2979 		if (ct->ct_path_last == path) {
2980 			ct->ct_path_last = ct->ct_path_head;
2981 		}
2982 	}
2983 	MDI_PI(pip)->pi_client_link = NULL;
2984 	MDI_PI(pip)->pi_client = NULL;
2985 }
2986 
2987 /*
2988  * i_mdi_pi_state_change():
2989  *		online a mdi_pathinfo node
2990  *
2991  * Return Values:
2992  *		MDI_SUCCESS
2993  *		MDI_FAILURE
2994  */
2995 /*ARGSUSED*/
2996 static int
2997 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
2998 {
2999 	int		rv = MDI_SUCCESS;
3000 	mdi_vhci_t	*vh;
3001 	mdi_phci_t	*ph;
3002 	mdi_client_t	*ct;
3003 	int		(*f)();
3004 	dev_info_t	*cdip;
3005 
3006 	MDI_PI_LOCK(pip);
3007 
3008 	ph = MDI_PI(pip)->pi_phci;
3009 	ASSERT(ph);
3010 	if (ph == NULL) {
3011 		/*
3012 		 * Invalid pHCI device, fail the request
3013 		 */
3014 		MDI_PI_UNLOCK(pip);
3015 		MDI_DEBUG(1, (CE_WARN, NULL,
3016 		    "!mdi_pi_state_change: invalid phci"));
3017 		return (MDI_FAILURE);
3018 	}
3019 
3020 	vh = ph->ph_vhci;
3021 	ASSERT(vh);
3022 	if (vh == NULL) {
3023 		/*
3024 		 * Invalid vHCI device, fail the request
3025 		 */
3026 		MDI_PI_UNLOCK(pip);
3027 		MDI_DEBUG(1, (CE_WARN, NULL,
3028 		    "!mdi_pi_state_change: invalid vhci"));
3029 		return (MDI_FAILURE);
3030 	}
3031 
3032 	ct = MDI_PI(pip)->pi_client;
3033 	ASSERT(ct != NULL);
3034 	if (ct == NULL) {
3035 		/*
3036 		 * Invalid client device, fail the request
3037 		 */
3038 		MDI_PI_UNLOCK(pip);
3039 		MDI_DEBUG(1, (CE_WARN, NULL,
3040 		    "!mdi_pi_state_change: invalid client"));
3041 		return (MDI_FAILURE);
3042 	}
3043 
3044 	/*
3045 	 * If this path has not been initialized yet, Callback vHCI driver's
3046 	 * pathinfo node initialize entry point
3047 	 */
3048 
3049 	if (MDI_PI_IS_INITING(pip)) {
3050 		MDI_PI_UNLOCK(pip);
3051 		f = vh->vh_ops->vo_pi_init;
3052 		if (f != NULL) {
3053 			rv = (*f)(vh->vh_dip, pip, 0);
3054 			if (rv != MDI_SUCCESS) {
3055 				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
3056 				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
3057 				    vh, pip));
3058 				return (MDI_FAILURE);
3059 			}
3060 		}
3061 		MDI_PI_LOCK(pip);
3062 		MDI_PI_CLEAR_TRANSIENT(pip);
3063 	}
3064 
3065 	/*
3066 	 * Do not allow state transition when pHCI is in offline/suspended
3067 	 * states
3068 	 */
3069 	i_mdi_phci_lock(ph, pip);
3070 	if (MDI_PHCI_IS_READY(ph) == 0) {
3071 		MDI_DEBUG(1, (CE_WARN, NULL,
3072 		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
3073 		MDI_PI_UNLOCK(pip);
3074 		i_mdi_phci_unlock(ph);
3075 		return (MDI_BUSY);
3076 	}
3077 	MDI_PHCI_UNSTABLE(ph);
3078 	i_mdi_phci_unlock(ph);
3079 
3080 	/*
3081 	 * Check if mdi_pathinfo state is in transient state.
3082 	 * If yes, offlining is in progress and wait till transient state is
3083 	 * cleared.
3084 	 */
3085 	if (MDI_PI_IS_TRANSIENT(pip)) {
3086 		while (MDI_PI_IS_TRANSIENT(pip)) {
3087 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3088 			    &MDI_PI(pip)->pi_mutex);
3089 		}
3090 	}
3091 
3092 	/*
3093 	 * Grab the client lock in reverse order sequence and release the
3094 	 * mdi_pathinfo mutex.
3095 	 */
3096 	i_mdi_client_lock(ct, pip);
3097 	MDI_PI_UNLOCK(pip);
3098 
3099 	/*
3100 	 * Wait till failover state is cleared
3101 	 */
3102 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3103 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3104 
3105 	/*
3106 	 * Mark the mdi_pathinfo node state as transient
3107 	 */
3108 	MDI_PI_LOCK(pip);
3109 	switch (state) {
3110 	case MDI_PATHINFO_STATE_ONLINE:
3111 		MDI_PI_SET_ONLINING(pip);
3112 		break;
3113 
3114 	case MDI_PATHINFO_STATE_STANDBY:
3115 		MDI_PI_SET_STANDBYING(pip);
3116 		break;
3117 
3118 	case MDI_PATHINFO_STATE_FAULT:
3119 		/*
3120 		 * Mark the pathinfo state as FAULTED
3121 		 */
3122 		MDI_PI_SET_FAULTING(pip);
3123 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3124 		break;
3125 
3126 	case MDI_PATHINFO_STATE_OFFLINE:
3127 		/*
3128 		 * ndi_devi_offline() cannot hold pip or ct locks.
3129 		 */
3130 		MDI_PI_UNLOCK(pip);
3131 		/*
3132 		 * Do not offline if path will become last path and path
3133 		 * is busy for user initiated events.
3134 		 */
3135 		cdip = ct->ct_dip;
3136 		if ((flag & NDI_DEVI_REMOVE) &&
3137 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3138 			i_mdi_client_unlock(ct);
3139 			rv = ndi_devi_offline(cdip, 0);
3140 			if (rv != NDI_SUCCESS) {
3141 				/*
3142 				 * Convert to MDI error code
3143 				 */
3144 				switch (rv) {
3145 				case NDI_BUSY:
3146 					rv = MDI_BUSY;
3147 					break;
3148 				default:
3149 					rv = MDI_FAILURE;
3150 					break;
3151 				}
3152 				goto state_change_exit;
3153 			} else {
3154 				i_mdi_client_lock(ct, NULL);
3155 			}
3156 		}
3157 		/*
3158 		 * Mark the mdi_pathinfo node state as transient
3159 		 */
3160 		MDI_PI_LOCK(pip);
3161 		MDI_PI_SET_OFFLINING(pip);
3162 		break;
3163 	}
3164 	MDI_PI_UNLOCK(pip);
3165 	MDI_CLIENT_UNSTABLE(ct);
3166 	i_mdi_client_unlock(ct);
3167 
3168 	f = vh->vh_ops->vo_pi_state_change;
3169 	if (f != NULL) {
3170 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3171 		if (rv == MDI_NOT_SUPPORTED) {
3172 			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3173 		}
3174 		if (rv != MDI_SUCCESS) {
3175 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
3176 			    "!vo_pi_state_change: failed rv = %x", rv));
3177 		}
3178 	}
3179 	MDI_CLIENT_LOCK(ct);
3180 	MDI_PI_LOCK(pip);
3181 	if (MDI_PI_IS_TRANSIENT(pip)) {
3182 		if (rv == MDI_SUCCESS) {
3183 			MDI_PI_CLEAR_TRANSIENT(pip);
3184 		} else {
3185 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3186 		}
3187 	}
3188 
3189 	/*
3190 	 * Wake anyone waiting for this mdi_pathinfo node
3191 	 */
3192 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3193 	MDI_PI_UNLOCK(pip);
3194 
3195 	/*
3196 	 * Mark the client device as stable
3197 	 */
3198 	MDI_CLIENT_STABLE(ct);
3199 	if (rv == MDI_SUCCESS) {
3200 		if (ct->ct_unstable == 0) {
3201 			cdip = ct->ct_dip;
3202 
3203 			/*
3204 			 * Onlining the mdi_pathinfo node will impact the
3205 			 * client state Update the client and dev_info node
3206 			 * state accordingly
3207 			 */
3208 			rv = NDI_SUCCESS;
3209 			i_mdi_client_update_state(ct);
3210 			switch (MDI_CLIENT_STATE(ct)) {
3211 			case MDI_CLIENT_STATE_OPTIMAL:
3212 			case MDI_CLIENT_STATE_DEGRADED:
3213 				if (cdip &&
3214 				    (i_ddi_node_state(cdip) < DS_READY) &&
3215 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3216 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3217 
3218 					i_mdi_client_unlock(ct);
3219 					/*
3220 					 * Must do ndi_devi_online() through
3221 					 * hotplug thread for deferred
3222 					 * attach mechanism to work
3223 					 */
3224 					rv = ndi_devi_online(cdip, 0);
3225 					i_mdi_client_lock(ct, NULL);
3226 					if ((rv != NDI_SUCCESS) &&
3227 					    (MDI_CLIENT_STATE(ct) ==
3228 					    MDI_CLIENT_STATE_DEGRADED)) {
3229 						/*
3230 						 * ndi_devi_online failed.
3231 						 * Reset client flags to
3232 						 * offline.
3233 						 */
3234 						MDI_DEBUG(1, (CE_WARN, cdip,
3235 						    "!ndi_devi_online: failed "
3236 						    " Error: %x", rv));
3237 						MDI_CLIENT_SET_OFFLINE(ct);
3238 					}
3239 					if (rv != NDI_SUCCESS) {
3240 						/* Reset the path state */
3241 						MDI_PI_LOCK(pip);
3242 						MDI_PI(pip)->pi_state =
3243 						    MDI_PI_OLD_STATE(pip);
3244 						MDI_PI_UNLOCK(pip);
3245 					}
3246 				}
3247 				break;
3248 
3249 			case MDI_CLIENT_STATE_FAILED:
3250 				/*
3251 				 * This is the last path case for
3252 				 * non-user initiated events.
3253 				 */
3254 				if (((flag & NDI_DEVI_REMOVE) == 0) &&
3255 				    cdip && (i_ddi_node_state(cdip) >=
3256 				    DS_INITIALIZED)) {
3257 					i_mdi_client_unlock(ct);
3258 					rv = ndi_devi_offline(cdip, 0);
3259 					i_mdi_client_lock(ct, NULL);
3260 
3261 					if (rv != NDI_SUCCESS) {
3262 						/*
3263 						 * ndi_devi_offline failed.
3264 						 * Reset client flags to
3265 						 * online as the path could not
3266 						 * be offlined.
3267 						 */
3268 						MDI_DEBUG(1, (CE_WARN, cdip,
3269 						    "!ndi_devi_offline: failed "
3270 						    " Error: %x", rv));
3271 						MDI_CLIENT_SET_ONLINE(ct);
3272 					}
3273 				}
3274 				break;
3275 			}
3276 			/*
3277 			 * Convert to MDI error code
3278 			 */
3279 			switch (rv) {
3280 			case NDI_SUCCESS:
3281 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3282 				i_mdi_report_path_state(ct, pip);
3283 				rv = MDI_SUCCESS;
3284 				break;
3285 			case NDI_BUSY:
3286 				rv = MDI_BUSY;
3287 				break;
3288 			default:
3289 				rv = MDI_FAILURE;
3290 				break;
3291 			}
3292 		}
3293 	}
3294 	MDI_CLIENT_UNLOCK(ct);
3295 
3296 state_change_exit:
3297 	/*
3298 	 * Mark the pHCI as stable again.
3299 	 */
3300 	MDI_PHCI_LOCK(ph);
3301 	MDI_PHCI_STABLE(ph);
3302 	MDI_PHCI_UNLOCK(ph);
3303 	return (rv);
3304 }
3305 
3306 /*
3307  * mdi_pi_online():
3308  *		Place the path_info node in the online state.  The path is
3309  *		now available to be selected by mdi_select_path() for
3310  *		transporting I/O requests to client devices.
3311  * Return Values:
3312  *		MDI_SUCCESS
3313  *		MDI_FAILURE
3314  */
3315 int
3316 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3317 {
3318 	mdi_client_t *ct = MDI_PI(pip)->pi_client;
3319 	dev_info_t *cdip;
3320 	int		client_held = 0;
3321 	int rv;
3322 
3323 	ASSERT(ct != NULL);
3324 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3325 	if (rv != MDI_SUCCESS)
3326 		return (rv);
3327 
3328 	MDI_PI_LOCK(pip);
3329 	if (MDI_PI(pip)->pi_pm_held == 0) {
3330 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3331 		    "i_mdi_pm_hold_pip\n"));
3332 		i_mdi_pm_hold_pip(pip);
3333 		client_held = 1;
3334 	}
3335 	MDI_PI_UNLOCK(pip);
3336 
3337 	if (client_held) {
3338 		MDI_CLIENT_LOCK(ct);
3339 		if (ct->ct_power_cnt == 0) {
3340 			rv = i_mdi_power_all_phci(ct);
3341 		}
3342 
3343 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3344 		    "i_mdi_pm_hold_client\n"));
3345 		i_mdi_pm_hold_client(ct, 1);
3346 		MDI_CLIENT_UNLOCK(ct);
3347 	}
3348 
3349 	/*
3350 	 * Create the per-path (pathinfo) IO and error kstats which
3351 	 * are reported via iostat(1m).
3352 	 *
3353 	 * Defer creating the per-path kstats if device is not yet
3354 	 * attached;  the names of the kstats are constructed in part
3355 	 * using the devices instance number which is assigned during
3356 	 * process of attaching the client device.
3357 	 *
3358 	 * The framework post_attach handler, mdi_post_attach(), is
3359 	 * is responsible for initializing the client's pathinfo list
3360 	 * once successfully attached.
3361 	 */
3362 	cdip = ct->ct_dip;
3363 	ASSERT(cdip);
3364 	if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED))
3365 		return (rv);
3366 
3367 	MDI_CLIENT_LOCK(ct);
3368 	rv = i_mdi_pi_kstat_create(pip);
3369 	MDI_CLIENT_UNLOCK(ct);
3370 	return (rv);
3371 }
3372 
3373 /*
3374  * mdi_pi_standby():
3375  *		Place the mdi_pathinfo node in standby state
3376  *
3377  * Return Values:
3378  *		MDI_SUCCESS
3379  *		MDI_FAILURE
3380  */
3381 int
3382 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3383 {
3384 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3385 }
3386 
3387 /*
3388  * mdi_pi_fault():
3389  *		Place the mdi_pathinfo node in fault'ed state
3390  * Return Values:
3391  *		MDI_SUCCESS
3392  *		MDI_FAILURE
3393  */
3394 int
3395 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3396 {
3397 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3398 }
3399 
3400 /*
3401  * mdi_pi_offline():
3402  *		Offline a mdi_pathinfo node.
3403  * Return Values:
3404  *		MDI_SUCCESS
3405  *		MDI_FAILURE
3406  */
3407 int
3408 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3409 {
3410 	int	ret, client_held = 0;
3411 	mdi_client_t	*ct;
3412 
3413 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3414 
3415 	if (ret == MDI_SUCCESS) {
3416 		MDI_PI_LOCK(pip);
3417 		if (MDI_PI(pip)->pi_pm_held) {
3418 			client_held = 1;
3419 		}
3420 		MDI_PI_UNLOCK(pip);
3421 
3422 		if (client_held) {
3423 			ct = MDI_PI(pip)->pi_client;
3424 			MDI_CLIENT_LOCK(ct);
3425 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3426 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3427 			i_mdi_pm_rele_client(ct, 1);
3428 			MDI_CLIENT_UNLOCK(ct);
3429 		}
3430 	}
3431 
3432 	return (ret);
3433 }
3434 
3435 /*
3436  * i_mdi_pi_offline():
3437  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3438  */
3439 static int
3440 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3441 {
3442 	dev_info_t	*vdip = NULL;
3443 	mdi_vhci_t	*vh = NULL;
3444 	mdi_client_t	*ct = NULL;
3445 	int		(*f)();
3446 	int		rv;
3447 
3448 	MDI_PI_LOCK(pip);
3449 	ct = MDI_PI(pip)->pi_client;
3450 	ASSERT(ct != NULL);
3451 
3452 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3453 		/*
3454 		 * Give a chance for pending I/Os to complete.
3455 		 */
3456 		MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3457 		    "%d cmds still pending on path: %p\n",
3458 		    MDI_PI(pip)->pi_ref_cnt, pip));
3459 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3460 		    &MDI_PI(pip)->pi_mutex,
3461 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3462 			/*
3463 			 * The timeout time reached without ref_cnt being zero
3464 			 * being signaled.
3465 			 */
3466 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3467 			    "Timeout reached on path %p without the cond\n",
3468 			    pip));
3469 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3470 			    "%d cmds still pending on path: %p\n",
3471 			    MDI_PI(pip)->pi_ref_cnt, pip));
3472 		}
3473 	}
3474 	vh = ct->ct_vhci;
3475 	vdip = vh->vh_dip;
3476 
3477 	/*
3478 	 * Notify vHCI that has registered this event
3479 	 */
3480 	ASSERT(vh->vh_ops);
3481 	f = vh->vh_ops->vo_pi_state_change;
3482 
3483 	if (f != NULL) {
3484 		MDI_PI_UNLOCK(pip);
3485 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3486 		    flags)) != MDI_SUCCESS) {
3487 			MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed "
3488 			    "vdip 0x%x, pip 0x%x", vdip, pip));
3489 		}
3490 		MDI_PI_LOCK(pip);
3491 	}
3492 
3493 	/*
3494 	 * Set the mdi_pathinfo node state and clear the transient condition
3495 	 */
3496 	MDI_PI_SET_OFFLINE(pip);
3497 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3498 	MDI_PI_UNLOCK(pip);
3499 
3500 	MDI_CLIENT_LOCK(ct);
3501 	if (rv == MDI_SUCCESS) {
3502 		if (ct->ct_unstable == 0) {
3503 			dev_info_t	*cdip = ct->ct_dip;
3504 
3505 			/*
3506 			 * Onlining the mdi_pathinfo node will impact the
3507 			 * client state Update the client and dev_info node
3508 			 * state accordingly
3509 			 */
3510 			i_mdi_client_update_state(ct);
3511 			rv = NDI_SUCCESS;
3512 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3513 				if (cdip &&
3514 				    (i_ddi_node_state(cdip) >=
3515 				    DS_INITIALIZED)) {
3516 					MDI_CLIENT_UNLOCK(ct);
3517 					rv = ndi_devi_offline(cdip, 0);
3518 					MDI_CLIENT_LOCK(ct);
3519 					if (rv != NDI_SUCCESS) {
3520 						/*
3521 						 * ndi_devi_offline failed.
3522 						 * Reset client flags to
3523 						 * online.
3524 						 */
3525 						MDI_DEBUG(4, (CE_WARN, cdip,
3526 						    "!ndi_devi_offline: failed "
3527 						    " Error: %x", rv));
3528 						MDI_CLIENT_SET_ONLINE(ct);
3529 					}
3530 				}
3531 			}
3532 			/*
3533 			 * Convert to MDI error code
3534 			 */
3535 			switch (rv) {
3536 			case NDI_SUCCESS:
3537 				rv = MDI_SUCCESS;
3538 				break;
3539 			case NDI_BUSY:
3540 				rv = MDI_BUSY;
3541 				break;
3542 			default:
3543 				rv = MDI_FAILURE;
3544 				break;
3545 			}
3546 		}
3547 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3548 		i_mdi_report_path_state(ct, pip);
3549 	}
3550 
3551 	MDI_CLIENT_UNLOCK(ct);
3552 
3553 	/*
3554 	 * Change in the mdi_pathinfo node state will impact the client state
3555 	 */
3556 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3557 	    ct, pip));
3558 	return (rv);
3559 }
3560 
3561 
3562 /*
3563  * mdi_pi_get_addr():
3564  *		Get the unit address associated with a mdi_pathinfo node
3565  *
3566  * Return Values:
3567  *		char *
3568  */
3569 char *
3570 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3571 {
3572 	if (pip == NULL)
3573 		return (NULL);
3574 
3575 	return (MDI_PI(pip)->pi_addr);
3576 }
3577 
3578 /*
3579  * mdi_pi_get_client():
3580  *		Get the client devinfo associated with a mdi_pathinfo node
3581  *
3582  * Return Values:
3583  *		Handle to client device dev_info node
3584  */
3585 dev_info_t *
3586 mdi_pi_get_client(mdi_pathinfo_t *pip)
3587 {
3588 	dev_info_t	*dip = NULL;
3589 	if (pip) {
3590 		dip = MDI_PI(pip)->pi_client->ct_dip;
3591 	}
3592 	return (dip);
3593 }
3594 
3595 /*
3596  * mdi_pi_get_phci():
3597  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3598  * Return Values:
3599  *		Handle to dev_info node
3600  */
3601 dev_info_t *
3602 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3603 {
3604 	dev_info_t	*dip = NULL;
3605 	if (pip) {
3606 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3607 	}
3608 	return (dip);
3609 }
3610 
3611 /*
3612  * mdi_pi_get_client_private():
3613  *		Get the client private information associated with the
3614  *		mdi_pathinfo node
3615  */
3616 void *
3617 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3618 {
3619 	void *cprivate = NULL;
3620 	if (pip) {
3621 		cprivate = MDI_PI(pip)->pi_cprivate;
3622 	}
3623 	return (cprivate);
3624 }
3625 
3626 /*
3627  * mdi_pi_set_client_private():
3628  *		Set the client private information in the mdi_pathinfo node
3629  */
3630 void
3631 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3632 {
3633 	if (pip) {
3634 		MDI_PI(pip)->pi_cprivate = priv;
3635 	}
3636 }
3637 
3638 /*
3639  * mdi_pi_get_phci_private():
3640  *		Get the pHCI private information associated with the
3641  *		mdi_pathinfo node
3642  */
3643 caddr_t
3644 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3645 {
3646 	caddr_t	pprivate = NULL;
3647 	if (pip) {
3648 		pprivate = MDI_PI(pip)->pi_pprivate;
3649 	}
3650 	return (pprivate);
3651 }
3652 
3653 /*
3654  * mdi_pi_set_phci_private():
3655  *		Set the pHCI private information in the mdi_pathinfo node
3656  */
3657 void
3658 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3659 {
3660 	if (pip) {
3661 		MDI_PI(pip)->pi_pprivate = priv;
3662 	}
3663 }
3664 
3665 /*
3666  * mdi_pi_get_state():
3667  *		Get the mdi_pathinfo node state. Transient states are internal
3668  *		and not provided to the users
3669  */
3670 mdi_pathinfo_state_t
3671 mdi_pi_get_state(mdi_pathinfo_t *pip)
3672 {
3673 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3674 
3675 	if (pip) {
3676 		if (MDI_PI_IS_TRANSIENT(pip)) {
3677 			/*
3678 			 * mdi_pathinfo is in state transition.  Return the
3679 			 * last good state.
3680 			 */
3681 			state = MDI_PI_OLD_STATE(pip);
3682 		} else {
3683 			state = MDI_PI_STATE(pip);
3684 		}
3685 	}
3686 	return (state);
3687 }
3688 
3689 /*
3690  * Note that the following function needs to be the new interface for
3691  * mdi_pi_get_state when mpxio gets integrated to ON.
3692  */
3693 int
3694 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
3695 		uint32_t *ext_state)
3696 {
3697 	*state = MDI_PATHINFO_STATE_INIT;
3698 
3699 	if (pip) {
3700 		if (MDI_PI_IS_TRANSIENT(pip)) {
3701 			/*
3702 			 * mdi_pathinfo is in state transition.  Return the
3703 			 * last good state.
3704 			 */
3705 			*state = MDI_PI_OLD_STATE(pip);
3706 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
3707 		} else {
3708 			*state = MDI_PI_STATE(pip);
3709 			*ext_state = MDI_PI_EXT_STATE(pip);
3710 		}
3711 	}
3712 	return (MDI_SUCCESS);
3713 }
3714 
3715 /*
3716  * mdi_pi_get_preferred:
3717  *	Get the preferred path flag
3718  */
3719 int
3720 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
3721 {
3722 	if (pip) {
3723 		return (MDI_PI(pip)->pi_preferred);
3724 	}
3725 	return (0);
3726 }
3727 
3728 /*
3729  * mdi_pi_set_preferred:
3730  *	Set the preferred path flag
3731  */
3732 void
3733 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
3734 {
3735 	if (pip) {
3736 		MDI_PI(pip)->pi_preferred = preferred;
3737 	}
3738 }
3739 
3740 
3741 /*
3742  * mdi_pi_set_state():
3743  *		Set the mdi_pathinfo node state
3744  */
3745 void
3746 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
3747 {
3748 	uint32_t	ext_state;
3749 
3750 	if (pip) {
3751 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
3752 		MDI_PI(pip)->pi_state = state;
3753 		MDI_PI(pip)->pi_state |= ext_state;
3754 	}
3755 }
3756 
3757 /*
3758  * Property functions:
3759  */
3760 
3761 int
3762 i_map_nvlist_error_to_mdi(int val)
3763 {
3764 	int rv;
3765 
3766 	switch (val) {
3767 	case 0:
3768 		rv = DDI_PROP_SUCCESS;
3769 		break;
3770 	case EINVAL:
3771 	case ENOTSUP:
3772 		rv = DDI_PROP_INVAL_ARG;
3773 		break;
3774 	case ENOMEM:
3775 		rv = DDI_PROP_NO_MEMORY;
3776 		break;
3777 	default:
3778 		rv = DDI_PROP_NOT_FOUND;
3779 		break;
3780 	}
3781 	return (rv);
3782 }
3783 
3784 /*
3785  * mdi_pi_get_next_prop():
3786  * 		Property walk function.  The caller should hold mdi_pi_lock()
3787  *		and release by calling mdi_pi_unlock() at the end of walk to
3788  *		get a consistent value.
3789  */
3790 
3791 nvpair_t *
3792 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
3793 {
3794 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3795 		return (NULL);
3796 	}
3797 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3798 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
3799 }
3800 
3801 /*
3802  * mdi_prop_remove():
3803  * 		Remove the named property from the named list.
3804  */
3805 
3806 int
3807 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
3808 {
3809 	if (pip == NULL) {
3810 		return (DDI_PROP_NOT_FOUND);
3811 	}
3812 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3813 	MDI_PI_LOCK(pip);
3814 	if (MDI_PI(pip)->pi_prop == NULL) {
3815 		MDI_PI_UNLOCK(pip);
3816 		return (DDI_PROP_NOT_FOUND);
3817 	}
3818 	if (name) {
3819 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
3820 	} else {
3821 		char		nvp_name[MAXNAMELEN];
3822 		nvpair_t	*nvp;
3823 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
3824 		while (nvp) {
3825 			nvpair_t	*next;
3826 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
3827 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
3828 			    nvpair_name(nvp));
3829 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
3830 			    nvp_name);
3831 			nvp = next;
3832 		}
3833 	}
3834 	MDI_PI_UNLOCK(pip);
3835 	return (DDI_PROP_SUCCESS);
3836 }
3837 
3838 /*
3839  * mdi_prop_size():
3840  * 		Get buffer size needed to pack the property data.
3841  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
3842  *		buffer size.
3843  */
3844 
3845 int
3846 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
3847 {
3848 	int	rv;
3849 	size_t	bufsize;
3850 
3851 	*buflenp = 0;
3852 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3853 		return (DDI_PROP_NOT_FOUND);
3854 	}
3855 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3856 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
3857 	    &bufsize, NV_ENCODE_NATIVE);
3858 	*buflenp = bufsize;
3859 	return (i_map_nvlist_error_to_mdi(rv));
3860 }
3861 
3862 /*
3863  * mdi_prop_pack():
3864  * 		pack the property list.  The caller should hold the
3865  *		mdi_pathinfo_t node to get a consistent data
3866  */
3867 
3868 int
3869 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
3870 {
3871 	int	rv;
3872 	size_t	bufsize;
3873 
3874 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
3875 		return (DDI_PROP_NOT_FOUND);
3876 	}
3877 
3878 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3879 
3880 	bufsize = buflen;
3881 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
3882 	    NV_ENCODE_NATIVE, KM_SLEEP);
3883 
3884 	return (i_map_nvlist_error_to_mdi(rv));
3885 }
3886 
3887 /*
3888  * mdi_prop_update_byte():
3889  *		Create/Update a byte property
3890  */
3891 int
3892 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
3893 {
3894 	int rv;
3895 
3896 	if (pip == NULL) {
3897 		return (DDI_PROP_INVAL_ARG);
3898 	}
3899 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3900 	MDI_PI_LOCK(pip);
3901 	if (MDI_PI(pip)->pi_prop == NULL) {
3902 		MDI_PI_UNLOCK(pip);
3903 		return (DDI_PROP_NOT_FOUND);
3904 	}
3905 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
3906 	MDI_PI_UNLOCK(pip);
3907 	return (i_map_nvlist_error_to_mdi(rv));
3908 }
3909 
3910 /*
3911  * mdi_prop_update_byte_array():
3912  *		Create/Update a byte array property
3913  */
3914 int
3915 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
3916     uint_t nelements)
3917 {
3918 	int rv;
3919 
3920 	if (pip == NULL) {
3921 		return (DDI_PROP_INVAL_ARG);
3922 	}
3923 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3924 	MDI_PI_LOCK(pip);
3925 	if (MDI_PI(pip)->pi_prop == NULL) {
3926 		MDI_PI_UNLOCK(pip);
3927 		return (DDI_PROP_NOT_FOUND);
3928 	}
3929 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
3930 	MDI_PI_UNLOCK(pip);
3931 	return (i_map_nvlist_error_to_mdi(rv));
3932 }
3933 
3934 /*
3935  * mdi_prop_update_int():
3936  *		Create/Update a 32 bit integer property
3937  */
3938 int
3939 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
3940 {
3941 	int rv;
3942 
3943 	if (pip == NULL) {
3944 		return (DDI_PROP_INVAL_ARG);
3945 	}
3946 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3947 	MDI_PI_LOCK(pip);
3948 	if (MDI_PI(pip)->pi_prop == NULL) {
3949 		MDI_PI_UNLOCK(pip);
3950 		return (DDI_PROP_NOT_FOUND);
3951 	}
3952 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
3953 	MDI_PI_UNLOCK(pip);
3954 	return (i_map_nvlist_error_to_mdi(rv));
3955 }
3956 
3957 /*
3958  * mdi_prop_update_int64():
3959  *		Create/Update a 64 bit integer property
3960  */
3961 int
3962 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
3963 {
3964 	int rv;
3965 
3966 	if (pip == NULL) {
3967 		return (DDI_PROP_INVAL_ARG);
3968 	}
3969 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3970 	MDI_PI_LOCK(pip);
3971 	if (MDI_PI(pip)->pi_prop == NULL) {
3972 		MDI_PI_UNLOCK(pip);
3973 		return (DDI_PROP_NOT_FOUND);
3974 	}
3975 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
3976 	MDI_PI_UNLOCK(pip);
3977 	return (i_map_nvlist_error_to_mdi(rv));
3978 }
3979 
3980 /*
3981  * mdi_prop_update_int_array():
3982  *		Create/Update a int array property
3983  */
3984 int
3985 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
3986 	    uint_t nelements)
3987 {
3988 	int rv;
3989 
3990 	if (pip == NULL) {
3991 		return (DDI_PROP_INVAL_ARG);
3992 	}
3993 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3994 	MDI_PI_LOCK(pip);
3995 	if (MDI_PI(pip)->pi_prop == NULL) {
3996 		MDI_PI_UNLOCK(pip);
3997 		return (DDI_PROP_NOT_FOUND);
3998 	}
3999 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4000 	    nelements);
4001 	MDI_PI_UNLOCK(pip);
4002 	return (i_map_nvlist_error_to_mdi(rv));
4003 }
4004 
4005 /*
4006  * mdi_prop_update_string():
4007  *		Create/Update a string property
4008  */
4009 int
4010 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4011 {
4012 	int rv;
4013 
4014 	if (pip == NULL) {
4015 		return (DDI_PROP_INVAL_ARG);
4016 	}
4017 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4018 	MDI_PI_LOCK(pip);
4019 	if (MDI_PI(pip)->pi_prop == NULL) {
4020 		MDI_PI_UNLOCK(pip);
4021 		return (DDI_PROP_NOT_FOUND);
4022 	}
4023 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4024 	MDI_PI_UNLOCK(pip);
4025 	return (i_map_nvlist_error_to_mdi(rv));
4026 }
4027 
4028 /*
4029  * mdi_prop_update_string_array():
4030  *		Create/Update a string array property
4031  */
4032 int
4033 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4034     uint_t nelements)
4035 {
4036 	int rv;
4037 
4038 	if (pip == NULL) {
4039 		return (DDI_PROP_INVAL_ARG);
4040 	}
4041 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4042 	MDI_PI_LOCK(pip);
4043 	if (MDI_PI(pip)->pi_prop == NULL) {
4044 		MDI_PI_UNLOCK(pip);
4045 		return (DDI_PROP_NOT_FOUND);
4046 	}
4047 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4048 	    nelements);
4049 	MDI_PI_UNLOCK(pip);
4050 	return (i_map_nvlist_error_to_mdi(rv));
4051 }
4052 
4053 /*
4054  * mdi_prop_lookup_byte():
4055  * 		Look for byte property identified by name.  The data returned
4056  *		is the actual property and valid as long as mdi_pathinfo_t node
4057  *		is alive.
4058  */
4059 int
4060 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4061 {
4062 	int rv;
4063 
4064 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4065 		return (DDI_PROP_NOT_FOUND);
4066 	}
4067 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4068 	return (i_map_nvlist_error_to_mdi(rv));
4069 }
4070 
4071 
4072 /*
4073  * mdi_prop_lookup_byte_array():
4074  * 		Look for byte array property identified by name.  The data
4075  *		returned is the actual property and valid as long as
4076  *		mdi_pathinfo_t node is alive.
4077  */
4078 int
4079 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4080     uint_t *nelements)
4081 {
4082 	int rv;
4083 
4084 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4085 		return (DDI_PROP_NOT_FOUND);
4086 	}
4087 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4088 	    nelements);
4089 	return (i_map_nvlist_error_to_mdi(rv));
4090 }
4091 
4092 /*
4093  * mdi_prop_lookup_int():
4094  * 		Look for int property identified by name.  The data returned
4095  *		is the actual property and valid as long as mdi_pathinfo_t
4096  *		node is alive.
4097  */
4098 int
4099 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4100 {
4101 	int rv;
4102 
4103 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4104 		return (DDI_PROP_NOT_FOUND);
4105 	}
4106 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4107 	return (i_map_nvlist_error_to_mdi(rv));
4108 }
4109 
4110 /*
4111  * mdi_prop_lookup_int64():
4112  * 		Look for int64 property identified by name.  The data returned
4113  *		is the actual property and valid as long as mdi_pathinfo_t node
4114  *		is alive.
4115  */
4116 int
4117 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4118 {
4119 	int rv;
4120 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4121 		return (DDI_PROP_NOT_FOUND);
4122 	}
4123 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4124 	return (i_map_nvlist_error_to_mdi(rv));
4125 }
4126 
4127 /*
4128  * mdi_prop_lookup_int_array():
4129  * 		Look for int array property identified by name.  The data
4130  *		returned is the actual property and valid as long as
4131  *		mdi_pathinfo_t node is alive.
4132  */
4133 int
4134 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4135     uint_t *nelements)
4136 {
4137 	int rv;
4138 
4139 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4140 		return (DDI_PROP_NOT_FOUND);
4141 	}
4142 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4143 	    (int32_t **)data, nelements);
4144 	return (i_map_nvlist_error_to_mdi(rv));
4145 }
4146 
4147 /*
4148  * mdi_prop_lookup_string():
4149  * 		Look for string property identified by name.  The data
4150  *		returned is the actual property and valid as long as
4151  *		mdi_pathinfo_t node is alive.
4152  */
4153 int
4154 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4155 {
4156 	int rv;
4157 
4158 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4159 		return (DDI_PROP_NOT_FOUND);
4160 	}
4161 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4162 	return (i_map_nvlist_error_to_mdi(rv));
4163 }
4164 
4165 /*
4166  * mdi_prop_lookup_string_array():
4167  * 		Look for string array property identified by name.  The data
4168  *		returned is the actual property and valid as long as
4169  *		mdi_pathinfo_t node is alive.
4170  */
4171 
4172 int
4173 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4174     uint_t *nelements)
4175 {
4176 	int rv;
4177 
4178 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4179 		return (DDI_PROP_NOT_FOUND);
4180 	}
4181 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4182 	    nelements);
4183 	return (i_map_nvlist_error_to_mdi(rv));
4184 }
4185 
4186 /*
4187  * mdi_prop_free():
4188  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4189  *		functions return the pointer to actual property data and not a
4190  *		copy of it.  So the data returned is valid as long as
4191  *		mdi_pathinfo_t node is valid.
4192  */
4193 
4194 /*ARGSUSED*/
4195 int
4196 mdi_prop_free(void *data)
4197 {
4198 	return (DDI_PROP_SUCCESS);
4199 }
4200 
4201 /*ARGSUSED*/
4202 static void
4203 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4204 {
4205 	char		*phci_path, *ct_path;
4206 	char		*ct_status;
4207 	char		*status;
4208 	dev_info_t	*dip = ct->ct_dip;
4209 	char		lb_buf[64];
4210 
4211 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4212 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4213 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4214 		return;
4215 	}
4216 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4217 		ct_status = "optimal";
4218 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4219 		ct_status = "degraded";
4220 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4221 		ct_status = "failed";
4222 	} else {
4223 		ct_status = "unknown";
4224 	}
4225 
4226 	if (MDI_PI_IS_OFFLINE(pip)) {
4227 		status = "offline";
4228 	} else if (MDI_PI_IS_ONLINE(pip)) {
4229 		status = "online";
4230 	} else if (MDI_PI_IS_STANDBY(pip)) {
4231 		status = "standby";
4232 	} else if (MDI_PI_IS_FAULT(pip)) {
4233 		status = "faulted";
4234 	} else {
4235 		status = "unknown";
4236 	}
4237 
4238 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4239 		(void) snprintf(lb_buf, sizeof (lb_buf),
4240 		    "%s, region-size: %d", mdi_load_balance_lba,
4241 			ct->ct_lb_args->region_size);
4242 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4243 		(void) snprintf(lb_buf, sizeof (lb_buf),
4244 		    "%s", mdi_load_balance_none);
4245 	} else {
4246 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4247 		    mdi_load_balance_rr);
4248 	}
4249 
4250 	if (dip) {
4251 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4252 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4253 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4254 		    "path %s (%s%d) to target address: %s is %s"
4255 		    " Load balancing: %s\n",
4256 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4257 		    ddi_get_instance(dip), ct_status,
4258 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4259 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4260 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4261 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4262 		kmem_free(phci_path, MAXPATHLEN);
4263 		kmem_free(ct_path, MAXPATHLEN);
4264 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4265 	}
4266 }
4267 
4268 #ifdef	DEBUG
4269 /*
4270  * i_mdi_log():
4271  *		Utility function for error message management
4272  *
4273  */
4274 
4275 /*VARARGS3*/
4276 static void
4277 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
4278 {
4279 	char		buf[MAXNAMELEN];
4280 	char		name[MAXNAMELEN];
4281 	va_list		ap;
4282 	int		log_only = 0;
4283 	int		boot_only = 0;
4284 	int		console_only = 0;
4285 
4286 	if (dip) {
4287 		if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) {
4288 			(void) snprintf(name, MAXNAMELEN, "%s%d:\n",
4289 			    ddi_node_name(dip), ddi_get_instance(dip));
4290 		} else {
4291 			(void) snprintf(name, MAXNAMELEN, "%s%d:",
4292 			    ddi_node_name(dip), ddi_get_instance(dip));
4293 		}
4294 	} else {
4295 		name[0] = '\0';
4296 	}
4297 
4298 	va_start(ap, fmt);
4299 	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
4300 	va_end(ap);
4301 
4302 	switch (buf[0]) {
4303 	case '!':
4304 		log_only = 1;
4305 		break;
4306 	case '?':
4307 		boot_only = 1;
4308 		break;
4309 	case '^':
4310 		console_only = 1;
4311 		break;
4312 	}
4313 
4314 	switch (level) {
4315 	case CE_NOTE:
4316 		level = CE_CONT;
4317 		/* FALLTHROUGH */
4318 	case CE_CONT:
4319 	case CE_WARN:
4320 	case CE_PANIC:
4321 		if (boot_only) {
4322 			cmn_err(level, "?%s\t%s", name, &buf[1]);
4323 		} else if (console_only) {
4324 			cmn_err(level, "^%s\t%s", name, &buf[1]);
4325 		} else if (log_only) {
4326 			cmn_err(level, "!%s\t%s", name, &buf[1]);
4327 		} else {
4328 			cmn_err(level, "%s\t%s", name, buf);
4329 		}
4330 		break;
4331 	default:
4332 		cmn_err(level, "%s\t%s", name, buf);
4333 		break;
4334 	}
4335 }
4336 #endif	/* DEBUG */
4337 
4338 void
4339 i_mdi_client_online(dev_info_t *ct_dip)
4340 {
4341 	mdi_client_t	*ct;
4342 
4343 	/*
4344 	 * Client online notification. Mark client state as online
4345 	 * restore our binding with dev_info node
4346 	 */
4347 	ct = i_devi_get_client(ct_dip);
4348 	ASSERT(ct != NULL);
4349 	MDI_CLIENT_LOCK(ct);
4350 	MDI_CLIENT_SET_ONLINE(ct);
4351 	/* catch for any memory leaks */
4352 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4353 	ct->ct_dip = ct_dip;
4354 
4355 	if (ct->ct_power_cnt == 0)
4356 		(void) i_mdi_power_all_phci(ct);
4357 
4358 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4359 	    "i_mdi_pm_hold_client\n"));
4360 	i_mdi_pm_hold_client(ct, 1);
4361 
4362 	MDI_CLIENT_UNLOCK(ct);
4363 }
4364 
4365 void
4366 i_mdi_phci_online(dev_info_t *ph_dip)
4367 {
4368 	mdi_phci_t	*ph;
4369 
4370 	/* pHCI online notification. Mark state accordingly */
4371 	ph = i_devi_get_phci(ph_dip);
4372 	ASSERT(ph != NULL);
4373 	MDI_PHCI_LOCK(ph);
4374 	MDI_PHCI_SET_ONLINE(ph);
4375 	MDI_PHCI_UNLOCK(ph);
4376 }
4377 
4378 /*
4379  * mdi_devi_online():
4380  * 		Online notification from NDI framework on pHCI/client
4381  *		device online.
4382  * Return Values:
4383  *		NDI_SUCCESS
4384  *		MDI_FAILURE
4385  */
4386 
4387 /*ARGSUSED*/
4388 int
4389 mdi_devi_online(dev_info_t *dip, uint_t flags)
4390 {
4391 	if (MDI_PHCI(dip)) {
4392 		i_mdi_phci_online(dip);
4393 	}
4394 
4395 	if (MDI_CLIENT(dip)) {
4396 		i_mdi_client_online(dip);
4397 	}
4398 	return (NDI_SUCCESS);
4399 }
4400 
4401 /*
4402  * mdi_devi_offline():
4403  * 		Offline notification from NDI framework on pHCI/Client device
4404  *		offline.
4405  *
4406  * Return Values:
4407  *		NDI_SUCCESS
4408  *		NDI_FAILURE
4409  */
4410 
4411 /*ARGSUSED*/
4412 int
4413 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4414 {
4415 	int		rv = NDI_SUCCESS;
4416 
4417 	if (MDI_CLIENT(dip)) {
4418 		rv = i_mdi_client_offline(dip, flags);
4419 		if (rv != NDI_SUCCESS)
4420 			return (rv);
4421 	}
4422 
4423 	if (MDI_PHCI(dip)) {
4424 		rv = i_mdi_phci_offline(dip, flags);
4425 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4426 			/* set client back online */
4427 			i_mdi_client_online(dip);
4428 		}
4429 	}
4430 
4431 	return (rv);
4432 }
4433 
4434 /*ARGSUSED*/
4435 static int
4436 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4437 {
4438 	int		rv = NDI_SUCCESS;
4439 	mdi_phci_t	*ph;
4440 	mdi_client_t	*ct;
4441 	mdi_pathinfo_t	*pip;
4442 	mdi_pathinfo_t	*next;
4443 	mdi_pathinfo_t	*failed_pip = NULL;
4444 	dev_info_t	*cdip;
4445 
4446 	/*
4447 	 * pHCI component offline notification
4448 	 * Make sure that this pHCI instance is free to be offlined.
4449 	 * If it is OK to proceed, Offline and remove all the child
4450 	 * mdi_pathinfo nodes.  This process automatically offlines
4451 	 * corresponding client devices, for which this pHCI provides
4452 	 * critical services.
4453 	 */
4454 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
4455 	    dip));
4456 
4457 	ph = i_devi_get_phci(dip);
4458 	if (ph == NULL) {
4459 		return (rv);
4460 	}
4461 
4462 	MDI_PHCI_LOCK(ph);
4463 
4464 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4465 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
4466 		MDI_PHCI_UNLOCK(ph);
4467 		return (NDI_SUCCESS);
4468 	}
4469 
4470 	/*
4471 	 * Check to see if the pHCI can be offlined
4472 	 */
4473 	if (ph->ph_unstable) {
4474 		MDI_DEBUG(1, (CE_WARN, dip,
4475 		    "!One or more target devices are in transient "
4476 		    "state. This device can not be removed at "
4477 		    "this moment. Please try again later."));
4478 		MDI_PHCI_UNLOCK(ph);
4479 		return (NDI_BUSY);
4480 	}
4481 
4482 	pip = ph->ph_path_head;
4483 	while (pip != NULL) {
4484 		MDI_PI_LOCK(pip);
4485 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4486 		/*
4487 		 * The mdi_pathinfo state is OK. Check the client state.
4488 		 * If failover in progress fail the pHCI from offlining
4489 		 */
4490 		ct = MDI_PI(pip)->pi_client;
4491 		i_mdi_client_lock(ct, pip);
4492 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4493 		    (ct->ct_unstable)) {
4494 			/*
4495 			 * Failover is in progress, Fail the DR
4496 			 */
4497 			MDI_DEBUG(1, (CE_WARN, dip,
4498 			    "!pHCI device (%s%d) is Busy. %s",
4499 			    ddi_driver_name(dip), ddi_get_instance(dip),
4500 			    "This device can not be removed at "
4501 			    "this moment. Please try again later."));
4502 			MDI_PI_UNLOCK(pip);
4503 			MDI_CLIENT_UNLOCK(ct);
4504 			MDI_PHCI_UNLOCK(ph);
4505 			return (NDI_BUSY);
4506 		}
4507 		MDI_PI_UNLOCK(pip);
4508 
4509 		/*
4510 		 * Check to see of we are removing the last path of this
4511 		 * client device...
4512 		 */
4513 		cdip = ct->ct_dip;
4514 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4515 		    (i_mdi_client_compute_state(ct, ph) ==
4516 		    MDI_CLIENT_STATE_FAILED)) {
4517 			i_mdi_client_unlock(ct);
4518 			MDI_PHCI_UNLOCK(ph);
4519 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4520 				/*
4521 				 * ndi_devi_offline() failed.
4522 				 * This pHCI provides the critical path
4523 				 * to one or more client devices.
4524 				 * Return busy.
4525 				 */
4526 				MDI_PHCI_LOCK(ph);
4527 				MDI_DEBUG(1, (CE_WARN, dip,
4528 				    "!pHCI device (%s%d) is Busy. %s",
4529 				    ddi_driver_name(dip), ddi_get_instance(dip),
4530 				    "This device can not be removed at "
4531 				    "this moment. Please try again later."));
4532 				failed_pip = pip;
4533 				break;
4534 			} else {
4535 				MDI_PHCI_LOCK(ph);
4536 				pip = next;
4537 			}
4538 		} else {
4539 			i_mdi_client_unlock(ct);
4540 			pip = next;
4541 		}
4542 	}
4543 
4544 	if (failed_pip) {
4545 		pip = ph->ph_path_head;
4546 		while (pip != failed_pip) {
4547 			MDI_PI_LOCK(pip);
4548 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4549 			ct = MDI_PI(pip)->pi_client;
4550 			i_mdi_client_lock(ct, pip);
4551 			cdip = ct->ct_dip;
4552 			switch (MDI_CLIENT_STATE(ct)) {
4553 			case MDI_CLIENT_STATE_OPTIMAL:
4554 			case MDI_CLIENT_STATE_DEGRADED:
4555 				if (cdip) {
4556 					MDI_PI_UNLOCK(pip);
4557 					i_mdi_client_unlock(ct);
4558 					MDI_PHCI_UNLOCK(ph);
4559 					(void) ndi_devi_online(cdip, 0);
4560 					MDI_PHCI_LOCK(ph);
4561 					pip = next;
4562 					continue;
4563 				}
4564 				break;
4565 
4566 			case MDI_CLIENT_STATE_FAILED:
4567 				if (cdip) {
4568 					MDI_PI_UNLOCK(pip);
4569 					i_mdi_client_unlock(ct);
4570 					MDI_PHCI_UNLOCK(ph);
4571 					(void) ndi_devi_offline(cdip, 0);
4572 					MDI_PHCI_LOCK(ph);
4573 					pip = next;
4574 					continue;
4575 				}
4576 				break;
4577 			}
4578 			MDI_PI_UNLOCK(pip);
4579 			i_mdi_client_unlock(ct);
4580 			pip = next;
4581 		}
4582 		MDI_PHCI_UNLOCK(ph);
4583 		return (NDI_BUSY);
4584 	}
4585 
4586 	/*
4587 	 * Mark the pHCI as offline
4588 	 */
4589 	MDI_PHCI_SET_OFFLINE(ph);
4590 
4591 	/*
4592 	 * Mark the child mdi_pathinfo nodes as transient
4593 	 */
4594 	pip = ph->ph_path_head;
4595 	while (pip != NULL) {
4596 		MDI_PI_LOCK(pip);
4597 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4598 		MDI_PI_SET_OFFLINING(pip);
4599 		MDI_PI_UNLOCK(pip);
4600 		pip = next;
4601 	}
4602 	MDI_PHCI_UNLOCK(ph);
4603 	/*
4604 	 * Give a chance for any pending commands to execute
4605 	 */
4606 	delay(1);
4607 	MDI_PHCI_LOCK(ph);
4608 	pip = ph->ph_path_head;
4609 	while (pip != NULL) {
4610 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4611 		(void) i_mdi_pi_offline(pip, flags);
4612 		MDI_PI_LOCK(pip);
4613 		ct = MDI_PI(pip)->pi_client;
4614 		if (!MDI_PI_IS_OFFLINE(pip)) {
4615 			MDI_DEBUG(1, (CE_WARN, dip,
4616 			    "!pHCI device (%s%d) is Busy. %s",
4617 			    ddi_driver_name(dip), ddi_get_instance(dip),
4618 			    "This device can not be removed at "
4619 			    "this moment. Please try again later."));
4620 			MDI_PI_UNLOCK(pip);
4621 			MDI_PHCI_SET_ONLINE(ph);
4622 			MDI_PHCI_UNLOCK(ph);
4623 			return (NDI_BUSY);
4624 		}
4625 		MDI_PI_UNLOCK(pip);
4626 		pip = next;
4627 	}
4628 	MDI_PHCI_UNLOCK(ph);
4629 
4630 	return (rv);
4631 }
4632 
4633 /*ARGSUSED*/
4634 static int
4635 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
4636 {
4637 	int		rv = NDI_SUCCESS;
4638 	mdi_client_t	*ct;
4639 
4640 	/*
4641 	 * Client component to go offline.  Make sure that we are
4642 	 * not in failing over state and update client state
4643 	 * accordingly
4644 	 */
4645 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
4646 	    dip));
4647 	ct = i_devi_get_client(dip);
4648 	if (ct != NULL) {
4649 		MDI_CLIENT_LOCK(ct);
4650 		if (ct->ct_unstable) {
4651 			/*
4652 			 * One or more paths are in transient state,
4653 			 * Dont allow offline of a client device
4654 			 */
4655 			MDI_DEBUG(1, (CE_WARN, dip,
4656 			    "!One or more paths to this device is "
4657 			    "in transient state. This device can not "
4658 			    "be removed at this moment. "
4659 			    "Please try again later."));
4660 			MDI_CLIENT_UNLOCK(ct);
4661 			return (NDI_BUSY);
4662 		}
4663 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
4664 			/*
4665 			 * Failover is in progress, Dont allow DR of
4666 			 * a client device
4667 			 */
4668 			MDI_DEBUG(1, (CE_WARN, dip,
4669 			    "!Client device (%s%d) is Busy. %s",
4670 			    ddi_driver_name(dip), ddi_get_instance(dip),
4671 			    "This device can not be removed at "
4672 			    "this moment. Please try again later."));
4673 			MDI_CLIENT_UNLOCK(ct);
4674 			return (NDI_BUSY);
4675 		}
4676 		MDI_CLIENT_SET_OFFLINE(ct);
4677 
4678 		/*
4679 		 * Unbind our relationship with the dev_info node
4680 		 */
4681 		if (flags & NDI_DEVI_REMOVE) {
4682 			ct->ct_dip = NULL;
4683 		}
4684 		MDI_CLIENT_UNLOCK(ct);
4685 	}
4686 	return (rv);
4687 }
4688 
4689 /*
4690  * mdi_pre_attach():
4691  *		Pre attach() notification handler
4692  */
4693 
4694 /*ARGSUSED*/
4695 int
4696 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4697 {
4698 	/* don't support old DDI_PM_RESUME */
4699 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
4700 	    (cmd == DDI_PM_RESUME))
4701 		return (DDI_FAILURE);
4702 
4703 	return (DDI_SUCCESS);
4704 }
4705 
4706 /*
4707  * mdi_post_attach():
4708  *		Post attach() notification handler
4709  */
4710 
4711 /*ARGSUSED*/
4712 void
4713 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
4714 {
4715 	mdi_phci_t	*ph;
4716 	mdi_client_t	*ct;
4717 	mdi_pathinfo_t	*pip;
4718 
4719 	if (MDI_PHCI(dip)) {
4720 		ph = i_devi_get_phci(dip);
4721 		ASSERT(ph != NULL);
4722 
4723 		MDI_PHCI_LOCK(ph);
4724 		switch (cmd) {
4725 		case DDI_ATTACH:
4726 			MDI_DEBUG(2, (CE_NOTE, dip,
4727 			    "!pHCI post_attach: called %p\n", ph));
4728 			if (error == DDI_SUCCESS) {
4729 				MDI_PHCI_SET_ATTACH(ph);
4730 			} else {
4731 				MDI_DEBUG(1, (CE_NOTE, dip,
4732 				    "!pHCI post_attach: failed error=%d\n",
4733 				    error));
4734 				MDI_PHCI_SET_DETACH(ph);
4735 			}
4736 			break;
4737 
4738 		case DDI_RESUME:
4739 			MDI_DEBUG(2, (CE_NOTE, dip,
4740 			    "!pHCI post_resume: called %p\n", ph));
4741 			if (error == DDI_SUCCESS) {
4742 				MDI_PHCI_SET_RESUME(ph);
4743 			} else {
4744 				MDI_DEBUG(1, (CE_NOTE, dip,
4745 				    "!pHCI post_resume: failed error=%d\n",
4746 				    error));
4747 				MDI_PHCI_SET_SUSPEND(ph);
4748 			}
4749 			break;
4750 		}
4751 		MDI_PHCI_UNLOCK(ph);
4752 	}
4753 
4754 	if (MDI_CLIENT(dip)) {
4755 		ct = i_devi_get_client(dip);
4756 		ASSERT(ct != NULL);
4757 
4758 		MDI_CLIENT_LOCK(ct);
4759 		switch (cmd) {
4760 		case DDI_ATTACH:
4761 			MDI_DEBUG(2, (CE_NOTE, dip,
4762 			    "!Client post_attach: called %p\n", ct));
4763 			if (error != DDI_SUCCESS) {
4764 				MDI_DEBUG(1, (CE_NOTE, dip,
4765 				    "!Client post_attach: failed error=%d\n",
4766 				    error));
4767 				MDI_CLIENT_SET_DETACH(ct);
4768 				MDI_DEBUG(4, (CE_WARN, dip,
4769 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
4770 				i_mdi_pm_reset_client(ct);
4771 				break;
4772 			}
4773 
4774 			/*
4775 			 * Client device has successfully attached.
4776 			 * Create kstats for any pathinfo structures
4777 			 * initially associated with this client.
4778 			 */
4779 			for (pip = ct->ct_path_head; pip != NULL;
4780 			    pip = (mdi_pathinfo_t *)
4781 			    MDI_PI(pip)->pi_client_link) {
4782 				(void) i_mdi_pi_kstat_create(pip);
4783 				i_mdi_report_path_state(ct, pip);
4784 			}
4785 			MDI_CLIENT_SET_ATTACH(ct);
4786 			break;
4787 
4788 		case DDI_RESUME:
4789 			MDI_DEBUG(2, (CE_NOTE, dip,
4790 			    "!Client post_attach: called %p\n", ct));
4791 			if (error == DDI_SUCCESS) {
4792 				MDI_CLIENT_SET_RESUME(ct);
4793 			} else {
4794 				MDI_DEBUG(1, (CE_NOTE, dip,
4795 				    "!Client post_resume: failed error=%d\n",
4796 				    error));
4797 				MDI_CLIENT_SET_SUSPEND(ct);
4798 			}
4799 			break;
4800 		}
4801 		MDI_CLIENT_UNLOCK(ct);
4802 	}
4803 }
4804 
4805 /*
4806  * mdi_pre_detach():
4807  *		Pre detach notification handler
4808  */
4809 
4810 /*ARGSUSED*/
4811 int
4812 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4813 {
4814 	int rv = DDI_SUCCESS;
4815 
4816 	if (MDI_CLIENT(dip)) {
4817 		(void) i_mdi_client_pre_detach(dip, cmd);
4818 	}
4819 
4820 	if (MDI_PHCI(dip)) {
4821 		rv = i_mdi_phci_pre_detach(dip, cmd);
4822 	}
4823 
4824 	return (rv);
4825 }
4826 
4827 /*ARGSUSED*/
4828 static int
4829 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4830 {
4831 	int		rv = DDI_SUCCESS;
4832 	mdi_phci_t	*ph;
4833 	mdi_client_t	*ct;
4834 	mdi_pathinfo_t	*pip;
4835 	mdi_pathinfo_t	*failed_pip = NULL;
4836 	mdi_pathinfo_t	*next;
4837 
4838 	ph = i_devi_get_phci(dip);
4839 	if (ph == NULL) {
4840 		return (rv);
4841 	}
4842 
4843 	MDI_PHCI_LOCK(ph);
4844 	switch (cmd) {
4845 	case DDI_DETACH:
4846 		MDI_DEBUG(2, (CE_NOTE, dip,
4847 		    "!pHCI pre_detach: called %p\n", ph));
4848 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
4849 			/*
4850 			 * mdi_pathinfo nodes are still attached to
4851 			 * this pHCI. Fail the detach for this pHCI.
4852 			 */
4853 			MDI_DEBUG(2, (CE_WARN, dip,
4854 			    "!pHCI pre_detach: "
4855 			    "mdi_pathinfo nodes are still attached "
4856 			    "%p\n", ph));
4857 			rv = DDI_FAILURE;
4858 			break;
4859 		}
4860 		MDI_PHCI_SET_DETACH(ph);
4861 		break;
4862 
4863 	case DDI_SUSPEND:
4864 		/*
4865 		 * pHCI is getting suspended.  Since mpxio client
4866 		 * devices may not be suspended at this point, to avoid
4867 		 * a potential stack overflow, it is important to suspend
4868 		 * client devices before pHCI can be suspended.
4869 		 */
4870 
4871 		MDI_DEBUG(2, (CE_NOTE, dip,
4872 		    "!pHCI pre_suspend: called %p\n", ph));
4873 		/*
4874 		 * Suspend all the client devices accessible through this pHCI
4875 		 */
4876 		pip = ph->ph_path_head;
4877 		while (pip != NULL && rv == DDI_SUCCESS) {
4878 			dev_info_t *cdip;
4879 			MDI_PI_LOCK(pip);
4880 			next =
4881 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4882 			ct = MDI_PI(pip)->pi_client;
4883 			i_mdi_client_lock(ct, pip);
4884 			cdip = ct->ct_dip;
4885 			MDI_PI_UNLOCK(pip);
4886 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
4887 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
4888 				i_mdi_client_unlock(ct);
4889 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
4890 				    DDI_SUCCESS) {
4891 					/*
4892 					 * Suspend of one of the client
4893 					 * device has failed.
4894 					 */
4895 					MDI_DEBUG(1, (CE_WARN, dip,
4896 					    "!Suspend of device (%s%d) failed.",
4897 					    ddi_driver_name(cdip),
4898 					    ddi_get_instance(cdip)));
4899 					failed_pip = pip;
4900 					break;
4901 				}
4902 			} else {
4903 				i_mdi_client_unlock(ct);
4904 			}
4905 			pip = next;
4906 		}
4907 
4908 		if (rv == DDI_SUCCESS) {
4909 			/*
4910 			 * Suspend of client devices is complete. Proceed
4911 			 * with pHCI suspend.
4912 			 */
4913 			MDI_PHCI_SET_SUSPEND(ph);
4914 		} else {
4915 			/*
4916 			 * Revert back all the suspended client device states
4917 			 * to converse.
4918 			 */
4919 			pip = ph->ph_path_head;
4920 			while (pip != failed_pip) {
4921 				dev_info_t *cdip;
4922 				MDI_PI_LOCK(pip);
4923 				next =
4924 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4925 				ct = MDI_PI(pip)->pi_client;
4926 				i_mdi_client_lock(ct, pip);
4927 				cdip = ct->ct_dip;
4928 				MDI_PI_UNLOCK(pip);
4929 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
4930 					i_mdi_client_unlock(ct);
4931 					(void) devi_attach(cdip, DDI_RESUME);
4932 				} else {
4933 					i_mdi_client_unlock(ct);
4934 				}
4935 				pip = next;
4936 			}
4937 		}
4938 		break;
4939 
4940 	default:
4941 		rv = DDI_FAILURE;
4942 		break;
4943 	}
4944 	MDI_PHCI_UNLOCK(ph);
4945 	return (rv);
4946 }
4947 
4948 /*ARGSUSED*/
4949 static int
4950 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4951 {
4952 	int		rv = DDI_SUCCESS;
4953 	mdi_client_t	*ct;
4954 
4955 	ct = i_devi_get_client(dip);
4956 	if (ct == NULL) {
4957 		return (rv);
4958 	}
4959 
4960 	MDI_CLIENT_LOCK(ct);
4961 	switch (cmd) {
4962 	case DDI_DETACH:
4963 		MDI_DEBUG(2, (CE_NOTE, dip,
4964 		    "!Client pre_detach: called %p\n", ct));
4965 		MDI_CLIENT_SET_DETACH(ct);
4966 		break;
4967 
4968 	case DDI_SUSPEND:
4969 		MDI_DEBUG(2, (CE_NOTE, dip,
4970 		    "!Client pre_suspend: called %p\n", ct));
4971 		MDI_CLIENT_SET_SUSPEND(ct);
4972 		break;
4973 
4974 	default:
4975 		rv = DDI_FAILURE;
4976 		break;
4977 	}
4978 	MDI_CLIENT_UNLOCK(ct);
4979 	return (rv);
4980 }
4981 
4982 /*
4983  * mdi_post_detach():
4984  *		Post detach notification handler
4985  */
4986 
4987 /*ARGSUSED*/
4988 void
4989 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
4990 {
4991 	/*
4992 	 * Detach/Suspend of mpxio component failed. Update our state
4993 	 * too
4994 	 */
4995 	if (MDI_PHCI(dip))
4996 		i_mdi_phci_post_detach(dip, cmd, error);
4997 
4998 	if (MDI_CLIENT(dip))
4999 		i_mdi_client_post_detach(dip, cmd, error);
5000 }
5001 
5002 /*ARGSUSED*/
5003 static void
5004 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5005 {
5006 	mdi_phci_t	*ph;
5007 
5008 	/*
5009 	 * Detach/Suspend of phci component failed. Update our state
5010 	 * too
5011 	 */
5012 	ph = i_devi_get_phci(dip);
5013 	if (ph == NULL) {
5014 		return;
5015 	}
5016 
5017 	MDI_PHCI_LOCK(ph);
5018 	/*
5019 	 * Detach of pHCI failed. Restore back converse
5020 	 * state
5021 	 */
5022 	switch (cmd) {
5023 	case DDI_DETACH:
5024 		MDI_DEBUG(2, (CE_NOTE, dip,
5025 		    "!pHCI post_detach: called %p\n", ph));
5026 		if (error != DDI_SUCCESS)
5027 			MDI_PHCI_SET_ATTACH(ph);
5028 		break;
5029 
5030 	case DDI_SUSPEND:
5031 		MDI_DEBUG(2, (CE_NOTE, dip,
5032 		    "!pHCI post_suspend: called %p\n", ph));
5033 		if (error != DDI_SUCCESS)
5034 			MDI_PHCI_SET_RESUME(ph);
5035 		break;
5036 	}
5037 	MDI_PHCI_UNLOCK(ph);
5038 }
5039 
5040 /*ARGSUSED*/
5041 static void
5042 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5043 {
5044 	mdi_client_t	*ct;
5045 
5046 	ct = i_devi_get_client(dip);
5047 	if (ct == NULL) {
5048 		return;
5049 	}
5050 	MDI_CLIENT_LOCK(ct);
5051 	/*
5052 	 * Detach of Client failed. Restore back converse
5053 	 * state
5054 	 */
5055 	switch (cmd) {
5056 	case DDI_DETACH:
5057 		MDI_DEBUG(2, (CE_NOTE, dip,
5058 		    "!Client post_detach: called %p\n", ct));
5059 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5060 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5061 			    "i_mdi_pm_rele_client\n"));
5062 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5063 		} else {
5064 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5065 			    "i_mdi_pm_reset_client\n"));
5066 			i_mdi_pm_reset_client(ct);
5067 		}
5068 		if (error != DDI_SUCCESS)
5069 			MDI_CLIENT_SET_ATTACH(ct);
5070 		break;
5071 
5072 	case DDI_SUSPEND:
5073 		MDI_DEBUG(2, (CE_NOTE, dip,
5074 		    "!Client post_suspend: called %p\n", ct));
5075 		if (error != DDI_SUCCESS)
5076 			MDI_CLIENT_SET_RESUME(ct);
5077 		break;
5078 	}
5079 	MDI_CLIENT_UNLOCK(ct);
5080 }
5081 
5082 /*
5083  * create and install per-path (client - pHCI) statistics
5084  * I/O stats supported: nread, nwritten, reads, and writes
5085  * Error stats - hard errors, soft errors, & transport errors
5086  */
5087 static int
5088 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
5089 {
5090 
5091 	dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip;
5092 	dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip;
5093 	char ksname[KSTAT_STRLEN];
5094 	mdi_pathinfo_t *cpip;
5095 	const char *err_postfix = ",err";
5096 	kstat_t	*kiosp, *kerrsp;
5097 	struct pi_errs	*nsp;
5098 	struct mdi_pi_kstats *mdi_statp;
5099 
5100 	ASSERT(client != NULL && ppath != NULL);
5101 
5102 	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));
5103 
5104 	if (MDI_PI(pip)->pi_kstats != NULL)
5105 		return (MDI_SUCCESS);
5106 
5107 	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
5108 	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
5109 		if (cpip == pip)
5110 			continue;
5111 		/*
5112 		 * We have found a different path with same parent
5113 		 * kstats for a given client-pHCI are common
5114 		 */
5115 		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
5116 		    (MDI_PI(cpip)->pi_kstats != NULL)) {
5117 			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
5118 			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
5119 			return (MDI_SUCCESS);
5120 		}
5121 	}
5122 
5123 	/*
5124 	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
5125 	 * clamp length of name against max length of error kstat name
5126 	 */
5127 	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
5128 	    ddi_driver_name(client), ddi_get_instance(client),
5129 	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
5130 	    (KSTAT_STRLEN - strlen(err_postfix))) {
5131 		return (MDI_FAILURE);
5132 	}
5133 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5134 	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
5135 		return (MDI_FAILURE);
5136 	}
5137 
5138 	(void) strcat(ksname, err_postfix);
5139 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5140 	    KSTAT_TYPE_NAMED,
5141 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5142 
5143 	if (kerrsp == NULL) {
5144 		kstat_delete(kiosp);
5145 		return (MDI_FAILURE);
5146 	}
5147 
5148 	nsp = (struct pi_errs *)kerrsp->ks_data;
5149 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5150 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5151 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5152 	    KSTAT_DATA_UINT32);
5153 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5154 	    KSTAT_DATA_UINT32);
5155 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5156 	    KSTAT_DATA_UINT32);
5157 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5158 	    KSTAT_DATA_UINT32);
5159 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5160 	    KSTAT_DATA_UINT32);
5161 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5162 	    KSTAT_DATA_UINT32);
5163 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5164 	    KSTAT_DATA_UINT32);
5165 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5166 
5167 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5168 	mdi_statp->pi_kstat_ref = 1;
5169 	mdi_statp->pi_kstat_iostats = kiosp;
5170 	mdi_statp->pi_kstat_errstats = kerrsp;
5171 	kstat_install(kiosp);
5172 	kstat_install(kerrsp);
5173 	MDI_PI(pip)->pi_kstats = mdi_statp;
5174 	return (MDI_SUCCESS);
5175 }
5176 
5177 /*
5178  * destroy per-path properties
5179  */
5180 static void
5181 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5182 {
5183 
5184 	struct mdi_pi_kstats *mdi_statp;
5185 
5186 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5187 		return;
5188 
5189 	MDI_PI(pip)->pi_kstats = NULL;
5190 
5191 	/*
5192 	 * the kstat may be shared between multiple pathinfo nodes
5193 	 * decrement this pathinfo's usage, removing the kstats
5194 	 * themselves when the last pathinfo reference is removed.
5195 	 */
5196 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5197 	if (--mdi_statp->pi_kstat_ref != 0)
5198 		return;
5199 
5200 	kstat_delete(mdi_statp->pi_kstat_iostats);
5201 	kstat_delete(mdi_statp->pi_kstat_errstats);
5202 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5203 }
5204 
5205 /*
5206  * update I/O paths KSTATS
5207  */
5208 void
5209 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5210 {
5211 	kstat_t *iostatp;
5212 	size_t xfer_cnt;
5213 
5214 	ASSERT(pip != NULL);
5215 
5216 	/*
5217 	 * I/O can be driven across a path prior to having path
5218 	 * statistics available, i.e. probe(9e).
5219 	 */
5220 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5221 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5222 		xfer_cnt = bp->b_bcount - bp->b_resid;
5223 		if (bp->b_flags & B_READ) {
5224 			KSTAT_IO_PTR(iostatp)->reads++;
5225 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5226 		} else {
5227 			KSTAT_IO_PTR(iostatp)->writes++;
5228 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5229 		}
5230 	}
5231 }
5232 
5233 /*
5234  * disable the path to a particular pHCI (pHCI specified in the phci_path
5235  * argument) for a particular client (specified in the client_path argument).
5236  * Disabling a path means that MPxIO will not select the disabled path for
5237  * routing any new I/O requests.
5238  */
5239 int
5240 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5241 {
5242 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5243 }
5244 
5245 /*
5246  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5247  * argument) for a particular client (specified in the client_path argument).
5248  * Enabling a path means that MPxIO may select the enabled path for routing
5249  * future I/O requests, subject to other path state constraints.
5250  */
5251 
5252 int
5253 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5254 {
5255 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5256 }
5257 
5258 
5259 /*
5260  * Common routine for doing enable/disable.
5261  */
5262 int
5263 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5264 {
5265 
5266 	mdi_phci_t	*ph;
5267 	mdi_vhci_t	*vh = NULL;
5268 	mdi_client_t	*ct;
5269 	mdi_pathinfo_t	*next, *pip;
5270 	int		found_it;
5271 	int		(*f)() = NULL;
5272 	int		rv;
5273 	int		sync_flag = 0;
5274 
5275 	ph = i_devi_get_phci(pdip);
5276 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5277 		" Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip));
5278 	if (ph == NULL) {
5279 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5280 			" failed. ph = NULL operation = %d\n", op));
5281 		return (MDI_FAILURE);
5282 	}
5283 
5284 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5285 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5286 			" Invalid operation = %d\n", op));
5287 		return (MDI_FAILURE);
5288 	}
5289 
5290 	sync_flag = (flags << 8) & 0xf00;
5291 
5292 	vh = ph->ph_vhci;
5293 	f = vh->vh_ops->vo_pi_state_change;
5294 
5295 	if (cdip == NULL) {
5296 		/*
5297 		 * Need to mark the Phci as enabled/disabled.
5298 		 */
5299 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5300 		"Operation %d for the phci\n", op));
5301 		MDI_PHCI_LOCK(ph);
5302 		switch (flags) {
5303 			case USER_DISABLE:
5304 				if (op == MDI_DISABLE_OP)
5305 					MDI_PHCI_SET_USER_DISABLE(ph);
5306 				else
5307 					MDI_PHCI_SET_USER_ENABLE(ph);
5308 				break;
5309 			case DRIVER_DISABLE:
5310 				if (op == MDI_DISABLE_OP)
5311 					MDI_PHCI_SET_DRV_DISABLE(ph);
5312 				else
5313 					MDI_PHCI_SET_DRV_ENABLE(ph);
5314 				break;
5315 			case DRIVER_DISABLE_TRANSIENT:
5316 				if (op == MDI_DISABLE_OP)
5317 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5318 				else
5319 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
5320 				break;
5321 			default:
5322 				MDI_PHCI_UNLOCK(ph);
5323 				MDI_DEBUG(1, (CE_NOTE, NULL,
5324 				"!i_mdi_pi_enable_disable:"
5325 				" Invalid flag argument= %d\n", flags));
5326 		}
5327 
5328 		/*
5329 		 * Phci has been disabled. Now try to enable/disable
5330 		 * path info's to each client.
5331 		 */
5332 		pip = ph->ph_path_head;
5333 		while (pip != NULL) {
5334 			/*
5335 			 * Do a callback into the mdi consumer to let it
5336 			 * know that path is about to be enabled/disabled.
5337 			 */
5338 			if (f != NULL) {
5339 				rv = (*f)(vh->vh_dip, pip, 0,
5340 					MDI_PI_EXT_STATE(pip),
5341 					MDI_EXT_STATE_CHANGE | sync_flag |
5342 					op | MDI_BEFORE_STATE_CHANGE);
5343 				if (rv != MDI_SUCCESS) {
5344 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5345 				"!vo_pi_state_change: failed rv = %x", rv));
5346 				}
5347 			}
5348 
5349 			MDI_PI_LOCK(pip);
5350 			next =
5351 				(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5352 			switch (flags) {
5353 			case USER_DISABLE:
5354 				if (op == MDI_DISABLE_OP)
5355 					MDI_PI_SET_USER_DISABLE(pip);
5356 				else
5357 					MDI_PI_SET_USER_ENABLE(pip);
5358 				break;
5359 			case DRIVER_DISABLE:
5360 				if (op == MDI_DISABLE_OP)
5361 					MDI_PI_SET_DRV_DISABLE(pip);
5362 				else
5363 					MDI_PI_SET_DRV_ENABLE(pip);
5364 				break;
5365 			case DRIVER_DISABLE_TRANSIENT:
5366 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5367 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5368 				else
5369 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5370 				break;
5371 			}
5372 			MDI_PI_UNLOCK(pip);
5373 			/*
5374 			 * Do a callback into the mdi consumer to let it
5375 			 * know that path is now enabled/disabled.
5376 			 */
5377 			if (f != NULL) {
5378 				rv = (*f)(vh->vh_dip, pip, 0,
5379 					MDI_PI_EXT_STATE(pip),
5380 					MDI_EXT_STATE_CHANGE | sync_flag |
5381 					op | MDI_AFTER_STATE_CHANGE);
5382 				if (rv != MDI_SUCCESS) {
5383 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5384 				"!vo_pi_state_change: failed rv = %x", rv));
5385 				}
5386 			}
5387 			pip = next;
5388 		}
5389 		MDI_PHCI_UNLOCK(ph);
5390 	} else {
5391 
5392 		/*
5393 		 * Disable a specific client.
5394 		 */
5395 		ct = i_devi_get_client(cdip);
5396 		if (ct == NULL) {
5397 			MDI_DEBUG(1, (CE_NOTE, NULL,
5398 			"!i_mdi_pi_enable_disable:"
5399 			" failed. ct = NULL operation = %d\n", op));
5400 			return (MDI_FAILURE);
5401 		}
5402 
5403 		MDI_CLIENT_LOCK(ct);
5404 		pip = ct->ct_path_head;
5405 		found_it = 0;
5406 		while (pip != NULL) {
5407 			MDI_PI_LOCK(pip);
5408 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5409 			if (MDI_PI(pip)->pi_phci == ph) {
5410 				MDI_PI_UNLOCK(pip);
5411 				found_it = 1;
5412 				break;
5413 			}
5414 			MDI_PI_UNLOCK(pip);
5415 			pip = next;
5416 		}
5417 
5418 		MDI_CLIENT_UNLOCK(ct);
5419 		if (found_it == 0) {
5420 			MDI_DEBUG(1, (CE_NOTE, NULL,
5421 			"!i_mdi_pi_enable_disable:"
5422 			" failed. Could not find corresponding pip\n"));
5423 			return (MDI_FAILURE);
5424 		}
5425 		/*
5426 		 * Do a callback into the mdi consumer to let it
5427 		 * know that path is about to get enabled/disabled.
5428 		 */
5429 		if (f != NULL) {
5430 			rv = (*f)(vh->vh_dip, pip, 0,
5431 				MDI_PI_EXT_STATE(pip),
5432 				MDI_EXT_STATE_CHANGE | sync_flag |
5433 				op | MDI_BEFORE_STATE_CHANGE);
5434 			if (rv != MDI_SUCCESS) {
5435 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5436 				"!vo_pi_state_change: failed rv = %x", rv));
5437 			}
5438 		}
5439 		MDI_PI_LOCK(pip);
5440 		switch (flags) {
5441 			case USER_DISABLE:
5442 				if (op == MDI_DISABLE_OP)
5443 					MDI_PI_SET_USER_DISABLE(pip);
5444 				else
5445 					MDI_PI_SET_USER_ENABLE(pip);
5446 				break;
5447 			case DRIVER_DISABLE:
5448 				if (op == MDI_DISABLE_OP)
5449 					MDI_PI_SET_DRV_DISABLE(pip);
5450 				else
5451 					MDI_PI_SET_DRV_ENABLE(pip);
5452 				break;
5453 			case DRIVER_DISABLE_TRANSIENT:
5454 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5455 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5456 				else
5457 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5458 				break;
5459 		}
5460 		MDI_PI_UNLOCK(pip);
5461 		/*
5462 		 * Do a callback into the mdi consumer to let it
5463 		 * know that path is now enabled/disabled.
5464 		 */
5465 		if (f != NULL) {
5466 			rv = (*f)(vh->vh_dip, pip, 0,
5467 				MDI_PI_EXT_STATE(pip),
5468 				MDI_EXT_STATE_CHANGE | sync_flag |
5469 				op | MDI_AFTER_STATE_CHANGE);
5470 			if (rv != MDI_SUCCESS) {
5471 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5472 				"!vo_pi_state_change: failed rv = %x", rv));
5473 			}
5474 		}
5475 	}
5476 
5477 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5478 		" Returning success pdip = %p cdip = %p\n", op, pdip, cdip));
5479 	return (MDI_SUCCESS);
5480 }
5481 
5482 /*ARGSUSED3*/
5483 int
5484 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
5485     int flags, clock_t timeout)
5486 {
5487 	mdi_pathinfo_t *pip;
5488 	dev_info_t *dip;
5489 	clock_t interval = drv_usectohz(100000);	/* 0.1 sec */
5490 	char *paddr;
5491 
5492 	MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm));
5493 
5494 	if (!MDI_PHCI(pdip))
5495 		return (MDI_FAILURE);
5496 
5497 	paddr = strchr(devnm, '@');
5498 	if (paddr == NULL)
5499 		return (MDI_FAILURE);
5500 
5501 	paddr++;	/* skip '@' */
5502 	pip = mdi_pi_find(pdip, NULL, paddr);
5503 	while (pip == NULL && timeout > 0) {
5504 		if (interval > timeout)
5505 			interval = timeout;
5506 		if (flags & NDI_DEVI_DEBUG) {
5507 			cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n",
5508 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
5509 			    paddr, interval, timeout);
5510 		}
5511 		delay(interval);
5512 		timeout -= interval;
5513 		interval += interval;
5514 		pip = mdi_pi_find(pdip, NULL, paddr);
5515 	}
5516 
5517 	if (pip == NULL)
5518 		return (MDI_FAILURE);
5519 	dip = mdi_pi_get_client(pip);
5520 	if (ndi_devi_online(dip, flags) != NDI_SUCCESS)
5521 		return (MDI_FAILURE);
5522 	*cdipp = dip;
5523 
5524 	/* TODO: holding should happen inside search functions */
5525 	ndi_hold_devi(dip);
5526 	return (MDI_SUCCESS);
5527 }
5528 
5529 /*
5530  * Ensure phci powered up
5531  */
5532 static void
5533 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
5534 {
5535 	dev_info_t	*ph_dip;
5536 
5537 	ASSERT(pip != NULL);
5538 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5539 
5540 	if (MDI_PI(pip)->pi_pm_held) {
5541 		return;
5542 	}
5543 
5544 	ph_dip = mdi_pi_get_phci(pip);
5545 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n",
5546 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5547 	if (ph_dip == NULL) {
5548 		return;
5549 	}
5550 
5551 	MDI_PI_UNLOCK(pip);
5552 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5553 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5554 	pm_hold_power(ph_dip);
5555 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5556 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5557 	MDI_PI_LOCK(pip);
5558 
5559 	MDI_PI(pip)->pi_pm_held = 1;
5560 }
5561 
5562 /*
5563  * Allow phci powered down
5564  */
5565 static void
5566 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
5567 {
5568 	dev_info_t	*ph_dip = NULL;
5569 
5570 	ASSERT(pip != NULL);
5571 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5572 
5573 	if (MDI_PI(pip)->pi_pm_held == 0) {
5574 		return;
5575 	}
5576 
5577 	ph_dip = mdi_pi_get_phci(pip);
5578 	ASSERT(ph_dip != NULL);
5579 
5580 	MDI_PI_UNLOCK(pip);
5581 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n",
5582 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5583 
5584 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5585 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5586 	pm_rele_power(ph_dip);
5587 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5588 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5589 
5590 	MDI_PI_LOCK(pip);
5591 	MDI_PI(pip)->pi_pm_held = 0;
5592 }
5593 
5594 static void
5595 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
5596 {
5597 	ASSERT(ct);
5598 
5599 	ct->ct_power_cnt += incr;
5600 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client "
5601 	    "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr));
5602 	ASSERT(ct->ct_power_cnt >= 0);
5603 }
5604 
5605 static void
5606 i_mdi_rele_all_phci(mdi_client_t *ct)
5607 {
5608 	mdi_pathinfo_t  *pip;
5609 
5610 	ASSERT(mutex_owned(&ct->ct_mutex));
5611 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5612 	while (pip != NULL) {
5613 		mdi_hold_path(pip);
5614 		MDI_PI_LOCK(pip);
5615 		i_mdi_pm_rele_pip(pip);
5616 		MDI_PI_UNLOCK(pip);
5617 		mdi_rele_path(pip);
5618 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5619 	}
5620 }
5621 
5622 static void
5623 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
5624 {
5625 	ASSERT(ct);
5626 
5627 	if (i_ddi_node_state(ct->ct_dip) >= DS_READY) {
5628 		ct->ct_power_cnt -= decr;
5629 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
5630 		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
5631 	}
5632 
5633 	ASSERT(ct->ct_power_cnt >= 0);
5634 	if (ct->ct_power_cnt == 0) {
5635 		i_mdi_rele_all_phci(ct);
5636 		return;
5637 	}
5638 }
5639 
5640 static void
5641 i_mdi_pm_reset_client(mdi_client_t *ct)
5642 {
5643 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
5644 	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
5645 	ct->ct_power_cnt = 0;
5646 	i_mdi_rele_all_phci(ct);
5647 	ct->ct_powercnt_reset = 1;
5648 	ct->ct_powercnt_held = 0;
5649 }
5650 
5651 static void
5652 i_mdi_pm_hold_all_phci(mdi_client_t *ct)
5653 {
5654 	mdi_pathinfo_t  *pip;
5655 	ASSERT(mutex_owned(&ct->ct_mutex));
5656 
5657 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5658 	while (pip != NULL) {
5659 		mdi_hold_path(pip);
5660 		MDI_PI_LOCK(pip);
5661 		i_mdi_pm_hold_pip(pip);
5662 		MDI_PI_UNLOCK(pip);
5663 		mdi_rele_path(pip);
5664 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5665 	}
5666 }
5667 
5668 static int
5669 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
5670 {
5671 	int		ret;
5672 	dev_info_t	*ph_dip;
5673 
5674 	MDI_PI_LOCK(pip);
5675 	i_mdi_pm_hold_pip(pip);
5676 
5677 	ph_dip = mdi_pi_get_phci(pip);
5678 	MDI_PI_UNLOCK(pip);
5679 
5680 	/* bring all components of phci to full power */
5681 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5682 	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
5683 	    ddi_get_instance(ph_dip)));
5684 
5685 	ret = pm_powerup(ph_dip);
5686 
5687 	if (ret == DDI_FAILURE) {
5688 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5689 		    "pm_powerup FAILED for %s%d\n",
5690 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5691 
5692 		MDI_PI_LOCK(pip);
5693 		i_mdi_pm_rele_pip(pip);
5694 		MDI_PI_UNLOCK(pip);
5695 		return (MDI_FAILURE);
5696 	}
5697 
5698 	return (MDI_SUCCESS);
5699 }
5700 
5701 static int
5702 i_mdi_power_all_phci(mdi_client_t *ct)
5703 {
5704 	mdi_pathinfo_t  *pip;
5705 	int		succeeded = 0;
5706 
5707 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5708 	while (pip != NULL) {
5709 		mdi_hold_path(pip);
5710 		MDI_CLIENT_UNLOCK(ct);
5711 		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
5712 			succeeded = 1;
5713 
5714 		ASSERT(ct == MDI_PI(pip)->pi_client);
5715 		MDI_CLIENT_LOCK(ct);
5716 		mdi_rele_path(pip);
5717 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5718 	}
5719 
5720 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
5721 }
5722 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are handled here.
 *		BUS_POWER_NOINVOL is rejected and every other op is passed
 *		straight to pm_busop_bus_power().
 *
 *		For the two notification ops, arg is interpreted as a
 *		pm_bp_child_pwrchg_t; for BUS_POWER_HAS_CHANGED it is
 *		interpreted as a pm_bp_has_changed_t.  For
 *		BUS_POWER_POST_NOTIFICATION, result is read as an int holding
 *		the outcome of the power change.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		(or whatever pm_busop_bus_power() returns for the ops that
 *		are forwarded to it)
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/*
		 * Mark the transition as in progress; it is cleared (and
		 * waiters are woken) in the BUS_POWER_POST_NOTIFICATION case.
		 */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* no holds outstanding: try to power up the phci(s) first */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the client's new power state on success */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* successful power down of the client */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* transition finished: wake anyone waiting on this client */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level went up to a non-zero value: power up and hold */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* level dropped to zero from an old level other than -1 */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
5880 
5881 static int
5882 i_mdi_pm_pre_config_one(dev_info_t *child)
5883 {
5884 	int		ret = MDI_SUCCESS;
5885 	mdi_client_t	*ct;
5886 
5887 	ct = i_devi_get_client(child);
5888 	if (ct == NULL)
5889 		return (MDI_FAILURE);
5890 
5891 	MDI_CLIENT_LOCK(ct);
5892 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5893 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5894 
5895 	if (!MDI_CLIENT_IS_FAILED(ct)) {
5896 		MDI_CLIENT_UNLOCK(ct);
5897 		MDI_DEBUG(4, (CE_NOTE, child,
5898 		    "i_mdi_pm_pre_config_one already configured\n"));
5899 		return (MDI_SUCCESS);
5900 	}
5901 
5902 	if (ct->ct_powercnt_held) {
5903 		MDI_CLIENT_UNLOCK(ct);
5904 		MDI_DEBUG(4, (CE_NOTE, child,
5905 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
5906 		return (MDI_SUCCESS);
5907 	}
5908 
5909 	if (ct->ct_power_cnt == 0) {
5910 		ret = i_mdi_power_all_phci(ct);
5911 	}
5912 	MDI_DEBUG(4, (CE_NOTE, child,
5913 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
5914 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5915 	ct->ct_powercnt_held = 1;
5916 	ct->ct_powercnt_reset = 0;
5917 	MDI_CLIENT_UNLOCK(ct);
5918 	return (ret);
5919 }
5920 
5921 static int
5922 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
5923 {
5924 	int			ret = MDI_SUCCESS;
5925 	dev_info_t		*cdip;
5926 	int			circ;
5927 
5928 	ASSERT(MDI_VHCI(parent));
5929 
5930 	/* ndi_devi_config_one */
5931 	if (child) {
5932 		return (i_mdi_pm_pre_config_one(child));
5933 	}
5934 
5935 	/* devi_config_common */
5936 	ndi_devi_enter(parent, &circ);
5937 	cdip = ddi_get_child(parent);
5938 	while (cdip) {
5939 		dev_info_t *next = ddi_get_next_sibling(cdip);
5940 
5941 		ret = i_mdi_pm_pre_config_one(cdip);
5942 		if (ret != MDI_SUCCESS)
5943 			break;
5944 		cdip = next;
5945 	}
5946 	ndi_devi_exit(parent, circ);
5947 	return (ret);
5948 }
5949 
5950 static int
5951 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
5952 {
5953 	int		ret = MDI_SUCCESS;
5954 	mdi_client_t	*ct;
5955 
5956 	ct = i_devi_get_client(child);
5957 	if (ct == NULL)
5958 		return (MDI_FAILURE);
5959 
5960 	MDI_CLIENT_LOCK(ct);
5961 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5962 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5963 
5964 	if (i_ddi_node_state(ct->ct_dip) < DS_READY) {
5965 		MDI_DEBUG(4, (CE_NOTE, child,
5966 		    "i_mdi_pm_pre_unconfig node detached already\n"));
5967 		MDI_CLIENT_UNLOCK(ct);
5968 		return (MDI_SUCCESS);
5969 	}
5970 
5971 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
5972 	    (flags & NDI_AUTODETACH)) {
5973 		MDI_DEBUG(4, (CE_NOTE, child,
5974 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
5975 		MDI_CLIENT_UNLOCK(ct);
5976 		return (MDI_FAILURE);
5977 	}
5978 
5979 	if (ct->ct_powercnt_held) {
5980 		MDI_DEBUG(4, (CE_NOTE, child,
5981 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
5982 		MDI_CLIENT_UNLOCK(ct);
5983 		*held = 1;
5984 		return (MDI_SUCCESS);
5985 	}
5986 
5987 	if (ct->ct_power_cnt == 0) {
5988 		ret = i_mdi_power_all_phci(ct);
5989 	}
5990 	MDI_DEBUG(4, (CE_NOTE, child,
5991 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
5992 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5993 	ct->ct_powercnt_held = 1;
5994 	ct->ct_powercnt_reset = 0;
5995 	MDI_CLIENT_UNLOCK(ct);
5996 	if (ret == MDI_SUCCESS)
5997 		*held = 1;
5998 	return (ret);
5999 }
6000 
6001 static int
6002 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
6003     int flags)
6004 {
6005 	int			ret = MDI_SUCCESS;
6006 	dev_info_t		*cdip;
6007 	int			circ;
6008 
6009 	ASSERT(MDI_VHCI(parent));
6010 	*held = 0;
6011 
6012 	/* ndi_devi_unconfig_one */
6013 	if (child) {
6014 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6015 	}
6016 
6017 	/* devi_unconfig_common */
6018 	ndi_devi_enter(parent, &circ);
6019 	cdip = ddi_get_child(parent);
6020 	while (cdip) {
6021 		dev_info_t *next = ddi_get_next_sibling(cdip);
6022 
6023 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6024 		cdip = next;
6025 	}
6026 	ndi_devi_exit(parent, circ);
6027 
6028 	if (*held)
6029 		ret = MDI_SUCCESS;
6030 
6031 	return (ret);
6032 }
6033 
6034 static void
6035 i_mdi_pm_post_config_one(dev_info_t *child)
6036 {
6037 	mdi_client_t	*ct;
6038 
6039 	ct = i_devi_get_client(child);
6040 	if (ct == NULL)
6041 		return;
6042 
6043 	MDI_CLIENT_LOCK(ct);
6044 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6045 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6046 
6047 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_held) {
6048 		MDI_DEBUG(4, (CE_NOTE, child,
6049 		    "i_mdi_pm_post_config_one NOT held\n"));
6050 		MDI_CLIENT_UNLOCK(ct);
6051 		return;
6052 	}
6053 
6054 	/* client has not been updated */
6055 	if (MDI_CLIENT_IS_FAILED(ct)) {
6056 		MDI_DEBUG(4, (CE_NOTE, child,
6057 		    "i_mdi_pm_post_config_one NOT configured\n"));
6058 		MDI_CLIENT_UNLOCK(ct);
6059 		return;
6060 	}
6061 
6062 	/* another thread might have powered it down or detached it */
6063 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6064 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6065 	    (i_ddi_node_state(ct->ct_dip) < DS_READY &&
6066 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6067 		MDI_DEBUG(4, (CE_NOTE, child,
6068 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6069 		i_mdi_pm_reset_client(ct);
6070 	} else {
6071 		mdi_pathinfo_t	*pip, *next;
6072 		int	valid_path_count = 0;
6073 
6074 		MDI_DEBUG(4, (CE_NOTE, child,
6075 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6076 		pip = ct->ct_path_head;
6077 		while (pip != NULL) {
6078 			MDI_PI_LOCK(pip);
6079 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6080 			if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
6081 				== MDI_PATHINFO_STATE_ONLINE ||
6082 			    (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
6083 				== MDI_PATHINFO_STATE_STANDBY)
6084 				valid_path_count ++;
6085 			MDI_PI_UNLOCK(pip);
6086 			pip = next;
6087 		}
6088 		i_mdi_pm_rele_client(ct, valid_path_count);
6089 	}
6090 	ct->ct_powercnt_held = 0;
6091 	MDI_CLIENT_UNLOCK(ct);
6092 }
6093 
6094 static void
6095 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
6096 {
6097 	int		circ;
6098 	dev_info_t	*cdip;
6099 	ASSERT(MDI_VHCI(parent));
6100 
6101 	/* ndi_devi_config_one */
6102 	if (child) {
6103 		i_mdi_pm_post_config_one(child);
6104 		return;
6105 	}
6106 
6107 	/* devi_config_common */
6108 	ndi_devi_enter(parent, &circ);
6109 	cdip = ddi_get_child(parent);
6110 	while (cdip) {
6111 		dev_info_t *next = ddi_get_next_sibling(cdip);
6112 
6113 		i_mdi_pm_post_config_one(cdip);
6114 		cdip = next;
6115 	}
6116 	ndi_devi_exit(parent, circ);
6117 }
6118 
6119 static void
6120 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6121 {
6122 	mdi_client_t	*ct;
6123 
6124 	ct = i_devi_get_client(child);
6125 	if (ct == NULL)
6126 		return;
6127 
6128 	MDI_CLIENT_LOCK(ct);
6129 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6130 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6131 
6132 	if (!ct->ct_powercnt_held) {
6133 		MDI_DEBUG(4, (CE_NOTE, child,
6134 		    "i_mdi_pm_post_unconfig NOT held\n"));
6135 		MDI_CLIENT_UNLOCK(ct);
6136 		return;
6137 	}
6138 
6139 	/* failure detaching or another thread just attached it */
6140 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6141 	    i_ddi_node_state(ct->ct_dip) == DS_READY) ||
6142 	    (i_ddi_node_state(ct->ct_dip) != DS_READY &&
6143 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6144 		MDI_DEBUG(4, (CE_NOTE, child,
6145 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6146 		i_mdi_pm_reset_client(ct);
6147 	}
6148 
6149 	MDI_DEBUG(4, (CE_NOTE, child,
6150 	    "i_mdi_pm_post_unconfig not changed\n"));
6151 	MDI_CLIENT_UNLOCK(ct);
6152 }
6153 
6154 static void
6155 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
6156 {
6157 	int			circ;
6158 	dev_info_t		*cdip;
6159 
6160 	ASSERT(MDI_VHCI(parent));
6161 
6162 	if (!held) {
6163 		MDI_DEBUG(4, (CE_NOTE, parent,
6164 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6165 		return;
6166 	}
6167 
6168 	if (child) {
6169 		i_mdi_pm_post_unconfig_one(child);
6170 		return;
6171 	}
6172 
6173 	ndi_devi_enter(parent, &circ);
6174 	cdip = ddi_get_child(parent);
6175 	while (cdip) {
6176 		dev_info_t *next = ddi_get_next_sibling(cdip);
6177 
6178 		i_mdi_pm_post_unconfig_one(cdip);
6179 		cdip = next;
6180 	}
6181 	ndi_devi_exit(parent, circ);
6182 }
6183 
6184 int
6185 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6186 {
6187 	int			circ, ret = MDI_SUCCESS;
6188 	dev_info_t		*client_dip = NULL;
6189 	mdi_client_t		*ct;
6190 
6191 	/*
6192 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6193 	 * Power up pHCI for the named client device.
6194 	 * Note: Before the client is enumerated under vhci by phci,
6195 	 * client_dip can be NULL. Then proceed to power up all the
6196 	 * pHCIs.
6197 	 */
6198 	if (devnm != NULL) {
6199 		ndi_devi_enter(vdip, &circ);
6200 		client_dip = ndi_devi_findchild(vdip, devnm);
6201 		ndi_devi_exit(vdip, circ);
6202 	}
6203 
6204 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));
6205 
6206 	switch (op) {
6207 	case MDI_PM_PRE_CONFIG:
6208 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6209 
6210 		break;
6211 	case MDI_PM_PRE_UNCONFIG:
6212 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6213 		    flags);
6214 
6215 		break;
6216 	case MDI_PM_POST_CONFIG:
6217 		i_mdi_pm_post_config(vdip, client_dip);
6218 
6219 		break;
6220 	case MDI_PM_POST_UNCONFIG:
6221 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6222 
6223 		break;
6224 	case MDI_PM_HOLD_POWER:
6225 	case MDI_PM_RELE_POWER:
6226 		ASSERT(args);
6227 
6228 		client_dip = (dev_info_t *)args;
6229 		ASSERT(MDI_CLIENT(client_dip));
6230 
6231 		ct = i_devi_get_client(client_dip);
6232 		MDI_CLIENT_LOCK(ct);
6233 
6234 		if (op == MDI_PM_HOLD_POWER) {
6235 			if (ct->ct_power_cnt == 0) {
6236 				(void) i_mdi_power_all_phci(ct);
6237 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6238 				    "mdi_power i_mdi_pm_hold_client\n"));
6239 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6240 			}
6241 		} else {
6242 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6243 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6244 				    "mdi_power i_mdi_pm_rele_client\n"));
6245 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6246 			} else {
6247 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6248 				    "mdi_power i_mdi_pm_reset_client\n"));
6249 				i_mdi_pm_reset_client(ct);
6250 			}
6251 		}
6252 
6253 		MDI_CLIENT_UNLOCK(ct);
6254 		break;
6255 	default:
6256 		break;
6257 	}
6258 
6259 	return (ret);
6260 }
6261 
6262 int
6263 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6264 {
6265 	mdi_vhci_t *vhci;
6266 
6267 	if (!MDI_VHCI(dip))
6268 		return (MDI_FAILURE);
6269 
6270 	if (mdi_class) {
6271 		vhci = DEVI(dip)->devi_mdi_xhci;
6272 		ASSERT(vhci);
6273 		*mdi_class = vhci->vh_class;
6274 	}
6275 
6276 	return (MDI_SUCCESS);
6277 }
6278 
6279 int
6280 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6281 {
6282 	mdi_phci_t *phci;
6283 
6284 	if (!MDI_PHCI(dip))
6285 		return (MDI_FAILURE);
6286 
6287 	if (mdi_class) {
6288 		phci = DEVI(dip)->devi_mdi_xhci;
6289 		ASSERT(phci);
6290 		*mdi_class = phci->ph_vhci->vh_class;
6291 	}
6292 
6293 	return (MDI_SUCCESS);
6294 }
6295 
6296 int
6297 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6298 {
6299 	mdi_client_t *client;
6300 
6301 	if (!MDI_CLIENT(dip))
6302 		return (MDI_FAILURE);
6303 
6304 	if (mdi_class) {
6305 		client = DEVI(dip)->devi_mdi_client;
6306 		ASSERT(client);
6307 		*mdi_class = client->ct_vhci->vh_class;
6308 	}
6309 
6310 	return (MDI_SUCCESS);
6311 }
6312 
6313 void *
6314 mdi_client_get_vhci_private(dev_info_t *dip)
6315 {
6316 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6317 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6318 		mdi_client_t	*ct;
6319 		ct = i_devi_get_client(dip);
6320 		return (ct->ct_vprivate);
6321 	}
6322 	return (NULL);
6323 }
6324 
6325 void
6326 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6327 {
6328 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6329 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6330 		mdi_client_t	*ct;
6331 		ct = i_devi_get_client(dip);
6332 		ct->ct_vprivate = data;
6333 	}
6334 }
6335 /*
6336  * mdi_pi_get_vhci_private():
6337  *		Get the vhci private information associated with the
6338  *		mdi_pathinfo node
6339  */
6340 void *
6341 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6342 {
6343 	caddr_t	vprivate = NULL;
6344 	if (pip) {
6345 		vprivate = MDI_PI(pip)->pi_vprivate;
6346 	}
6347 	return (vprivate);
6348 }
6349 
6350 /*
6351  * mdi_pi_set_vhci_private():
6352  *		Set the vhci private information in the mdi_pathinfo node
6353  */
6354 void
6355 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6356 {
6357 	if (pip) {
6358 		MDI_PI(pip)->pi_vprivate = priv;
6359 	}
6360 }
6361 
6362 /*
6363  * mdi_phci_get_vhci_private():
6364  *		Get the vhci private information associated with the
6365  *		mdi_phci node
6366  */
6367 void *
6368 mdi_phci_get_vhci_private(dev_info_t *dip)
6369 {
6370 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6371 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6372 		mdi_phci_t	*ph;
6373 		ph = i_devi_get_phci(dip);
6374 		return (ph->ph_vprivate);
6375 	}
6376 	return (NULL);
6377 }
6378 
6379 /*
6380  * mdi_phci_set_vhci_private():
6381  *		Set the vhci private information in the mdi_phci node
6382  */
6383 void
6384 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6385 {
6386 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6387 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6388 		mdi_phci_t	*ph;
6389 		ph = i_devi_get_phci(dip);
6390 		ph->ph_vprivate = priv;
6391 	}
6392 }
6393 
6394 /*
6395  * List of vhci class names:
6396  * A vhci class name must be in this list only if the corresponding vhci
6397  * driver intends to use the mdi provided bus config implementation
6398  * (i.e., mdi_vhci_bus_config()).
6399  */
6400 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
6401 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
6402 
6403 /*
6404  * Built-in list of phci drivers for every vhci class.
6405  * All phci drivers expect iscsi have root device support.
6406  */
6407 static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
6408 	{ "fp", 1 },
6409 	{ "iscsi", 0 },
6410 	{ "ibsrp", 1 }
6411 	};
6412 
6413 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
6414 
/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 *
 * The array is indexed in parallel with vhci_class_list[]: entry i holds
 * the nvlist for the class named by vhci_class_list[i].  setup_vhci_cache()
 * takes the nvlist out of the matching slot and sets the slot to NULL.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
/* NOTE(review): presumably the value stored under "version" - confirm */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
6426 
6427 /*
6428  * Given vhci class name, return its on-disk vhci cache filename.
6429  * Memory for the returned filename which includes the full path is allocated
6430  * by this function.
6431  */
6432 static char *
6433 vhclass2vhcache_filename(char *vhclass)
6434 {
6435 	char *filename;
6436 	int len;
6437 	static char *fmt = "/etc/devices/mdi_%s_cache";
6438 
6439 	/*
6440 	 * fmt contains the on-disk vhci cache file name format;
6441 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
6442 	 */
6443 
6444 	/* the -1 below is to account for "%s" in the format string */
6445 	len = strlen(fmt) + strlen(vhclass) - 1;
6446 	filename = kmem_alloc(len, KM_SLEEP);
6447 	(void) snprintf(filename, len, fmt, vhclass);
6448 	ASSERT(len == (strlen(filename) + 1));
6449 	return (filename);
6450 }
6451 
6452 /*
6453  * initialize the vhci cache related data structures and read the on-disk
6454  * vhci cached data into memory.
6455  */
6456 static void
6457 setup_vhci_cache(mdi_vhci_t *vh)
6458 {
6459 	mdi_vhci_config_t *vhc;
6460 	mdi_vhci_cache_t *vhcache;
6461 	int i;
6462 	nvlist_t *nvl = NULL;
6463 
6464 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
6465 	vh->vh_config = vhc;
6466 	vhcache = &vhc->vhc_vhcache;
6467 
6468 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
6469 
6470 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
6471 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
6472 
6473 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
6474 
6475 	/*
6476 	 * Create string hash; same as mod_hash_create_strhash() except that
6477 	 * we use NULL key destructor.
6478 	 */
6479 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
6480 	    mdi_bus_config_cache_hash_size,
6481 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
6482 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
6483 
6484 	setup_phci_driver_list(vh);
6485 
6486 	/*
6487 	 * The on-disk vhci cache is read during booting prior to the
6488 	 * lights-out period by mdi_read_devices_files().
6489 	 */
6490 	for (i = 0; i < N_VHCI_CLASSES; i++) {
6491 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
6492 			nvl = vhcache_nvl[i];
6493 			vhcache_nvl[i] = NULL;
6494 			break;
6495 		}
6496 	}
6497 
6498 	/*
6499 	 * this is to cover the case of some one manually causing unloading
6500 	 * (or detaching) and reloading (or attaching) of a vhci driver.
6501 	 */
6502 	if (nvl == NULL && modrootloaded)
6503 		nvl = read_on_disk_vhci_cache(vh->vh_class);
6504 
6505 	if (nvl != NULL) {
6506 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
6507 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
6508 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
6509 		else  {
6510 			cmn_err(CE_WARN,
6511 			    "%s: data file corrupted, will recreate\n",
6512 			    vhc->vhc_vhcache_filename);
6513 		}
6514 		rw_exit(&vhcache->vhcache_lock);
6515 		nvlist_free(nvl);
6516 	}
6517 
6518 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
6519 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
6520 
6521 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
6522 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
6523 }
6524 
6525 /*
6526  * free all vhci cache related resources
6527  */
6528 static int
6529 destroy_vhci_cache(mdi_vhci_t *vh)
6530 {
6531 	mdi_vhci_config_t *vhc = vh->vh_config;
6532 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
6533 	mdi_vhcache_phci_t *cphci, *cphci_next;
6534 	mdi_vhcache_client_t *cct, *cct_next;
6535 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
6536 
6537 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
6538 		return (MDI_FAILURE);
6539 
6540 	kmem_free(vhc->vhc_vhcache_filename,
6541 	    strlen(vhc->vhc_vhcache_filename) + 1);
6542 
6543 	if (vhc->vhc_phci_driver_list)
6544 		free_phci_driver_list(vhc);
6545 
6546 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
6547 
6548 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
6549 	    cphci = cphci_next) {
6550 		cphci_next = cphci->cphci_next;
6551 		free_vhcache_phci(cphci);
6552 	}
6553 
6554 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
6555 		cct_next = cct->cct_next;
6556 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
6557 			cpi_next = cpi->cpi_next;
6558 			free_vhcache_pathinfo(cpi);
6559 		}
6560 		free_vhcache_client(cct);
6561 	}
6562 
6563 	rw_destroy(&vhcache->vhcache_lock);
6564 
6565 	mutex_destroy(&vhc->vhc_lock);
6566 	cv_destroy(&vhc->vhc_cv);
6567 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
6568 	return (MDI_SUCCESS);
6569 }
6570 
6571 /*
6572  * Setup the list of phci drivers associated with the specified vhci class.
6573  * MDI uses this information to rebuild bus config cache if in case the
6574  * cache is not available or corrupted.
6575  */
6576 static void
6577 setup_phci_driver_list(mdi_vhci_t *vh)
6578 {
6579 	mdi_vhci_config_t *vhc = vh->vh_config;
6580 	mdi_phci_driver_info_t *driver_list;
6581 	char **driver_list1;
6582 	uint_t ndrivers, ndrivers1;
6583 	int i, j;
6584 
6585 	if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
6586 		driver_list = scsi_phci_driver_list;
6587 		ndrivers = sizeof (scsi_phci_driver_list) /
6588 		    sizeof (mdi_phci_driver_info_t);
6589 	} else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) {
6590 		driver_list = ib_phci_driver_list;
6591 		ndrivers = sizeof (ib_phci_driver_list) /
6592 		    sizeof (mdi_phci_driver_info_t);
6593 	} else {
6594 		driver_list = NULL;
6595 		ndrivers = 0;
6596 	}
6597 
6598 	/*
6599 	 * The driver.conf file of a vhci driver can specify additional
6600 	 * phci drivers using a project private "phci-drivers" property.
6601 	 */
6602 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip,
6603 	    DDI_PROP_DONTPASS, "phci-drivers", &driver_list1,
6604 	    &ndrivers1) != DDI_PROP_SUCCESS)
6605 		ndrivers1 = 0;
6606 
6607 	vhc->vhc_nphci_drivers = ndrivers + ndrivers1;
6608 	if (vhc->vhc_nphci_drivers == 0)
6609 		return;
6610 
6611 	vhc->vhc_phci_driver_list = kmem_alloc(
6612 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP);
6613 
6614 	for (i = 0; i < ndrivers; i++) {
6615 		vhc->vhc_phci_driver_list[i].phdriver_name =
6616 		    i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP);
6617 		vhc->vhc_phci_driver_list[i].phdriver_root_support =
6618 		    driver_list[i].phdriver_root_support;
6619 	}
6620 
6621 	for (j = 0; j < ndrivers1; j++, i++) {
6622 		vhc->vhc_phci_driver_list[i].phdriver_name =
6623 		    i_ddi_strdup(driver_list1[j], KM_SLEEP);
6624 		vhc->vhc_phci_driver_list[i].phdriver_root_support = 1;
6625 	}
6626 
6627 	if (ndrivers1)
6628 		ddi_prop_free(driver_list1);
6629 }
6630 
6631 /*
6632  * Free the memory allocated for the phci driver list
6633  */
6634 static void
6635 free_phci_driver_list(mdi_vhci_config_t *vhc)
6636 {
6637 	int i;
6638 
6639 	if (vhc->vhc_phci_driver_list == NULL)
6640 		return;
6641 
6642 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
6643 		kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name,
6644 		    strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1);
6645 	}
6646 
6647 	kmem_free(vhc->vhc_phci_driver_list,
6648 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers);
6649 }
6650 
6651 /*
6652  * Stop all vhci cache related async threads and free their resources.
6653  */
6654 static int
6655 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
6656 {
6657 	mdi_async_client_config_t *acc, *acc_next;
6658 
6659 	mutex_enter(&vhc->vhc_lock);
6660 	vhc->vhc_flags |= MDI_VHC_EXIT;
6661 	ASSERT(vhc->vhc_acc_thrcount >= 0);
6662 	cv_broadcast(&vhc->vhc_cv);
6663 
6664 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
6665 	    vhc->vhc_acc_thrcount != 0) {
6666 		mutex_exit(&vhc->vhc_lock);
6667 		delay(1);
6668 		mutex_enter(&vhc->vhc_lock);
6669 	}
6670 
6671 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
6672 
6673 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
6674 		acc_next = acc->acc_next;
6675 		free_async_client_config(acc);
6676 	}
6677 	vhc->vhc_acc_list_head = NULL;
6678 	vhc->vhc_acc_list_tail = NULL;
6679 	vhc->vhc_acc_count = 0;
6680 
6681 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6682 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6683 		mutex_exit(&vhc->vhc_lock);
6684 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
6685 			vhcache_dirty(vhc);
6686 			return (MDI_FAILURE);
6687 		}
6688 	} else
6689 		mutex_exit(&vhc->vhc_lock);
6690 
6691 	if (callb_delete(vhc->vhc_cbid) != 0)
6692 		return (MDI_FAILURE);
6693 
6694 	return (MDI_SUCCESS);
6695 }
6696 
6697 /*
6698  * Stop vhci cache flush thread
6699  */
6700 /* ARGSUSED */
6701 static boolean_t
6702 stop_vhcache_flush_thread(void *arg, int code)
6703 {
6704 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
6705 
6706 	mutex_enter(&vhc->vhc_lock);
6707 	vhc->vhc_flags |= MDI_VHC_EXIT;
6708 	cv_broadcast(&vhc->vhc_cv);
6709 
6710 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
6711 		mutex_exit(&vhc->vhc_lock);
6712 		delay(1);
6713 		mutex_enter(&vhc->vhc_lock);
6714 	}
6715 
6716 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6717 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6718 		mutex_exit(&vhc->vhc_lock);
6719 		(void) flush_vhcache(vhc, 1);
6720 	} else
6721 		mutex_exit(&vhc->vhc_lock);
6722 
6723 	return (B_TRUE);
6724 }
6725 
6726 /*
6727  * Enqueue the vhcache phci (cphci) at the tail of the list
6728  */
6729 static void
6730 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
6731 {
6732 	cphci->cphci_next = NULL;
6733 	if (vhcache->vhcache_phci_head == NULL)
6734 		vhcache->vhcache_phci_head = cphci;
6735 	else
6736 		vhcache->vhcache_phci_tail->cphci_next = cphci;
6737 	vhcache->vhcache_phci_tail = cphci;
6738 }
6739 
6740 /*
6741  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
6742  */
6743 static void
6744 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6745     mdi_vhcache_pathinfo_t *cpi)
6746 {
6747 	cpi->cpi_next = NULL;
6748 	if (cct->cct_cpi_head == NULL)
6749 		cct->cct_cpi_head = cpi;
6750 	else
6751 		cct->cct_cpi_tail->cpi_next = cpi;
6752 	cct->cct_cpi_tail = cpi;
6753 }
6754 
6755 /*
6756  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
6757  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
6758  * flag set come at the beginning of the list. All cpis which have this
6759  * flag set come at the end of the list.
6760  */
6761 static void
6762 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6763     mdi_vhcache_pathinfo_t *newcpi)
6764 {
6765 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
6766 
6767 	if (cct->cct_cpi_head == NULL ||
6768 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
6769 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
6770 	else {
6771 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
6772 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
6773 		    prev_cpi = cpi, cpi = cpi->cpi_next)
6774 			;
6775 
6776 		if (prev_cpi == NULL)
6777 			cct->cct_cpi_head = newcpi;
6778 		else
6779 			prev_cpi->cpi_next = newcpi;
6780 
6781 		newcpi->cpi_next = cpi;
6782 
6783 		if (cpi == NULL)
6784 			cct->cct_cpi_tail = newcpi;
6785 	}
6786 }
6787 
6788 /*
6789  * Enqueue the vhcache client (cct) at the tail of the list
6790  */
6791 static void
6792 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
6793     mdi_vhcache_client_t *cct)
6794 {
6795 	cct->cct_next = NULL;
6796 	if (vhcache->vhcache_client_head == NULL)
6797 		vhcache->vhcache_client_head = cct;
6798 	else
6799 		vhcache->vhcache_client_tail->cct_next = cct;
6800 	vhcache->vhcache_client_tail = cct;
6801 }
6802 
/*
 * Free an array of nelem strings together with the array itself.
 *
 * A NULL array is ignored, as are NULL elements.  Each string is released
 * with a size of strlen() + 1, so it must have been allocated with exactly
 * that many bytes.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] == NULL)
			continue;
		kmem_free(str[idx], strlen(str[idx]) + 1);
	}

	kmem_free(str, sizeof (char *) * nelem);
}
6816 
6817 static void
6818 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
6819 {
6820 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
6821 	kmem_free(cphci, sizeof (*cphci));
6822 }
6823 
6824 static void
6825 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
6826 {
6827 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
6828 	kmem_free(cpi, sizeof (*cpi));
6829 }
6830 
6831 static void
6832 free_vhcache_client(mdi_vhcache_client_t *cct)
6833 {
6834 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
6835 	kmem_free(cct, sizeof (*cct));
6836 }
6837 
6838 static char *
6839 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
6840 {
6841 	char *name_addr;
6842 	int len;
6843 
6844 	len = strlen(ct_name) + strlen(ct_addr) + 2;
6845 	name_addr = kmem_alloc(len, KM_SLEEP);
6846 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
6847 
6848 	if (ret_len)
6849 		*ret_len = len;
6850 	return (name_addr);
6851 }
6852 
6853 /*
6854  * Copy the contents of paddrnvl to vhci cache.
6855  * paddrnvl nvlist contains path information for a vhci client.
6856  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
6857  */
6858 static void
6859 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
6860     mdi_vhcache_client_t *cct)
6861 {
6862 	nvpair_t *nvp = NULL;
6863 	mdi_vhcache_pathinfo_t *cpi;
6864 	uint_t nelem;
6865 	uint32_t *val;
6866 
6867 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6868 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
6869 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
6870 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6871 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
6872 		ASSERT(nelem == 2);
6873 		cpi->cpi_cphci = cphci_list[val[0]];
6874 		cpi->cpi_flags = val[1];
6875 		enqueue_tail_vhcache_pathinfo(cct, cpi);
6876 	}
6877 }
6878 
6879 /*
6880  * Copy the contents of caddrmapnvl to vhci cache.
6881  * caddrmapnvl nvlist contains vhci client address to phci client address
6882  * mappings. See the comment in mainnvl_to_vhcache() for the format of
6883  * this nvlist.
6884  */
6885 static void
6886 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
6887     mdi_vhcache_phci_t *cphci_list[])
6888 {
6889 	nvpair_t *nvp = NULL;
6890 	nvlist_t *paddrnvl;
6891 	mdi_vhcache_client_t *cct;
6892 
6893 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6894 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
6895 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
6896 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6897 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
6898 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
6899 		/* the client must contain at least one path */
6900 		ASSERT(cct->cct_cpi_head != NULL);
6901 
6902 		enqueue_vhcache_client(vhcache, cct);
6903 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
6904 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
6905 	}
6906 }
6907 
6908 /*
6909  * Copy the contents of the main nvlist to vhci cache.
6910  *
6911  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
6912  * The nvlist contains the mappings between the vhci client addresses and
6913  * their corresponding phci client addresses.
6914  *
6915  * The structure of the nvlist is as follows:
6916  *
6917  * Main nvlist:
6918  *	NAME		TYPE		DATA
6919  *	version		int32		version number
6920  *	phcis		string array	array of phci paths
6921  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
6922  *
6923  * structure of c2paddrs_nvl:
6924  *	NAME		TYPE		DATA
6925  *	caddr1		nvlist_t	paddrs_nvl1
6926  *	caddr2		nvlist_t	paddrs_nvl2
6927  *	...
6928  * where caddr1, caddr2, ... are vhci client name and addresses in the
6929  * form of "<clientname>@<clientaddress>".
6930  * (for example: "ssd@2000002037cd9f72");
6931  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
6932  *
6933  * structure of paddrs_nvl:
6934  *	NAME		TYPE		DATA
6935  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
6936  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
6937  *	...
6938  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
6939  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
6940  * phci-ids are integers that identify PHCIs to which the
6941  * the bus specific address belongs to. These integers are used as an index
6942  * into to the phcis string array in the main nvlist to get the PHCI path.
6943  */
6944 static int
6945 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
6946 {
6947 	char **phcis, **phci_namep;
6948 	uint_t nphcis;
6949 	mdi_vhcache_phci_t *cphci, **cphci_list;
6950 	nvlist_t *caddrmapnvl;
6951 	int32_t ver;
6952 	int i;
6953 	size_t cphci_list_size;
6954 
6955 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
6956 
6957 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
6958 	    ver != MDI_VHCI_CACHE_VERSION)
6959 		return (MDI_FAILURE);
6960 
6961 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
6962 	    &nphcis) != 0)
6963 		return (MDI_SUCCESS);
6964 
6965 	ASSERT(nphcis > 0);
6966 
6967 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
6968 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
6969 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
6970 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
6971 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
6972 		enqueue_vhcache_phci(vhcache, cphci);
6973 		cphci_list[i] = cphci;
6974 	}
6975 
6976 	ASSERT(vhcache->vhcache_phci_head != NULL);
6977 
6978 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
6979 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
6980 
6981 	kmem_free(cphci_list, cphci_list_size);
6982 	return (MDI_SUCCESS);
6983 }
6984 
6985 /*
6986  * Build paddrnvl for the specified client using the information in the
6987  * vhci cache and add it to the caddrmapnnvl.
6988  * Returns 0 on success, errno on failure.
6989  */
6990 static int
6991 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
6992     nvlist_t *caddrmapnvl)
6993 {
6994 	mdi_vhcache_pathinfo_t *cpi;
6995 	nvlist_t *nvl;
6996 	int err;
6997 	uint32_t val[2];
6998 
6999 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7000 
7001 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7002 		return (err);
7003 
7004 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7005 		val[0] = cpi->cpi_cphci->cphci_id;
7006 		val[1] = cpi->cpi_flags;
7007 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7008 		    != 0)
7009 			goto out;
7010 	}
7011 
7012 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7013 out:
7014 	nvlist_free(nvl);
7015 	return (err);
7016 }
7017 
7018 /*
7019  * Build caddrmapnvl using the information in the vhci cache
7020  * and add it to the mainnvl.
7021  * Returns 0 on success, errno on failure.
7022  */
7023 static int
7024 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7025 {
7026 	mdi_vhcache_client_t *cct;
7027 	nvlist_t *nvl;
7028 	int err;
7029 
7030 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7031 
7032 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7033 		return (err);
7034 
7035 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7036 	    cct = cct->cct_next) {
7037 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7038 			goto out;
7039 	}
7040 
7041 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7042 out:
7043 	nvlist_free(nvl);
7044 	return (err);
7045 }
7046 
7047 /*
7048  * Build nvlist using the information in the vhci cache.
7049  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7050  * Returns nvl on success, NULL on failure.
7051  */
7052 static nvlist_t *
7053 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7054 {
7055 	mdi_vhcache_phci_t *cphci;
7056 	uint_t phci_count;
7057 	char **phcis;
7058 	nvlist_t *nvl;
7059 	int err, i;
7060 
7061 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7062 		nvl = NULL;
7063 		goto out;
7064 	}
7065 
7066 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7067 	    MDI_VHCI_CACHE_VERSION)) != 0)
7068 		goto out;
7069 
7070 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7071 	if (vhcache->vhcache_phci_head == NULL) {
7072 		rw_exit(&vhcache->vhcache_lock);
7073 		return (nvl);
7074 	}
7075 
7076 	phci_count = 0;
7077 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7078 	    cphci = cphci->cphci_next)
7079 		cphci->cphci_id = phci_count++;
7080 
7081 	/* build phci pathname list */
7082 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7083 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7084 	    cphci = cphci->cphci_next, i++)
7085 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7086 
7087 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7088 	    phci_count);
7089 	free_string_array(phcis, phci_count);
7090 
7091 	if (err == 0 &&
7092 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7093 		rw_exit(&vhcache->vhcache_lock);
7094 		return (nvl);
7095 	}
7096 
7097 	rw_exit(&vhcache->vhcache_lock);
7098 out:
7099 	if (nvl)
7100 		nvlist_free(nvl);
7101 	return (NULL);
7102 }
7103 
7104 /*
7105  * Lookup vhcache phci structure for the specified phci path.
7106  */
7107 static mdi_vhcache_phci_t *
7108 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7109 {
7110 	mdi_vhcache_phci_t *cphci;
7111 
7112 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7113 
7114 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7115 	    cphci = cphci->cphci_next) {
7116 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7117 			return (cphci);
7118 	}
7119 
7120 	return (NULL);
7121 }
7122 
7123 /*
7124  * Lookup vhcache phci structure for the specified phci.
7125  */
7126 static mdi_vhcache_phci_t *
7127 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7128 {
7129 	mdi_vhcache_phci_t *cphci;
7130 
7131 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7132 
7133 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7134 	    cphci = cphci->cphci_next) {
7135 		if (cphci->cphci_phci == ph)
7136 			return (cphci);
7137 	}
7138 
7139 	return (NULL);
7140 }
7141 
7142 /*
7143  * Add the specified phci to the vhci cache if not already present.
7144  */
7145 static void
7146 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7147 {
7148 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7149 	mdi_vhcache_phci_t *cphci;
7150 	char *pathname;
7151 	int cache_updated;
7152 
7153 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7154 
7155 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7156 	(void) ddi_pathname(ph->ph_dip, pathname);
7157 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7158 	    != NULL) {
7159 		cphci->cphci_phci = ph;
7160 		cache_updated = 0;
7161 	} else {
7162 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7163 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7164 		cphci->cphci_phci = ph;
7165 		enqueue_vhcache_phci(vhcache, cphci);
7166 		cache_updated = 1;
7167 	}
7168 
7169 	rw_exit(&vhcache->vhcache_lock);
7170 
7171 	/*
7172 	 * Since a new phci has been added, reset
7173 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7174 	 * during next vhcache_discover_paths().
7175 	 */
7176 	mutex_enter(&vhc->vhc_lock);
7177 	vhc->vhc_path_discovery_cutoff_time = 0;
7178 	mutex_exit(&vhc->vhc_lock);
7179 
7180 	kmem_free(pathname, MAXPATHLEN);
7181 	if (cache_updated)
7182 		vhcache_dirty(vhc);
7183 }
7184 
7185 /*
7186  * Remove the reference to the specified phci from the vhci cache.
7187  */
7188 static void
7189 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7190 {
7191 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7192 	mdi_vhcache_phci_t *cphci;
7193 
7194 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7195 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7196 		/* do not remove the actual mdi_vhcache_phci structure */
7197 		cphci->cphci_phci = NULL;
7198 	}
7199 	rw_exit(&vhcache->vhcache_lock);
7200 }
7201 
7202 static void
7203 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7204     mdi_vhcache_lookup_token_t *src)
7205 {
7206 	if (src == NULL) {
7207 		dst->lt_cct = NULL;
7208 		dst->lt_cct_lookup_time = 0;
7209 	} else {
7210 		dst->lt_cct = src->lt_cct;
7211 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7212 	}
7213 }
7214 
7215 /*
7216  * Look up vhcache client for the specified client.
7217  */
7218 static mdi_vhcache_client_t *
7219 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7220     mdi_vhcache_lookup_token_t *token)
7221 {
7222 	mod_hash_val_t hv;
7223 	char *name_addr;
7224 	int len;
7225 
7226 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7227 
7228 	/*
7229 	 * If no vhcache clean occurred since the last lookup, we can
7230 	 * simply return the cct from the last lookup operation.
7231 	 * It works because ccts are never freed except during the vhcache
7232 	 * cleanup operation.
7233 	 */
7234 	if (token != NULL &&
7235 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7236 		return (token->lt_cct);
7237 
7238 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7239 	if (mod_hash_find(vhcache->vhcache_client_hash,
7240 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7241 		if (token) {
7242 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7243 			token->lt_cct_lookup_time = lbolt64;
7244 		}
7245 	} else {
7246 		if (token) {
7247 			token->lt_cct = NULL;
7248 			token->lt_cct_lookup_time = 0;
7249 		}
7250 		hv = NULL;
7251 	}
7252 	kmem_free(name_addr, len);
7253 	return ((mdi_vhcache_client_t *)hv);
7254 }
7255 
7256 /*
7257  * Add the specified path to the vhci cache if not already present.
7258  * Also add the vhcache client for the client corresponding to this path
7259  * if it doesn't already exist.
7260  */
7261 static void
7262 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7263 {
7264 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7265 	mdi_vhcache_client_t *cct;
7266 	mdi_vhcache_pathinfo_t *cpi;
7267 	mdi_phci_t *ph = pip->pi_phci;
7268 	mdi_client_t *ct = pip->pi_client;
7269 	int cache_updated = 0;
7270 
7271 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7272 
7273 	/* if vhcache client for this pip doesn't already exist, add it */
7274 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7275 	    NULL)) == NULL) {
7276 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7277 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7278 		    ct->ct_guid, NULL);
7279 		enqueue_vhcache_client(vhcache, cct);
7280 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7281 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7282 		cache_updated = 1;
7283 	}
7284 
7285 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7286 		if (cpi->cpi_cphci->cphci_phci == ph &&
7287 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7288 			cpi->cpi_pip = pip;
7289 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7290 				cpi->cpi_flags &=
7291 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7292 				sort_vhcache_paths(cct);
7293 				cache_updated = 1;
7294 			}
7295 			break;
7296 		}
7297 	}
7298 
7299 	if (cpi == NULL) {
7300 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7301 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7302 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7303 		ASSERT(cpi->cpi_cphci != NULL);
7304 		cpi->cpi_pip = pip;
7305 		enqueue_vhcache_pathinfo(cct, cpi);
7306 		cache_updated = 1;
7307 	}
7308 
7309 	rw_exit(&vhcache->vhcache_lock);
7310 
7311 	if (cache_updated)
7312 		vhcache_dirty(vhc);
7313 }
7314 
7315 /*
7316  * Remove the reference to the specified path from the vhci cache.
7317  */
7318 static void
7319 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7320 {
7321 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7322 	mdi_client_t *ct = pip->pi_client;
7323 	mdi_vhcache_client_t *cct;
7324 	mdi_vhcache_pathinfo_t *cpi;
7325 
7326 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7327 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7328 	    NULL)) != NULL) {
7329 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7330 		    cpi = cpi->cpi_next) {
7331 			if (cpi->cpi_pip == pip) {
7332 				cpi->cpi_pip = NULL;
7333 				break;
7334 			}
7335 		}
7336 	}
7337 	rw_exit(&vhcache->vhcache_lock);
7338 }
7339 
7340 /*
7341  * Flush the vhci cache to disk.
7342  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7343  */
7344 static int
7345 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7346 {
7347 	nvlist_t *nvl;
7348 	int err;
7349 	int rv;
7350 
7351 	/*
7352 	 * It is possible that the system may shutdown before
7353 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7354 	 * flushing the cache in this case do not check for
7355 	 * i_ddi_io_initialized when force flag is set.
7356 	 */
7357 	if (force_flag == 0 && !i_ddi_io_initialized())
7358 		return (MDI_FAILURE);
7359 
7360 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7361 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7362 		nvlist_free(nvl);
7363 	} else
7364 		err = EFAULT;
7365 
7366 	rv = MDI_SUCCESS;
7367 	mutex_enter(&vhc->vhc_lock);
7368 	if (err != 0) {
7369 		if (err == EROFS) {
7370 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7371 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7372 			    MDI_VHC_VHCACHE_DIRTY);
7373 		} else {
7374 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7375 				cmn_err(CE_CONT, "%s: update failed\n",
7376 				    vhc->vhc_vhcache_filename);
7377 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7378 			}
7379 			rv = MDI_FAILURE;
7380 		}
7381 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7382 		cmn_err(CE_CONT,
7383 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7384 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7385 	}
7386 	mutex_exit(&vhc->vhc_lock);
7387 
7388 	return (rv);
7389 }
7390 
7391 /*
7392  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7393  * Exits itself if left idle for the idle timeout period.
7394  */
7395 static void
7396 vhcache_flush_thread(void *arg)
7397 {
7398 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7399 	clock_t idle_time, quit_at_ticks;
7400 	callb_cpr_t cprinfo;
7401 
7402 	/* number of seconds to sleep idle before exiting */
7403 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7404 
7405 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7406 	    "mdi_vhcache_flush");
7407 	mutex_enter(&vhc->vhc_lock);
7408 	for (; ; ) {
7409 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7410 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7411 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7412 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7413 				(void) cv_timedwait(&vhc->vhc_cv,
7414 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7415 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7416 			} else {
7417 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7418 				mutex_exit(&vhc->vhc_lock);
7419 
7420 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7421 					vhcache_dirty(vhc);
7422 
7423 				mutex_enter(&vhc->vhc_lock);
7424 			}
7425 		}
7426 
7427 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7428 
7429 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7430 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7431 		    ddi_get_lbolt() < quit_at_ticks) {
7432 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7433 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7434 			    quit_at_ticks);
7435 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7436 		}
7437 
7438 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7439 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7440 			goto out;
7441 	}
7442 
7443 out:
7444 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7445 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7446 	CALLB_CPR_EXIT(&cprinfo);
7447 }
7448 
7449 /*
7450  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7451  */
7452 static void
7453 vhcache_dirty(mdi_vhci_config_t *vhc)
7454 {
7455 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7456 	int create_thread;
7457 
7458 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7459 	/* do not flush cache until the cache is fully built */
7460 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7461 		rw_exit(&vhcache->vhcache_lock);
7462 		return;
7463 	}
7464 	rw_exit(&vhcache->vhcache_lock);
7465 
7466 	mutex_enter(&vhc->vhc_lock);
7467 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7468 		mutex_exit(&vhc->vhc_lock);
7469 		return;
7470 	}
7471 
7472 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7473 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
7474 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
7475 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7476 		cv_broadcast(&vhc->vhc_cv);
7477 		create_thread = 0;
7478 	} else {
7479 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
7480 		create_thread = 1;
7481 	}
7482 	mutex_exit(&vhc->vhc_lock);
7483 
7484 	if (create_thread)
7485 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
7486 		    0, &p0, TS_RUN, minclsyspri);
7487 }
7488 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	/* device path of the phci to configure */
	char				*phbc_phci_path;
	/* vhci bus config operation this request belongs to */
	struct mdi_vhci_bus_config_s	*phbc_vhbusconfig;
	/* next request in the list */
	struct mdi_phci_bus_config_s	*phbc_next;
} mdi_phci_bus_config_t;
7498 
7499 /* vhci bus config structure - one for each vhci bus config operation */
7500 typedef struct mdi_vhci_bus_config_s {
7501 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
7502 	major_t vhbc_op_major;		/* bus config op major */
7503 	uint_t vhbc_op_flags;		/* bus config op flags */
7504 	kmutex_t vhbc_lock;
7505 	kcondvar_t vhbc_cv;
7506 	int vhbc_thr_count;
7507 } mdi_vhci_bus_config_t;
7508 
7509 /*
7510  * bus config the specified phci
7511  */
7512 static void
7513 bus_config_phci(void *arg)
7514 {
7515 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
7516 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
7517 	dev_info_t *ph_dip;
7518 
7519 	/*
7520 	 * first configure all path components upto phci and then configure
7521 	 * the phci children.
7522 	 */
7523 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
7524 	    != NULL) {
7525 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
7526 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
7527 			(void) ndi_devi_config_driver(ph_dip,
7528 			    vhbc->vhbc_op_flags,
7529 			    vhbc->vhbc_op_major);
7530 		} else
7531 			(void) ndi_devi_config(ph_dip,
7532 			    vhbc->vhbc_op_flags);
7533 
7534 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7535 		ndi_rele_devi(ph_dip);
7536 	}
7537 
7538 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
7539 	kmem_free(phbc, sizeof (*phbc));
7540 
7541 	mutex_enter(&vhbc->vhbc_lock);
7542 	vhbc->vhbc_thr_count--;
7543 	if (vhbc->vhbc_thr_count == 0)
7544 		cv_broadcast(&vhbc->vhbc_cv);
7545 	mutex_exit(&vhbc->vhbc_lock);
7546 }
7547 
7548 /*
7549  * Bus config all phcis associated with the vhci in parallel.
7550  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
7551  */
7552 static void
7553 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
7554     ddi_bus_config_op_t op, major_t maj)
7555 {
7556 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
7557 	mdi_vhci_bus_config_t *vhbc;
7558 	mdi_vhcache_phci_t *cphci;
7559 
7560 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7561 	if (vhcache->vhcache_phci_head == NULL) {
7562 		rw_exit(&vhcache->vhcache_lock);
7563 		return;
7564 	}
7565 
7566 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
7567 
7568 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7569 	    cphci = cphci->cphci_next) {
7570 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
7571 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
7572 		    KM_SLEEP);
7573 		phbc->phbc_vhbusconfig = vhbc;
7574 		phbc->phbc_next = phbc_head;
7575 		phbc_head = phbc;
7576 		vhbc->vhbc_thr_count++;
7577 	}
7578 	rw_exit(&vhcache->vhcache_lock);
7579 
7580 	vhbc->vhbc_op = op;
7581 	vhbc->vhbc_op_major = maj;
7582 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
7583 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
7584 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
7585 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
7586 
7587 	/* now create threads to initiate bus config on all phcis in parallel */
7588 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
7589 		phbc_next = phbc->phbc_next;
7590 		if (mdi_mtc_off)
7591 			bus_config_phci((void *)phbc);
7592 		else
7593 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
7594 			    0, &p0, TS_RUN, minclsyspri);
7595 	}
7596 
7597 	mutex_enter(&vhbc->vhbc_lock);
7598 	/* wait until all threads exit */
7599 	while (vhbc->vhbc_thr_count > 0)
7600 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
7601 	mutex_exit(&vhbc->vhbc_lock);
7602 
7603 	mutex_destroy(&vhbc->vhbc_lock);
7604 	cv_destroy(&vhbc->vhbc_cv);
7605 	kmem_free(vhbc, sizeof (*vhbc));
7606 }
7607 
7608 /*
7609  * Single threaded version of bus_config_all_phcis()
7610  */
7611 static void
7612 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
7613     ddi_bus_config_op_t op, major_t maj)
7614 {
7615 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7616 
7617 	single_threaded_vhconfig_enter(vhc);
7618 	bus_config_all_phcis(vhcache, flags, op, maj);
7619 	single_threaded_vhconfig_exit(vhc);
7620 }
7621 
7622 /*
7623  * Perform BUS_CONFIG_ONE on the specified child of the phci.
7624  * The path includes the child component in addition to the phci path.
7625  */
7626 static int
7627 bus_config_one_phci_child(char *path)
7628 {
7629 	dev_info_t *ph_dip, *child;
7630 	char *devnm;
7631 	int rv = MDI_FAILURE;
7632 
7633 	/* extract the child component of the phci */
7634 	devnm = strrchr(path, '/');
7635 	*devnm++ = '\0';
7636 
7637 	/*
7638 	 * first configure all path components upto phci and then
7639 	 * configure the phci child.
7640 	 */
7641 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
7642 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
7643 		    NDI_SUCCESS) {
7644 			/*
7645 			 * release the hold that ndi_devi_config_one() placed
7646 			 */
7647 			ndi_rele_devi(child);
7648 			rv = MDI_SUCCESS;
7649 		}
7650 
7651 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7652 		ndi_rele_devi(ph_dip);
7653 	}
7654 
7655 	devnm--;
7656 	*devnm = '/';
7657 	return (rv);
7658 }
7659 
7660 /*
7661  * Build a list of phci client paths for the specified vhci client.
7662  * The list includes only those phci client paths which aren't configured yet.
7663  */
7664 static mdi_phys_path_t *
7665 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
7666 {
7667 	mdi_vhcache_pathinfo_t *cpi;
7668 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
7669 	int config_path, len;
7670 
7671 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7672 		/*
7673 		 * include only those paths that aren't configured.
7674 		 */
7675 		config_path = 0;
7676 		if (cpi->cpi_pip == NULL)
7677 			config_path = 1;
7678 		else {
7679 			MDI_PI_LOCK(cpi->cpi_pip);
7680 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
7681 				config_path = 1;
7682 			MDI_PI_UNLOCK(cpi->cpi_pip);
7683 		}
7684 
7685 		if (config_path) {
7686 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
7687 			len = strlen(cpi->cpi_cphci->cphci_path) +
7688 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
7689 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
7690 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
7691 			    cpi->cpi_cphci->cphci_path, ct_name,
7692 			    cpi->cpi_addr);
7693 			pp->phys_path_next = NULL;
7694 
7695 			if (pp_head == NULL)
7696 				pp_head = pp;
7697 			else
7698 				pp_tail->phys_path_next = pp;
7699 			pp_tail = pp;
7700 		}
7701 	}
7702 
7703 	return (pp_head);
7704 }
7705 
7706 /*
7707  * Free the memory allocated for phci client path list.
7708  */
7709 static void
7710 free_phclient_path_list(mdi_phys_path_t *pp_head)
7711 {
7712 	mdi_phys_path_t *pp, *pp_next;
7713 
7714 	for (pp = pp_head; pp != NULL; pp = pp_next) {
7715 		pp_next = pp->phys_path_next;
7716 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
7717 		kmem_free(pp, sizeof (*pp));
7718 	}
7719 }
7720 
7721 /*
7722  * Allocated async client structure and initialize with the specified values.
7723  */
7724 static mdi_async_client_config_t *
7725 alloc_async_client_config(char *ct_name, char *ct_addr,
7726     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7727 {
7728 	mdi_async_client_config_t *acc;
7729 
7730 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
7731 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
7732 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
7733 	acc->acc_phclient_path_list_head = pp_head;
7734 	init_vhcache_lookup_token(&acc->acc_token, tok);
7735 	acc->acc_next = NULL;
7736 	return (acc);
7737 }
7738 
7739 /*
7740  * Free the memory allocated for the async client structure and their members.
7741  */
7742 static void
7743 free_async_client_config(mdi_async_client_config_t *acc)
7744 {
7745 	if (acc->acc_phclient_path_list_head)
7746 		free_phclient_path_list(acc->acc_phclient_path_list_head);
7747 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
7748 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
7749 	kmem_free(acc, sizeof (*acc));
7750 }
7751 
7752 /*
7753  * Sort vhcache pathinfos (cpis) of the specified client.
7754  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7755  * flag set come at the beginning of the list. All cpis which have this
7756  * flag set come at the end of the list.
7757  */
7758 static void
7759 sort_vhcache_paths(mdi_vhcache_client_t *cct)
7760 {
7761 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
7762 
7763 	cpi_head = cct->cct_cpi_head;
7764 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
7765 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
7766 		cpi_next = cpi->cpi_next;
7767 		enqueue_vhcache_pathinfo(cct, cpi);
7768 	}
7769 }
7770 
7771 /*
7772  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
7773  * every vhcache pathinfo of the specified client. If not adjust the flag
7774  * setting appropriately.
7775  *
7776  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
7777  * on-disk vhci cache. So every time this flag is updated the cache must be
7778  * flushed.
7779  */
7780 static void
7781 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7782     mdi_vhcache_lookup_token_t *tok)
7783 {
7784 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7785 	mdi_vhcache_client_t *cct;
7786 	mdi_vhcache_pathinfo_t *cpi;
7787 
7788 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7789 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
7790 	    == NULL) {
7791 		rw_exit(&vhcache->vhcache_lock);
7792 		return;
7793 	}
7794 
7795 	/*
7796 	 * to avoid unnecessary on-disk cache updates, first check if an
7797 	 * update is really needed. If no update is needed simply return.
7798 	 */
7799 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7800 		if ((cpi->cpi_pip != NULL &&
7801 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
7802 		    (cpi->cpi_pip == NULL &&
7803 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
7804 			break;
7805 		}
7806 	}
7807 	if (cpi == NULL) {
7808 		rw_exit(&vhcache->vhcache_lock);
7809 		return;
7810 	}
7811 
7812 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
7813 		rw_exit(&vhcache->vhcache_lock);
7814 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7815 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
7816 		    tok)) == NULL) {
7817 			rw_exit(&vhcache->vhcache_lock);
7818 			return;
7819 		}
7820 	}
7821 
7822 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7823 		if (cpi->cpi_pip != NULL)
7824 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7825 		else
7826 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7827 	}
7828 	sort_vhcache_paths(cct);
7829 
7830 	rw_exit(&vhcache->vhcache_lock);
7831 	vhcache_dirty(vhc);
7832 }
7833 
7834 /*
7835  * Configure all specified paths of the client.
7836  */
7837 static void
7838 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7839     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7840 {
7841 	mdi_phys_path_t *pp;
7842 
7843 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
7844 		(void) bus_config_one_phci_child(pp->phys_path);
7845 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
7846 }
7847 
7848 /*
7849  * Dequeue elements from vhci async client config list and bus configure
7850  * their corresponding phci clients.
7851  */
7852 static void
7853 config_client_paths_thread(void *arg)
7854 {
7855 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7856 	mdi_async_client_config_t *acc;
7857 	clock_t quit_at_ticks;
7858 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
7859 	callb_cpr_t cprinfo;
7860 
7861 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7862 	    "mdi_config_client_paths");
7863 
7864 	for (; ; ) {
7865 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7866 
7867 		mutex_enter(&vhc->vhc_lock);
7868 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7869 		    vhc->vhc_acc_list_head == NULL &&
7870 		    ddi_get_lbolt() < quit_at_ticks) {
7871 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7872 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7873 			    quit_at_ticks);
7874 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7875 		}
7876 
7877 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7878 		    vhc->vhc_acc_list_head == NULL)
7879 			goto out;
7880 
7881 		acc = vhc->vhc_acc_list_head;
7882 		vhc->vhc_acc_list_head = acc->acc_next;
7883 		if (vhc->vhc_acc_list_head == NULL)
7884 			vhc->vhc_acc_list_tail = NULL;
7885 		vhc->vhc_acc_count--;
7886 		mutex_exit(&vhc->vhc_lock);
7887 
7888 		config_client_paths_sync(vhc, acc->acc_ct_name,
7889 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
7890 		    &acc->acc_token);
7891 
7892 		free_async_client_config(acc);
7893 	}
7894 
7895 out:
7896 	vhc->vhc_acc_thrcount--;
7897 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7898 	CALLB_CPR_EXIT(&cprinfo);
7899 }
7900 
7901 /*
7902  * Arrange for all the phci client paths (pp_head) for the specified client
7903  * to be bus configured asynchronously by a thread.
7904  */
7905 static void
7906 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7907     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7908 {
7909 	mdi_async_client_config_t *acc, *newacc;
7910 	int create_thread;
7911 
7912 	if (pp_head == NULL)
7913 		return;
7914 
7915 	if (mdi_mtc_off) {
7916 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
7917 		free_phclient_path_list(pp_head);
7918 		return;
7919 	}
7920 
7921 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
7922 	ASSERT(newacc);
7923 
7924 	mutex_enter(&vhc->vhc_lock);
7925 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
7926 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
7927 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
7928 			free_async_client_config(newacc);
7929 			mutex_exit(&vhc->vhc_lock);
7930 			return;
7931 		}
7932 	}
7933 
7934 	if (vhc->vhc_acc_list_head == NULL)
7935 		vhc->vhc_acc_list_head = newacc;
7936 	else
7937 		vhc->vhc_acc_list_tail->acc_next = newacc;
7938 	vhc->vhc_acc_list_tail = newacc;
7939 	vhc->vhc_acc_count++;
7940 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
7941 		cv_broadcast(&vhc->vhc_cv);
7942 		create_thread = 0;
7943 	} else {
7944 		vhc->vhc_acc_thrcount++;
7945 		create_thread = 1;
7946 	}
7947 	mutex_exit(&vhc->vhc_lock);
7948 
7949 	if (create_thread)
7950 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
7951 		    0, &p0, TS_RUN, minclsyspri);
7952 }
7953 
7954 /*
7955  * Return number of online paths for the specified client.
7956  */
7957 static int
7958 nonline_paths(mdi_vhcache_client_t *cct)
7959 {
7960 	mdi_vhcache_pathinfo_t *cpi;
7961 	int online_count = 0;
7962 
7963 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7964 		if (cpi->cpi_pip != NULL) {
7965 			MDI_PI_LOCK(cpi->cpi_pip);
7966 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
7967 				online_count++;
7968 			MDI_PI_UNLOCK(cpi->cpi_pip);
7969 		}
7970 	}
7971 
7972 	return (online_count);
7973 }
7974 
7975 /*
7976  * Bus configure all paths for the specified vhci client.
7977  * If at least one path for the client is already online, the remaining paths
7978  * will be configured asynchronously. Otherwise, it synchronously configures
7979  * the paths until at least one path is online and then rest of the paths
7980  * will be configured asynchronously.
7981  */
7982 static void
7983 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
7984 {
7985 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7986 	mdi_phys_path_t *pp_head, *pp;
7987 	mdi_vhcache_client_t *cct;
7988 	mdi_vhcache_lookup_token_t tok;
7989 
7990 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7991 
7992 	init_vhcache_lookup_token(&tok, NULL);
7993 
7994 	if (ct_name == NULL || ct_addr == NULL ||
7995 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
7996 	    == NULL ||
7997 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
7998 		rw_exit(&vhcache->vhcache_lock);
7999 		return;
8000 	}
8001 
8002 	/* if at least one path is online, configure the rest asynchronously */
8003 	if (nonline_paths(cct) > 0) {
8004 		rw_exit(&vhcache->vhcache_lock);
8005 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8006 		return;
8007 	}
8008 
8009 	rw_exit(&vhcache->vhcache_lock);
8010 
8011 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8012 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8013 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8014 
8015 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8016 			    ct_addr, &tok)) == NULL) {
8017 				rw_exit(&vhcache->vhcache_lock);
8018 				goto out;
8019 			}
8020 
8021 			if (nonline_paths(cct) > 0 &&
8022 			    pp->phys_path_next != NULL) {
8023 				rw_exit(&vhcache->vhcache_lock);
8024 				config_client_paths_async(vhc, ct_name, ct_addr,
8025 				    pp->phys_path_next, &tok);
8026 				pp->phys_path_next = NULL;
8027 				goto out;
8028 			}
8029 
8030 			rw_exit(&vhcache->vhcache_lock);
8031 		}
8032 	}
8033 
8034 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8035 out:
8036 	free_phclient_path_list(pp_head);
8037 }
8038 
8039 static void
8040 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8041 {
8042 	mutex_enter(&vhc->vhc_lock);
8043 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8044 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8045 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8046 	mutex_exit(&vhc->vhc_lock);
8047 }
8048 
8049 static void
8050 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8051 {
8052 	mutex_enter(&vhc->vhc_lock);
8053 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8054 	cv_broadcast(&vhc->vhc_cv);
8055 	mutex_exit(&vhc->vhc_lock);
8056 }
8057 
8058 /*
8059  * Attach the phci driver instances associated with the vhci:
8060  * If root is mounted attach all phci driver instances.
8061  * If root is not mounted, attach the instances of only those phci
8062  * drivers that have the root support.
8063  */
8064 static void
8065 attach_phci_drivers(mdi_vhci_config_t *vhc)
8066 {
8067 	int  i;
8068 	major_t m;
8069 
8070 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
8071 		if (modrootloaded == 0 &&
8072 		    vhc->vhc_phci_driver_list[i].phdriver_root_support == 0)
8073 			continue;
8074 
8075 		m = ddi_name_to_major(
8076 		    vhc->vhc_phci_driver_list[i].phdriver_name);
8077 		if (m != (major_t)-1) {
8078 			if (ddi_hold_installed_driver(m) != NULL)
8079 				ddi_rele_driver(m);
8080 		}
8081 	}
8082 }
8083 
8084 /*
8085  * Build vhci cache:
8086  *
8087  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8088  * the phci driver instances. During this process the cache gets built.
8089  *
8090  * Cache is built fully if the root is mounted.
8091  * If the root is not mounted, phci drivers that do not have root support
8092  * are not attached. As a result the cache is built partially. The entries
8093  * in the cache reflect only those phci drivers that have root support.
8094  */
8095 static int
8096 build_vhci_cache(mdi_vhci_config_t *vhc)
8097 {
8098 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8099 
8100 	single_threaded_vhconfig_enter(vhc);
8101 
8102 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8103 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8104 		rw_exit(&vhcache->vhcache_lock);
8105 		single_threaded_vhconfig_exit(vhc);
8106 		return (0);
8107 	}
8108 	rw_exit(&vhcache->vhcache_lock);
8109 
8110 	attach_phci_drivers(vhc);
8111 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8112 	    BUS_CONFIG_ALL, (major_t)-1);
8113 
8114 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8115 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8116 	rw_exit(&vhcache->vhcache_lock);
8117 
8118 	single_threaded_vhconfig_exit(vhc);
8119 	vhcache_dirty(vhc);
8120 	return (1);
8121 }
8122 
8123 /*
8124  * Determine if discovery of paths is needed.
8125  */
8126 static int
8127 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8128 {
8129 	int rv = 1;
8130 
8131 	mutex_enter(&vhc->vhc_lock);
8132 	if (i_ddi_io_initialized() == 0) {
8133 		if (vhc->vhc_path_discovery_boot > 0) {
8134 			vhc->vhc_path_discovery_boot--;
8135 			goto out;
8136 		}
8137 	} else {
8138 		if (vhc->vhc_path_discovery_postboot > 0) {
8139 			vhc->vhc_path_discovery_postboot--;
8140 			goto out;
8141 		}
8142 	}
8143 
8144 	/*
8145 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8146 	 * This is to avoid a series of full path discoveries when opening
8147 	 * stale /dev/[r]dsk links.
8148 	 */
8149 	if (mdi_path_discovery_interval != -1 &&
8150 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8151 		goto out;
8152 
8153 	rv = 0;
8154 out:
8155 	mutex_exit(&vhc->vhc_lock);
8156 	return (rv);
8157 }
8158 
8159 /*
8160  * Discover all paths:
8161  *
8162  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8163  * driver instances. During this process all paths will be discovered.
8164  */
8165 static int
8166 vhcache_discover_paths(mdi_vhci_config_t *vhc)
8167 {
8168 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8169 	int rv = 0;
8170 
8171 	single_threaded_vhconfig_enter(vhc);
8172 
8173 	if (vhcache_do_discovery(vhc)) {
8174 		attach_phci_drivers(vhc);
8175 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8176 		    NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1);
8177 
8178 		mutex_enter(&vhc->vhc_lock);
8179 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8180 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8181 		mutex_exit(&vhc->vhc_lock);
8182 		rv = 1;
8183 	}
8184 
8185 	single_threaded_vhconfig_exit(vhc);
8186 	return (rv);
8187 }
8188 
8189 /*
8190  * Generic vhci bus config implementation:
8191  *
8192  * Parameters
8193  *	vdip	vhci dip
8194  *	flags	bus config flags
8195  *	op	bus config operation
8196  *	The remaining parameters are bus config operation specific
8197  *
8198  * for BUS_CONFIG_ONE
8199  *	arg	pointer to name@addr
8200  *	child	upon successful return from this function, *child will be
8201  *		set to the configured and held devinfo child node of vdip.
8202  *	ct_addr	pointer to client address (i.e. GUID)
8203  *
8204  * for BUS_CONFIG_DRIVER
8205  *	arg	major number of the driver
8206  *	child and ct_addr parameters are ignored
8207  *
8208  * for BUS_CONFIG_ALL
8209  *	arg, child, and ct_addr parameters are ignored
8210  *
8211  * Note that for the rest of the bus config operations, this function simply
8212  * calls the framework provided default bus config routine.
8213  */
8214 int
8215 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8216     void *arg, dev_info_t **child, char *ct_addr)
8217 {
8218 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8219 	mdi_vhci_config_t *vhc = vh->vh_config;
8220 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8221 	int rv = 0;
8222 	int params_valid = 0;
8223 	char *cp;
8224 
8225 	/*
8226 	 * While bus configuring phcis, the phci driver interactions with MDI
8227 	 * cause child nodes to be enumerated under the vhci node for which
8228 	 * they need to ndi_devi_enter the vhci node.
8229 	 *
8230 	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
8231 	 * for the bus config operations on phcis to finish while holding the
8232 	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
8233 	 * phcis and call the default framework provided bus config function
8234 	 * if we are called with ndi_devi_enter lock held.
8235 	 */
8236 	if (DEVI_BUSY_OWNED(vdip)) {
8237 		MDI_DEBUG(2, (CE_NOTE, vdip,
8238 		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
8239 		goto default_bus_config;
8240 	}
8241 
8242 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8243 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8244 		rw_exit(&vhcache->vhcache_lock);
8245 		rv = build_vhci_cache(vhc);
8246 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8247 	}
8248 
8249 	switch (op) {
8250 	case BUS_CONFIG_ONE:
8251 		if (arg != NULL && ct_addr != NULL) {
8252 			/* extract node name */
8253 			cp = (char *)arg;
8254 			while (*cp != '\0' && *cp != '@')
8255 				cp++;
8256 			if (*cp == '@') {
8257 				params_valid = 1;
8258 				*cp = '\0';
8259 				config_client_paths(vhc, (char *)arg, ct_addr);
8260 				/* config_client_paths() releases cache_lock */
8261 				*cp = '@';
8262 				break;
8263 			}
8264 		}
8265 
8266 		rw_exit(&vhcache->vhcache_lock);
8267 		break;
8268 
8269 	case BUS_CONFIG_DRIVER:
8270 		rw_exit(&vhcache->vhcache_lock);
8271 		if (rv == 0)
8272 			st_bus_config_all_phcis(vhc, flags, op,
8273 			    (major_t)(uintptr_t)arg);
8274 		break;
8275 
8276 	case BUS_CONFIG_ALL:
8277 		rw_exit(&vhcache->vhcache_lock);
8278 		if (rv == 0)
8279 			st_bus_config_all_phcis(vhc, flags, op, -1);
8280 		break;
8281 
8282 	default:
8283 		rw_exit(&vhcache->vhcache_lock);
8284 		break;
8285 	}
8286 
8287 
8288 default_bus_config:
8289 	/*
8290 	 * All requested child nodes are enumerated under the vhci.
8291 	 * Now configure them.
8292 	 */
8293 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8294 	    NDI_SUCCESS) {
8295 		return (MDI_SUCCESS);
8296 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8297 		/* discover all paths and try configuring again */
8298 		if (vhcache_discover_paths(vhc) &&
8299 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8300 		    NDI_SUCCESS)
8301 			return (MDI_SUCCESS);
8302 	}
8303 
8304 	return (MDI_FAILURE);
8305 }
8306 
8307 /*
8308  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8309  */
8310 static nvlist_t *
8311 read_on_disk_vhci_cache(char *vhci_class)
8312 {
8313 	nvlist_t *nvl;
8314 	int err;
8315 	char *filename;
8316 
8317 	filename = vhclass2vhcache_filename(vhci_class);
8318 
8319 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8320 		kmem_free(filename, strlen(filename) + 1);
8321 		return (nvl);
8322 	} else if (err == EIO)
8323 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8324 	else if (err == EINVAL)
8325 		cmn_err(CE_WARN,
8326 		    "%s: data file corrupted, will recreate\n", filename);
8327 
8328 	kmem_free(filename, strlen(filename) + 1);
8329 	return (NULL);
8330 }
8331 
8332 /*
8333  * Read on-disk vhci cache into nvlists for all vhci classes.
8334  * Called during booting by i_ddi_read_devices_files().
8335  */
8336 void
8337 mdi_read_devices_files(void)
8338 {
8339 	int i;
8340 
8341 	for (i = 0; i < N_VHCI_CLASSES; i++)
8342 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
8343 }
8344 
8345 /*
8346  * Remove all stale entries from vhci cache.
8347  */
8348 static void
8349 clean_vhcache(mdi_vhci_config_t *vhc)
8350 {
8351 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8352 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
8353 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
8354 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
8355 
8356 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8357 
8358 	cct_head = vhcache->vhcache_client_head;
8359 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
8360 	for (cct = cct_head; cct != NULL; cct = cct_next) {
8361 		cct_next = cct->cct_next;
8362 
8363 		cpi_head = cct->cct_cpi_head;
8364 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8365 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8366 			cpi_next = cpi->cpi_next;
8367 			if (cpi->cpi_pip != NULL) {
8368 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
8369 				enqueue_tail_vhcache_pathinfo(cct, cpi);
8370 			} else
8371 				free_vhcache_pathinfo(cpi);
8372 		}
8373 
8374 		if (cct->cct_cpi_head != NULL)
8375 			enqueue_vhcache_client(vhcache, cct);
8376 		else {
8377 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
8378 			    (mod_hash_key_t)cct->cct_name_addr);
8379 			free_vhcache_client(cct);
8380 		}
8381 	}
8382 
8383 	cphci_head = vhcache->vhcache_phci_head;
8384 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
8385 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
8386 		cphci_next = cphci->cphci_next;
8387 		if (cphci->cphci_phci != NULL)
8388 			enqueue_vhcache_phci(vhcache, cphci);
8389 		else
8390 			free_vhcache_phci(cphci);
8391 	}
8392 
8393 	vhcache->vhcache_clean_time = lbolt64;
8394 	rw_exit(&vhcache->vhcache_lock);
8395 	vhcache_dirty(vhc);
8396 }
8397 
8398 /*
8399  * Remove all stale entries from vhci cache.
8400  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
8401  */
8402 void
8403 mdi_clean_vhcache(void)
8404 {
8405 	mdi_vhci_t *vh;
8406 
8407 	mutex_enter(&mdi_mutex);
8408 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8409 		vh->vh_refcnt++;
8410 		mutex_exit(&mdi_mutex);
8411 		clean_vhcache(vh->vh_config);
8412 		mutex_enter(&mdi_mutex);
8413 		vh->vh_refcnt--;
8414 	}
8415 	mutex_exit(&mdi_mutex);
8416 }
8417 
8418 /*
8419  * mdi_vhci_walk_clients():
8420  *		Walker routine to traverse client dev_info nodes
8421  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
8422  * below the client, including nexus devices, which we dont want.
8423  * So we just traverse the immediate siblings, starting from 1st client.
8424  */
8425 void
8426 mdi_vhci_walk_clients(dev_info_t *vdip,
8427     int (*f)(dev_info_t *, void *), void *arg)
8428 {
8429 	dev_info_t	*cdip;
8430 	mdi_client_t	*ct;
8431 
8432 	mutex_enter(&mdi_mutex);
8433 
8434 	cdip = ddi_get_child(vdip);
8435 
8436 	while (cdip) {
8437 		ct = i_devi_get_client(cdip);
8438 		MDI_CLIENT_LOCK(ct);
8439 
8440 		switch ((*f)(cdip, arg)) {
8441 		case DDI_WALK_CONTINUE:
8442 			cdip = ddi_get_next_sibling(cdip);
8443 			MDI_CLIENT_UNLOCK(ct);
8444 			break;
8445 
8446 		default:
8447 			MDI_CLIENT_UNLOCK(ct);
8448 			mutex_exit(&mdi_mutex);
8449 			return;
8450 		}
8451 	}
8452 
8453 	mutex_exit(&mdi_mutex);
8454 }
8455 
8456 /*
8457  * mdi_vhci_walk_phcis():
8458  *		Walker routine to traverse phci dev_info nodes
8459  */
8460 void
8461 mdi_vhci_walk_phcis(dev_info_t *vdip,
8462     int (*f)(dev_info_t *, void *), void *arg)
8463 {
8464 	mdi_vhci_t	*vh = NULL;
8465 	mdi_phci_t	*ph = NULL;
8466 
8467 	mutex_enter(&mdi_mutex);
8468 
8469 	vh = i_devi_get_vhci(vdip);
8470 	ph = vh->vh_phci_head;
8471 
8472 	while (ph) {
8473 		MDI_PHCI_LOCK(ph);
8474 
8475 		switch ((*f)(ph->ph_dip, arg)) {
8476 		case DDI_WALK_CONTINUE:
8477 			MDI_PHCI_UNLOCK(ph);
8478 			ph = ph->ph_next;
8479 			break;
8480 
8481 		default:
8482 			MDI_PHCI_UNLOCK(ph);
8483 			mutex_exit(&mdi_mutex);
8484 			return;
8485 		}
8486 	}
8487 
8488 	mutex_exit(&mdi_mutex);
8489 }
8490 
8491 
8492 /*
8493  * mdi_walk_vhcis():
8494  *		Walker routine to traverse vhci dev_info nodes
8495  */
8496 void
8497 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
8498 {
8499 	mdi_vhci_t	*vh = NULL;
8500 
8501 	mutex_enter(&mdi_mutex);
8502 	/*
8503 	 * Scan for already registered vhci
8504 	 */
8505 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8506 		vh->vh_refcnt++;
8507 		mutex_exit(&mdi_mutex);
8508 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
8509 			mutex_enter(&mdi_mutex);
8510 			vh->vh_refcnt--;
8511 			break;
8512 		} else {
8513 			mutex_enter(&mdi_mutex);
8514 			vh->vh_refcnt--;
8515 		}
8516 	}
8517 
8518 	mutex_exit(&mdi_mutex);
8519 }
8520 
8521 /*
8522  * i_mdi_log_sysevent():
8523  *		Logs events for pickup by syseventd
8524  */
8525 static void
8526 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
8527 {
8528 	char		*path_name;
8529 	nvlist_t	*attr_list;
8530 
8531 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
8532 	    KM_SLEEP) != DDI_SUCCESS) {
8533 		goto alloc_failed;
8534 	}
8535 
8536 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
8537 	(void) ddi_pathname(dip, path_name);
8538 
8539 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
8540 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
8541 		goto error;
8542 	}
8543 
8544 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
8545 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
8546 		goto error;
8547 	}
8548 
8549 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
8550 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
8551 		goto error;
8552 	}
8553 
8554 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
8555 	    path_name) != DDI_SUCCESS) {
8556 		goto error;
8557 	}
8558 
8559 	if (nvlist_add_string(attr_list, DDI_CLASS,
8560 	    ph_vh_class) != DDI_SUCCESS) {
8561 		goto error;
8562 	}
8563 
8564 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
8565 	    attr_list, NULL, DDI_SLEEP);
8566 
8567 error:
8568 	kmem_free(path_name, MAXPATHLEN);
8569 	nvlist_free(attr_list);
8570 	return;
8571 
8572 alloc_failed:
8573 	MDI_DEBUG(1, (CE_WARN, dip,
8574 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
8575 }
8576