xref: /titanic_51/usr/src/uts/common/os/sunmdi.c (revision 7ddc9b1afd18f260b9fb78ec7732facd91769131)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
28  * detailed discussion of the overall mpxio architecture.
29  *
30  * Default locking order:
31  *
32  * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
33  * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
34  * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
35  * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
39  */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 #include <sys/modhash.h>
68 #include <sys/disp.h>
69 #include <sys/autoconf.h>
70 #include <sys/sysmacros.h>
71 
/*
 * Debug logging.
 *
 * On DEBUG kernels MDI_DEBUG(level, (args)) calls i_mdi_log(args) when
 * mdi_debug is at least 'level'; 'stmnt' is the complete, parenthesized
 * argument list.  On non-DEBUG kernels the macro expands to nothing.
 *
 * The DEBUG expansion is wrapped in do { } while (0) so that it is a single
 * statement: a bare "if" would capture the "else" of an enclosing unbraced
 * if/else and make DEBUG and non-DEBUG kernels take different branches.
 */
#ifdef	DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#define	MDI_DEBUG(level, stmnt)					\
	do {							\
		if (mdi_debug >= (level))			\
			i_mdi_log stmnt;			\
	_NOTE(CONSTCOND) } while (0)
#else	/* !DEBUG */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
82 
83 extern pri_t	minclsyspri;
84 extern int	modrootloaded;
85 
86 /*
87  * Global mutex:
88  * Protects vHCI list and structure members.
89  */
90 kmutex_t	mdi_mutex;
91 
92 /*
93  * Registered vHCI class driver lists
94  */
95 int		mdi_vhci_count;
96 mdi_vhci_t	*mdi_vhci_head;
97 mdi_vhci_t	*mdi_vhci_tail;
98 
99 /*
100  * Client Hash Table size
101  */
102 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
103 
104 /*
105  * taskq interface definitions
106  */
107 #define	MDI_TASKQ_N_THREADS	8
108 #define	MDI_TASKQ_PRI		minclsyspri
109 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
110 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
111 
112 taskq_t				*mdi_taskq;
113 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
114 
115 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
116 
117 /*
118  * The data should be "quiet" for this interval (in seconds) before the
119  * vhci cached data is flushed to the disk.
120  */
121 static int mdi_vhcache_flush_delay = 10;
122 
123 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
124 static int mdi_vhcache_flush_daemon_idle_time = 60;
125 
126 /*
127  * MDI falls back to discovery of all paths when a bus_config_one fails.
128  * The following parameters can be used to tune this operation.
129  *
130  * mdi_path_discovery_boot
131  *	Number of times path discovery will be attempted during early boot.
132  *	Probably there is no reason to ever set this value to greater than one.
133  *
134  * mdi_path_discovery_postboot
135  *	Number of times path discovery will be attempted after early boot.
136  *	Set it to a minimum of two to allow for discovery of iscsi paths which
137  *	may happen very late during booting.
138  *
139  * mdi_path_discovery_interval
140  *	Minimum number of seconds MDI will wait between successive discovery
141  *	of all paths. Set it to -1 to disable discovery of all paths.
142  */
143 static int mdi_path_discovery_boot = 1;
144 static int mdi_path_discovery_postboot = 2;
145 static int mdi_path_discovery_interval = 10;
146 
147 /*
148  * number of seconds the asynchronous configuration thread will sleep idle
149  * before exiting.
150  */
151 static int mdi_async_config_idle_time = 600;
152 
153 static int mdi_bus_config_cache_hash_size = 256;
154 
155 /* turns off multithreaded configuration for certain operations */
156 static int mdi_mtc_off = 0;
157 
158 /*
159  * The "path" to a pathinfo node is identical to the /devices path to a
160  * devinfo node had the device been enumerated under a pHCI instead of
161  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
162  * This association persists across create/delete of the pathinfo nodes,
163  * but not across reboot.
164  */
165 static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
166 static int		mdi_pathmap_hash_size = 256;
167 static kmutex_t		mdi_pathmap_mutex;
168 static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
169 static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
170 
171 /*
172  * MDI component property name/value string definitions
173  */
174 const char 		*mdi_component_prop = "mpxio-component";
175 const char		*mdi_component_prop_vhci = "vhci";
176 const char		*mdi_component_prop_phci = "phci";
177 const char		*mdi_component_prop_client = "client";
178 
179 /*
180  * MDI client global unique identifier property name
181  */
182 const char		*mdi_client_guid_prop = "client-guid";
183 
184 /*
185  * MDI client load balancing property name/value string definitions
186  */
187 const char		*mdi_load_balance = "load-balance";
188 const char		*mdi_load_balance_none = "none";
189 const char		*mdi_load_balance_rr = "round-robin";
190 const char		*mdi_load_balance_lba = "logical-block";
191 
192 /*
193  * Obsolete vHCI class definition; to be removed after Leadville update
194  */
195 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
196 
197 static char vhci_greeting[] =
198 	"\tThere already exists one vHCI driver for class %s\n"
199 	"\tOnly one vHCI driver for each class is allowed\n";
200 
201 /*
202  * Static function prototypes
203  */
204 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
205 static int		i_mdi_client_offline(dev_info_t *, uint_t);
206 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
207 static void		i_mdi_phci_post_detach(dev_info_t *,
208 			    ddi_detach_cmd_t, int);
209 static int		i_mdi_client_pre_detach(dev_info_t *,
210 			    ddi_detach_cmd_t);
211 static void		i_mdi_client_post_detach(dev_info_t *,
212 			    ddi_detach_cmd_t, int);
213 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
214 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
215 static int 		i_mdi_lba_lb(mdi_client_t *ct,
216 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
217 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
218 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
219 static void		i_mdi_pm_reset_client(mdi_client_t *);
220 static int		i_mdi_power_all_phci(mdi_client_t *);
221 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
222 
223 
224 /*
225  * Internal mdi_pathinfo node functions
226  */
227 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
228 
229 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
230 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
231 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
232 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
233 static void		i_mdi_phci_unlock(mdi_phci_t *);
234 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
235 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
236 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
237 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
238 			    mdi_client_t *);
239 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
240 static void		i_mdi_client_remove_path(mdi_client_t *,
241 			    mdi_pathinfo_t *);
242 
243 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
244 			    mdi_pathinfo_state_t, int);
245 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
246 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
247 			    char **, int);
248 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
249 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
250 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
251 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
252 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
253 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
254 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
255 static void		i_mdi_client_update_state(mdi_client_t *);
256 static int		i_mdi_client_compute_state(mdi_client_t *,
257 			    mdi_phci_t *);
258 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
259 static void		i_mdi_client_unlock(mdi_client_t *);
260 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
261 static mdi_client_t	*i_devi_get_client(dev_info_t *);
262 /*
263  * NOTE: this will be removed once the NWS files are changed to use the new
264  * mdi_{enable,disable}_path interfaces
265  */
266 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
267 				int, int);
268 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
269 				mdi_vhci_t *vh, int flags, int op);
270 /*
271  * Failover related function prototypes
272  */
273 static int		i_mdi_failover(void *);
274 
275 /*
276  * misc internal functions
277  */
278 static int		i_mdi_get_hash_key(char *);
279 static int		i_map_nvlist_error_to_mdi(int);
280 static void		i_mdi_report_path_state(mdi_client_t *,
281 			    mdi_pathinfo_t *);
282 
283 static void		setup_vhci_cache(mdi_vhci_t *);
284 static int		destroy_vhci_cache(mdi_vhci_t *);
285 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
286 static boolean_t	stop_vhcache_flush_thread(void *, int);
287 static void		free_string_array(char **, int);
288 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
289 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
290 static void		free_vhcache_client(mdi_vhcache_client_t *);
291 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
292 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
293 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
294 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
295 static void		vhcache_pi_add(mdi_vhci_config_t *,
296 			    struct mdi_pathinfo *);
297 static void		vhcache_pi_remove(mdi_vhci_config_t *,
298 			    struct mdi_pathinfo *);
299 static void		free_phclient_path_list(mdi_phys_path_t *);
300 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
301 static int		flush_vhcache(mdi_vhci_config_t *, int);
302 static void		vhcache_dirty(mdi_vhci_config_t *);
303 static void		free_async_client_config(mdi_async_client_config_t *);
304 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
305 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
306 static nvlist_t		*read_on_disk_vhci_cache(char *);
307 extern int		fread_nvlist(char *, nvlist_t **);
308 extern int		fwrite_nvlist(char *, nvlist_t *);
309 
310 /* called once when first vhci registers with mdi */
311 static void
312 i_mdi_init()
313 {
314 	static int initialized = 0;
315 
316 	if (initialized)
317 		return;
318 	initialized = 1;
319 
320 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
321 
322 	/* Create our taskq resources */
323 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
324 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
325 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
326 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
327 
328 	/* Allocate ['path_instance' <-> "path"] maps */
329 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
330 	mdi_pathmap_bypath = mod_hash_create_strhash(
331 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
332 	    mod_hash_null_valdtor);
333 	mdi_pathmap_byinstance = mod_hash_create_idhash(
334 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
335 	    mod_hash_null_valdtor);
336 }
337 
338 /*
339  * mdi_get_component_type():
340  *		Return mpxio component type
341  * Return Values:
342  *		MDI_COMPONENT_NONE
343  *		MDI_COMPONENT_VHCI
344  *		MDI_COMPONENT_PHCI
345  *		MDI_COMPONENT_CLIENT
346  * XXX This doesn't work under multi-level MPxIO and should be
347  *	removed when clients migrate mdi_component_is_*() interfaces.
348  */
349 int
350 mdi_get_component_type(dev_info_t *dip)
351 {
352 	return (DEVI(dip)->devi_mdi_component);
353 }
354 
355 /*
356  * mdi_vhci_register():
357  *		Register a vHCI module with the mpxio framework
358  *		mdi_vhci_register() is called by vHCI drivers to register the
359  *		'class_driver' vHCI driver and its MDI entrypoints with the
360  *		mpxio framework.  The vHCI driver must call this interface as
361  *		part of its attach(9e) handler.
362  *		Competing threads may try to attach mdi_vhci_register() as
363  *		the vHCI drivers are loaded and attached as a result of pHCI
364  *		driver instance registration (mdi_phci_register()) with the
365  *		framework.
366  * Return Values:
367  *		MDI_SUCCESS
368  *		MDI_FAILURE
369  */
370 /*ARGSUSED*/
371 int
372 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
373     int flags)
374 {
375 	mdi_vhci_t		*vh = NULL;
376 
377 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
378 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
379 
380 	i_mdi_init();
381 
382 	mutex_enter(&mdi_mutex);
383 	/*
384 	 * Scan for already registered vhci
385 	 */
386 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
387 		if (strcmp(vh->vh_class, class) == 0) {
388 			/*
389 			 * vHCI has already been created.  Check for valid
390 			 * vHCI ops registration.  We only support one vHCI
391 			 * module per class
392 			 */
393 			if (vh->vh_ops != NULL) {
394 				mutex_exit(&mdi_mutex);
395 				cmn_err(CE_NOTE, vhci_greeting, class);
396 				return (MDI_FAILURE);
397 			}
398 			break;
399 		}
400 	}
401 
402 	/*
403 	 * if not yet created, create the vHCI component
404 	 */
405 	if (vh == NULL) {
406 		struct client_hash	*hash = NULL;
407 		char			*load_balance;
408 
409 		/*
410 		 * Allocate and initialize the mdi extensions
411 		 */
412 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
413 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
414 		    KM_SLEEP);
415 		vh->vh_client_table = hash;
416 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
417 		(void) strcpy(vh->vh_class, class);
418 		vh->vh_lb = LOAD_BALANCE_RR;
419 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
420 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
421 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
422 				vh->vh_lb = LOAD_BALANCE_NONE;
423 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
424 				    == 0) {
425 				vh->vh_lb = LOAD_BALANCE_LBA;
426 			}
427 			ddi_prop_free(load_balance);
428 		}
429 
430 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
431 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
432 
433 		/*
434 		 * Store the vHCI ops vectors
435 		 */
436 		vh->vh_dip = vdip;
437 		vh->vh_ops = vops;
438 
439 		setup_vhci_cache(vh);
440 
441 		if (mdi_vhci_head == NULL) {
442 			mdi_vhci_head = vh;
443 		}
444 		if (mdi_vhci_tail) {
445 			mdi_vhci_tail->vh_next = vh;
446 		}
447 		mdi_vhci_tail = vh;
448 		mdi_vhci_count++;
449 	}
450 
451 	/*
452 	 * Claim the devfs node as a vhci component
453 	 */
454 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
455 
456 	/*
457 	 * Initialize our back reference from dev_info node
458 	 */
459 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
460 	mutex_exit(&mdi_mutex);
461 	return (MDI_SUCCESS);
462 }
463 
464 /*
465  * mdi_vhci_unregister():
466  *		Unregister a vHCI module from mpxio framework
467  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
468  * 		of a vhci to unregister it from the framework.
469  * Return Values:
470  *		MDI_SUCCESS
471  *		MDI_FAILURE
472  */
473 /*ARGSUSED*/
474 int
475 mdi_vhci_unregister(dev_info_t *vdip, int flags)
476 {
477 	mdi_vhci_t	*found, *vh, *prev = NULL;
478 
479 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
480 
481 	/*
482 	 * Check for invalid VHCI
483 	 */
484 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
485 		return (MDI_FAILURE);
486 
487 	/*
488 	 * Scan the list of registered vHCIs for a match
489 	 */
490 	mutex_enter(&mdi_mutex);
491 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
492 		if (found == vh)
493 			break;
494 		prev = found;
495 	}
496 
497 	if (found == NULL) {
498 		mutex_exit(&mdi_mutex);
499 		return (MDI_FAILURE);
500 	}
501 
502 	/*
503 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
504 	 * should have been unregistered, before a vHCI can be
505 	 * unregistered.
506 	 */
507 	MDI_VHCI_PHCI_LOCK(vh);
508 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
509 		MDI_VHCI_PHCI_UNLOCK(vh);
510 		mutex_exit(&mdi_mutex);
511 		return (MDI_FAILURE);
512 	}
513 	MDI_VHCI_PHCI_UNLOCK(vh);
514 
515 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
516 		mutex_exit(&mdi_mutex);
517 		return (MDI_FAILURE);
518 	}
519 
520 	/*
521 	 * Remove the vHCI from the global list
522 	 */
523 	if (vh == mdi_vhci_head) {
524 		mdi_vhci_head = vh->vh_next;
525 	} else {
526 		prev->vh_next = vh->vh_next;
527 	}
528 	if (vh == mdi_vhci_tail) {
529 		mdi_vhci_tail = prev;
530 	}
531 	mdi_vhci_count--;
532 	mutex_exit(&mdi_mutex);
533 
534 	vh->vh_ops = NULL;
535 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
536 	DEVI(vdip)->devi_mdi_xhci = NULL;
537 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
538 	kmem_free(vh->vh_client_table,
539 	    mdi_client_table_size * sizeof (struct client_hash));
540 	mutex_destroy(&vh->vh_phci_mutex);
541 	mutex_destroy(&vh->vh_client_mutex);
542 
543 	kmem_free(vh, sizeof (mdi_vhci_t));
544 	return (MDI_SUCCESS);
545 }
546 
547 /*
548  * i_mdi_vhci_class2vhci():
549  *		Look for a matching vHCI module given a vHCI class name
550  * Return Values:
551  *		Handle to a vHCI component
552  *		NULL
553  */
554 static mdi_vhci_t *
555 i_mdi_vhci_class2vhci(char *class)
556 {
557 	mdi_vhci_t	*vh = NULL;
558 
559 	ASSERT(!MUTEX_HELD(&mdi_mutex));
560 
561 	mutex_enter(&mdi_mutex);
562 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
563 		if (strcmp(vh->vh_class, class) == 0) {
564 			break;
565 		}
566 	}
567 	mutex_exit(&mdi_mutex);
568 	return (vh);
569 }
570 
571 /*
572  * i_devi_get_vhci():
573  *		Utility function to get the handle to a vHCI component
574  * Return Values:
575  *		Handle to a vHCI component
576  *		NULL
577  */
578 mdi_vhci_t *
579 i_devi_get_vhci(dev_info_t *vdip)
580 {
581 	mdi_vhci_t	*vh = NULL;
582 	if (MDI_VHCI(vdip)) {
583 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
584 	}
585 	return (vh);
586 }
587 
588 /*
589  * mdi_phci_register():
590  *		Register a pHCI module with mpxio framework
591  *		mdi_phci_register() is called by pHCI drivers to register with
592  *		the mpxio framework and a specific 'class_driver' vHCI.  The
593  *		pHCI driver must call this interface as part of its attach(9e)
594  *		handler.
595  * Return Values:
596  *		MDI_SUCCESS
597  *		MDI_FAILURE
598  */
599 /*ARGSUSED*/
600 int
601 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
602 {
603 	mdi_phci_t		*ph;
604 	mdi_vhci_t		*vh;
605 	char			*data;
606 	char			*pathname;
607 
608 	/*
609 	 * Some subsystems, like fcp, perform pHCI registration from a
610 	 * different thread than the one doing the pHCI attach(9E) - the
611 	 * driver attach code is waiting for this other thread to complete.
612 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
613 	 * (indicating that some thread has done an ndi_devi_enter of parent)
614 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
615 	 */
616 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
617 
618 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
619 	(void) ddi_pathname(pdip, pathname);
620 
621 	/*
622 	 * Check for mpxio-disable property. Enable mpxio if the property is
623 	 * missing or not set to "yes".
624 	 * If the property is set to "yes" then emit a brief message.
625 	 */
626 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
627 	    &data) == DDI_SUCCESS)) {
628 		if (strcmp(data, "yes") == 0) {
629 			MDI_DEBUG(1, (CE_CONT, pdip,
630 			    "?%s (%s%d) multipath capabilities "
631 			    "disabled via %s.conf.\n", pathname,
632 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
633 			    ddi_driver_name(pdip)));
634 			ddi_prop_free(data);
635 			kmem_free(pathname, MAXPATHLEN);
636 			return (MDI_FAILURE);
637 		}
638 		ddi_prop_free(data);
639 	}
640 
641 	kmem_free(pathname, MAXPATHLEN);
642 
643 	/*
644 	 * Search for a matching vHCI
645 	 */
646 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
647 	if (vh == NULL) {
648 		return (MDI_FAILURE);
649 	}
650 
651 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
652 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
653 	ph->ph_dip = pdip;
654 	ph->ph_vhci = vh;
655 	ph->ph_next = NULL;
656 	ph->ph_unstable = 0;
657 	ph->ph_vprivate = 0;
658 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
659 
660 	MDI_PHCI_LOCK(ph);
661 	MDI_PHCI_SET_POWER_UP(ph);
662 	MDI_PHCI_UNLOCK(ph);
663 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
664 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
665 
666 	vhcache_phci_add(vh->vh_config, ph);
667 
668 	MDI_VHCI_PHCI_LOCK(vh);
669 	if (vh->vh_phci_head == NULL) {
670 		vh->vh_phci_head = ph;
671 	}
672 	if (vh->vh_phci_tail) {
673 		vh->vh_phci_tail->ph_next = ph;
674 	}
675 	vh->vh_phci_tail = ph;
676 	vh->vh_phci_count++;
677 	MDI_VHCI_PHCI_UNLOCK(vh);
678 
679 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
680 	return (MDI_SUCCESS);
681 }
682 
683 /*
684  * mdi_phci_unregister():
685  *		Unregister a pHCI module from mpxio framework
686  *		mdi_phci_unregister() is called by the pHCI drivers from their
687  *		detach(9E) handler to unregister their instances from the
688  *		framework.
689  * Return Values:
690  *		MDI_SUCCESS
691  *		MDI_FAILURE
692  */
693 /*ARGSUSED*/
694 int
695 mdi_phci_unregister(dev_info_t *pdip, int flags)
696 {
697 	mdi_vhci_t		*vh;
698 	mdi_phci_t		*ph;
699 	mdi_phci_t		*tmp;
700 	mdi_phci_t		*prev = NULL;
701 
702 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
703 
704 	ph = i_devi_get_phci(pdip);
705 	if (ph == NULL) {
706 		MDI_DEBUG(1, (CE_WARN, pdip,
707 		    "!pHCI unregister: Not a valid pHCI"));
708 		return (MDI_FAILURE);
709 	}
710 
711 	vh = ph->ph_vhci;
712 	ASSERT(vh != NULL);
713 	if (vh == NULL) {
714 		MDI_DEBUG(1, (CE_WARN, pdip,
715 		    "!pHCI unregister: Not a valid vHCI"));
716 		return (MDI_FAILURE);
717 	}
718 
719 	MDI_VHCI_PHCI_LOCK(vh);
720 	tmp = vh->vh_phci_head;
721 	while (tmp) {
722 		if (tmp == ph) {
723 			break;
724 		}
725 		prev = tmp;
726 		tmp = tmp->ph_next;
727 	}
728 
729 	if (ph == vh->vh_phci_head) {
730 		vh->vh_phci_head = ph->ph_next;
731 	} else {
732 		prev->ph_next = ph->ph_next;
733 	}
734 
735 	if (ph == vh->vh_phci_tail) {
736 		vh->vh_phci_tail = prev;
737 	}
738 
739 	vh->vh_phci_count--;
740 	MDI_VHCI_PHCI_UNLOCK(vh);
741 
742 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
743 	    ESC_DDI_INITIATOR_UNREGISTER);
744 	vhcache_phci_remove(vh->vh_config, ph);
745 	cv_destroy(&ph->ph_unstable_cv);
746 	mutex_destroy(&ph->ph_mutex);
747 	kmem_free(ph, sizeof (mdi_phci_t));
748 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
749 	DEVI(pdip)->devi_mdi_xhci = NULL;
750 	return (MDI_SUCCESS);
751 }
752 
753 /*
754  * i_devi_get_phci():
755  * 		Utility function to return the phci extensions.
756  */
757 static mdi_phci_t *
758 i_devi_get_phci(dev_info_t *pdip)
759 {
760 	mdi_phci_t	*ph = NULL;
761 	if (MDI_PHCI(pdip)) {
762 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
763 	}
764 	return (ph);
765 }
766 
767 /*
768  * Single thread mdi entry into devinfo node for modifying its children.
769  * If necessary we perform an ndi_devi_enter of the vHCI before doing
770  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
771  * for the vHCI and one for the pHCI.
772  */
773 void
774 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
775 {
776 	dev_info_t	*vdip;
777 	int		vcircular, pcircular;
778 
779 	/* Verify calling context */
780 	ASSERT(MDI_PHCI(phci_dip));
781 	vdip = mdi_devi_get_vdip(phci_dip);
782 	ASSERT(vdip);			/* A pHCI always has a vHCI */
783 
784 	/*
785 	 * If pHCI is detaching then the framework has already entered the
786 	 * vHCI on a threads that went down the code path leading to
787 	 * detach_node().  This framework enter of the vHCI during pHCI
788 	 * detach is done to avoid deadlock with vHCI power management
789 	 * operations which enter the vHCI and the enter down the path
790 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
791 	 * enter of the vHCI on frameworks vHCI enter that has already
792 	 * occurred - this is OK because we know that the framework thread
793 	 * doing detach is waiting for our completion.
794 	 *
795 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
796 	 * race with detach - but we can't do that because the framework has
797 	 * already entered the parent, so we have some complexity instead.
798 	 */
799 	for (;;) {
800 		if (ndi_devi_tryenter(vdip, &vcircular)) {
801 			ASSERT(vcircular != -1);
802 			if (DEVI_IS_DETACHING(phci_dip)) {
803 				ndi_devi_exit(vdip, vcircular);
804 				vcircular = -1;
805 			}
806 			break;
807 		} else if (DEVI_IS_DETACHING(phci_dip)) {
808 			vcircular = -1;
809 			break;
810 		} else {
811 			delay(1);
812 		}
813 	}
814 
815 	ndi_devi_enter(phci_dip, &pcircular);
816 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
817 }
818 
819 /*
820  * Release mdi_devi_enter or successful mdi_devi_tryenter.
821  */
822 void
823 mdi_devi_exit(dev_info_t *phci_dip, int circular)
824 {
825 	dev_info_t	*vdip;
826 	int		vcircular, pcircular;
827 
828 	/* Verify calling context */
829 	ASSERT(MDI_PHCI(phci_dip));
830 	vdip = mdi_devi_get_vdip(phci_dip);
831 	ASSERT(vdip);			/* A pHCI always has a vHCI */
832 
833 	/* extract two circular recursion values from single int */
834 	pcircular = (short)(circular & 0xFFFF);
835 	vcircular = (short)((circular >> 16) & 0xFFFF);
836 
837 	ndi_devi_exit(phci_dip, pcircular);
838 	if (vcircular != -1)
839 		ndi_devi_exit(vdip, vcircular);
840 }
841 
842 /*
843  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
844  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
845  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
846  * with vHCI power management code during path online/offline.  Each
847  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
848  * occur within the scope of an active mdi_devi_enter that establishes the
849  * circular value.
850  */
851 void
852 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
853 {
854 	int		pcircular;
855 
856 	/* Verify calling context */
857 	ASSERT(MDI_PHCI(phci_dip));
858 
859 	pcircular = (short)(circular & 0xFFFF);
860 	ndi_devi_exit(phci_dip, pcircular);
861 }
862 
863 void
864 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
865 {
866 	int		pcircular;
867 
868 	/* Verify calling context */
869 	ASSERT(MDI_PHCI(phci_dip));
870 
871 	ndi_devi_enter(phci_dip, &pcircular);
872 
873 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
874 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
875 }
876 
877 /*
878  * mdi_devi_get_vdip():
879  *		given a pHCI dip return vHCI dip
880  */
881 dev_info_t *
882 mdi_devi_get_vdip(dev_info_t *pdip)
883 {
884 	mdi_phci_t	*ph;
885 
886 	ph = i_devi_get_phci(pdip);
887 	if (ph && ph->ph_vhci)
888 		return (ph->ph_vhci->vh_dip);
889 	return (NULL);
890 }
891 
892 /*
893  * mdi_devi_pdip_entered():
894  *		Return 1 if we are vHCI and have done an ndi_devi_enter
895  *		of a pHCI
896  */
897 int
898 mdi_devi_pdip_entered(dev_info_t *vdip)
899 {
900 	mdi_vhci_t	*vh;
901 	mdi_phci_t	*ph;
902 
903 	vh = i_devi_get_vhci(vdip);
904 	if (vh == NULL)
905 		return (0);
906 
907 	MDI_VHCI_PHCI_LOCK(vh);
908 	ph = vh->vh_phci_head;
909 	while (ph) {
910 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
911 			MDI_VHCI_PHCI_UNLOCK(vh);
912 			return (1);
913 		}
914 		ph = ph->ph_next;
915 	}
916 	MDI_VHCI_PHCI_UNLOCK(vh);
917 	return (0);
918 }
919 
920 /*
921  * mdi_phci_path2devinfo():
922  * 		Utility function to search for a valid phci device given
923  *		the devfs pathname.
924  */
925 dev_info_t *
926 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
927 {
928 	char		*temp_pathname;
929 	mdi_vhci_t	*vh;
930 	mdi_phci_t	*ph;
931 	dev_info_t 	*pdip = NULL;
932 
933 	vh = i_devi_get_vhci(vdip);
934 	ASSERT(vh != NULL);
935 
936 	if (vh == NULL) {
937 		/*
938 		 * Invalid vHCI component, return failure
939 		 */
940 		return (NULL);
941 	}
942 
943 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
944 	MDI_VHCI_PHCI_LOCK(vh);
945 	ph = vh->vh_phci_head;
946 	while (ph != NULL) {
947 		pdip = ph->ph_dip;
948 		ASSERT(pdip != NULL);
949 		*temp_pathname = '\0';
950 		(void) ddi_pathname(pdip, temp_pathname);
951 		if (strcmp(temp_pathname, pathname) == 0) {
952 			break;
953 		}
954 		ph = ph->ph_next;
955 	}
956 	if (ph == NULL) {
957 		pdip = NULL;
958 	}
959 	MDI_VHCI_PHCI_UNLOCK(vh);
960 	kmem_free(temp_pathname, MAXPATHLEN);
961 	return (pdip);
962 }
963 
964 /*
965  * mdi_phci_get_path_count():
966  * 		get number of path information nodes associated with a given
967  *		pHCI device.
968  */
969 int
970 mdi_phci_get_path_count(dev_info_t *pdip)
971 {
972 	mdi_phci_t	*ph;
973 	int		count = 0;
974 
975 	ph = i_devi_get_phci(pdip);
976 	if (ph != NULL) {
977 		count = ph->ph_path_count;
978 	}
979 	return (count);
980 }
981 
982 /*
983  * i_mdi_phci_lock():
984  *		Lock a pHCI device
985  * Return Values:
986  *		None
987  * Note:
988  *		The default locking order is:
989  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
990  *		But there are number of situations where locks need to be
991  *		grabbed in reverse order.  This routine implements try and lock
992  *		mechanism depending on the requested parameter option.
993  */
994 static void
995 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
996 {
997 	if (pip) {
998 		/* Reverse locking is requested. */
999 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
1000 			/*
1001 			 * tryenter failed. Try to grab again
1002 			 * after a small delay
1003 			 */
1004 			MDI_PI_HOLD(pip);
1005 			MDI_PI_UNLOCK(pip);
1006 			delay(1);
1007 			MDI_PI_LOCK(pip);
1008 			MDI_PI_RELE(pip);
1009 		}
1010 	} else {
1011 		MDI_PHCI_LOCK(ph);
1012 	}
1013 }
1014 
1015 /*
1016  * i_mdi_phci_unlock():
1017  *		Unlock the pHCI component
1018  */
1019 static void
1020 i_mdi_phci_unlock(mdi_phci_t *ph)
1021 {
1022 	MDI_PHCI_UNLOCK(ph);
1023 }
1024 
1025 /*
1026  * i_mdi_devinfo_create():
1027  *		create client device's devinfo node
1028  * Return Values:
1029  *		dev_info
1030  *		NULL
1031  * Notes:
1032  */
1033 static dev_info_t *
1034 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1035 	char **compatible, int ncompatible)
1036 {
1037 	dev_info_t *cdip = NULL;
1038 
1039 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1040 
1041 	/* Verify for duplicate entry */
1042 	cdip = i_mdi_devinfo_find(vh, name, guid);
1043 	ASSERT(cdip == NULL);
1044 	if (cdip) {
1045 		cmn_err(CE_WARN,
1046 		    "i_mdi_devinfo_create: client dip %p already exists",
1047 			(void *)cdip);
1048 	}
1049 
1050 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1051 	if (cdip == NULL)
1052 		goto fail;
1053 
1054 	/*
1055 	 * Create component type and Global unique identifier
1056 	 * properties
1057 	 */
1058 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1059 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1060 		goto fail;
1061 	}
1062 
1063 	/* Decorate the node with compatible property */
1064 	if (compatible &&
1065 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1066 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1067 		goto fail;
1068 	}
1069 
1070 	return (cdip);
1071 
1072 fail:
1073 	if (cdip) {
1074 		(void) ndi_prop_remove_all(cdip);
1075 		(void) ndi_devi_free(cdip);
1076 	}
1077 	return (NULL);
1078 }
1079 
1080 /*
1081  * i_mdi_devinfo_find():
1082  *		Find a matching devinfo node for given client node name
1083  *		and its guid.
1084  * Return Values:
1085  *		Handle to a dev_info node or NULL
1086  */
1087 static dev_info_t *
1088 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1089 {
1090 	char			*data;
1091 	dev_info_t 		*cdip = NULL;
1092 	dev_info_t 		*ndip = NULL;
1093 	int			circular;
1094 
1095 	ndi_devi_enter(vh->vh_dip, &circular);
1096 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1097 	while ((cdip = ndip) != NULL) {
1098 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1099 
1100 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1101 			continue;
1102 		}
1103 
1104 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1105 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1106 		    &data) != DDI_PROP_SUCCESS) {
1107 			continue;
1108 		}
1109 
1110 		if (strcmp(data, guid) != 0) {
1111 			ddi_prop_free(data);
1112 			continue;
1113 		}
1114 		ddi_prop_free(data);
1115 		break;
1116 	}
1117 	ndi_devi_exit(vh->vh_dip, circular);
1118 	return (cdip);
1119 }
1120 
1121 /*
1122  * i_mdi_devinfo_remove():
1123  *		Remove a client device node
1124  */
1125 static int
1126 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1127 {
1128 	int	rv = MDI_SUCCESS;
1129 
1130 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1131 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1132 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
1133 		if (rv != NDI_SUCCESS) {
1134 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
1135 			    " failed. cdip = %p\n", (void *)cdip));
1136 		}
1137 		/*
1138 		 * Convert to MDI error code
1139 		 */
1140 		switch (rv) {
1141 		case NDI_SUCCESS:
1142 			rv = MDI_SUCCESS;
1143 			break;
1144 		case NDI_BUSY:
1145 			rv = MDI_BUSY;
1146 			break;
1147 		default:
1148 			rv = MDI_FAILURE;
1149 			break;
1150 		}
1151 	}
1152 	return (rv);
1153 }
1154 
1155 /*
1156  * i_devi_get_client()
1157  *		Utility function to get mpxio component extensions
1158  */
1159 static mdi_client_t *
1160 i_devi_get_client(dev_info_t *cdip)
1161 {
1162 	mdi_client_t	*ct = NULL;
1163 
1164 	if (MDI_CLIENT(cdip)) {
1165 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1166 	}
1167 	return (ct);
1168 }
1169 
1170 /*
1171  * i_mdi_is_child_present():
1172  *		Search for the presence of client device dev_info node
1173  */
1174 static int
1175 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1176 {
1177 	int		rv = MDI_FAILURE;
1178 	struct dev_info	*dip;
1179 	int		circular;
1180 
1181 	ndi_devi_enter(vdip, &circular);
1182 	dip = DEVI(vdip)->devi_child;
1183 	while (dip) {
1184 		if (dip == DEVI(cdip)) {
1185 			rv = MDI_SUCCESS;
1186 			break;
1187 		}
1188 		dip = dip->devi_sibling;
1189 	}
1190 	ndi_devi_exit(vdip, circular);
1191 	return (rv);
1192 }
1193 
1194 
1195 /*
1196  * i_mdi_client_lock():
1197  *		Grab client component lock
1198  * Return Values:
1199  *		None
1200  * Note:
1201  *		The default locking order is:
1202  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1203  *		But there are number of situations where locks need to be
1204  *		grabbed in reverse order.  This routine implements try and lock
1205  *		mechanism depending on the requested parameter option.
1206  */
1207 static void
1208 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1209 {
1210 	if (pip) {
1211 		/*
1212 		 * Reverse locking is requested.
1213 		 */
1214 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1215 			/*
1216 			 * tryenter failed. Try to grab again
1217 			 * after a small delay
1218 			 */
1219 			MDI_PI_HOLD(pip);
1220 			MDI_PI_UNLOCK(pip);
1221 			delay(1);
1222 			MDI_PI_LOCK(pip);
1223 			MDI_PI_RELE(pip);
1224 		}
1225 	} else {
1226 		MDI_CLIENT_LOCK(ct);
1227 	}
1228 }
1229 
1230 /*
1231  * i_mdi_client_unlock():
1232  *		Unlock a client component
1233  */
1234 static void
1235 i_mdi_client_unlock(mdi_client_t *ct)
1236 {
1237 	MDI_CLIENT_UNLOCK(ct);
1238 }
1239 
1240 /*
1241  * i_mdi_client_alloc():
1242  * 		Allocate and initialize a client structure.  Caller should
1243  *		hold the vhci client lock.
1244  * Return Values:
1245  *		Handle to a client component
1246  */
1247 /*ARGSUSED*/
1248 static mdi_client_t *
1249 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1250 {
1251 	mdi_client_t	*ct;
1252 
1253 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1254 
1255 	/*
1256 	 * Allocate and initialize a component structure.
1257 	 */
1258 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1259 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1260 	ct->ct_hnext = NULL;
1261 	ct->ct_hprev = NULL;
1262 	ct->ct_dip = NULL;
1263 	ct->ct_vhci = vh;
1264 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1265 	(void) strcpy(ct->ct_drvname, name);
1266 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1267 	(void) strcpy(ct->ct_guid, lguid);
1268 	ct->ct_cprivate = NULL;
1269 	ct->ct_vprivate = NULL;
1270 	ct->ct_flags = 0;
1271 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1272 	MDI_CLIENT_LOCK(ct);
1273 	MDI_CLIENT_SET_OFFLINE(ct);
1274 	MDI_CLIENT_SET_DETACH(ct);
1275 	MDI_CLIENT_SET_POWER_UP(ct);
1276 	MDI_CLIENT_UNLOCK(ct);
1277 	ct->ct_failover_flags = 0;
1278 	ct->ct_failover_status = 0;
1279 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1280 	ct->ct_unstable = 0;
1281 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1282 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1283 	ct->ct_lb = vh->vh_lb;
1284 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1285 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1286 	ct->ct_path_count = 0;
1287 	ct->ct_path_head = NULL;
1288 	ct->ct_path_tail = NULL;
1289 	ct->ct_path_last = NULL;
1290 
1291 	/*
1292 	 * Add this client component to our client hash queue
1293 	 */
1294 	i_mdi_client_enlist_table(vh, ct);
1295 	return (ct);
1296 }
1297 
1298 /*
1299  * i_mdi_client_enlist_table():
1300  *		Attach the client device to the client hash table. Caller
1301  *		should hold the vhci client lock.
1302  */
1303 static void
1304 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1305 {
1306 	int 			index;
1307 	struct client_hash	*head;
1308 
1309 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1310 
1311 	index = i_mdi_get_hash_key(ct->ct_guid);
1312 	head = &vh->vh_client_table[index];
1313 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1314 	head->ct_hash_head = ct;
1315 	head->ct_hash_count++;
1316 	vh->vh_client_count++;
1317 }
1318 
1319 /*
1320  * i_mdi_client_delist_table():
1321  *		Attach the client device to the client hash table.
1322  *		Caller should hold the vhci client lock.
1323  */
1324 static void
1325 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1326 {
1327 	int			index;
1328 	char			*guid;
1329 	struct client_hash 	*head;
1330 	mdi_client_t		*next;
1331 	mdi_client_t		*last;
1332 
1333 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1334 
1335 	guid = ct->ct_guid;
1336 	index = i_mdi_get_hash_key(guid);
1337 	head = &vh->vh_client_table[index];
1338 
1339 	last = NULL;
1340 	next = (mdi_client_t *)head->ct_hash_head;
1341 	while (next != NULL) {
1342 		if (next == ct) {
1343 			break;
1344 		}
1345 		last = next;
1346 		next = next->ct_hnext;
1347 	}
1348 
1349 	if (next) {
1350 		head->ct_hash_count--;
1351 		if (last == NULL) {
1352 			head->ct_hash_head = ct->ct_hnext;
1353 		} else {
1354 			last->ct_hnext = ct->ct_hnext;
1355 		}
1356 		ct->ct_hnext = NULL;
1357 		vh->vh_client_count--;
1358 	}
1359 }
1360 
1361 
1362 /*
1363  * i_mdi_client_free():
1364  *		Free a client component
1365  */
1366 static int
1367 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1368 {
1369 	int		rv = MDI_SUCCESS;
1370 	int		flags = ct->ct_flags;
1371 	dev_info_t	*cdip;
1372 	dev_info_t	*vdip;
1373 
1374 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1375 
1376 	vdip = vh->vh_dip;
1377 	cdip = ct->ct_dip;
1378 
1379 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1380 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1381 	DEVI(cdip)->devi_mdi_client = NULL;
1382 
1383 	/*
1384 	 * Clear out back ref. to dev_info_t node
1385 	 */
1386 	ct->ct_dip = NULL;
1387 
1388 	/*
1389 	 * Remove this client from our hash queue
1390 	 */
1391 	i_mdi_client_delist_table(vh, ct);
1392 
1393 	/*
1394 	 * Uninitialize and free the component
1395 	 */
1396 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1397 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1398 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1399 	cv_destroy(&ct->ct_failover_cv);
1400 	cv_destroy(&ct->ct_unstable_cv);
1401 	cv_destroy(&ct->ct_powerchange_cv);
1402 	mutex_destroy(&ct->ct_mutex);
1403 	kmem_free(ct, sizeof (*ct));
1404 
1405 	if (cdip != NULL) {
1406 		MDI_VHCI_CLIENT_UNLOCK(vh);
1407 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1408 		MDI_VHCI_CLIENT_LOCK(vh);
1409 	}
1410 	return (rv);
1411 }
1412 
1413 /*
1414  * i_mdi_client_find():
1415  * 		Find the client structure corresponding to a given guid
1416  *		Caller should hold the vhci client lock.
1417  */
1418 static mdi_client_t *
1419 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1420 {
1421 	int			index;
1422 	struct client_hash	*head;
1423 	mdi_client_t		*ct;
1424 
1425 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1426 
1427 	index = i_mdi_get_hash_key(guid);
1428 	head = &vh->vh_client_table[index];
1429 
1430 	ct = head->ct_hash_head;
1431 	while (ct != NULL) {
1432 		if (strcmp(ct->ct_guid, guid) == 0 &&
1433 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1434 			break;
1435 		}
1436 		ct = ct->ct_hnext;
1437 	}
1438 	return (ct);
1439 }
1440 
1441 /*
1442  * i_mdi_client_update_state():
1443  *		Compute and update client device state
1444  * Notes:
1445  *		A client device can be in any of three possible states:
1446  *
1447  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1448  *		one online/standby paths. Can tolerate failures.
1449  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1450  *		no alternate paths available as standby. A failure on the online
1451  *		would result in loss of access to device data.
1452  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1453  *		no paths available to access the device.
1454  */
1455 static void
1456 i_mdi_client_update_state(mdi_client_t *ct)
1457 {
1458 	int state;
1459 
1460 	ASSERT(MDI_CLIENT_LOCKED(ct));
1461 	state = i_mdi_client_compute_state(ct, NULL);
1462 	MDI_CLIENT_SET_STATE(ct, state);
1463 }
1464 
1465 /*
1466  * i_mdi_client_compute_state():
1467  *		Compute client device state
1468  *
1469  *		mdi_phci_t *	Pointer to pHCI structure which should
1470  *				while computing the new value.  Used by
1471  *				i_mdi_phci_offline() to find the new
1472  *				client state after DR of a pHCI.
1473  */
1474 static int
1475 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1476 {
1477 	int		state;
1478 	int		online_count = 0;
1479 	int		standby_count = 0;
1480 	mdi_pathinfo_t	*pip, *next;
1481 
1482 	ASSERT(MDI_CLIENT_LOCKED(ct));
1483 	pip = ct->ct_path_head;
1484 	while (pip != NULL) {
1485 		MDI_PI_LOCK(pip);
1486 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1487 		if (MDI_PI(pip)->pi_phci == ph) {
1488 			MDI_PI_UNLOCK(pip);
1489 			pip = next;
1490 			continue;
1491 		}
1492 
1493 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1494 				== MDI_PATHINFO_STATE_ONLINE)
1495 			online_count++;
1496 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1497 				== MDI_PATHINFO_STATE_STANDBY)
1498 			standby_count++;
1499 		MDI_PI_UNLOCK(pip);
1500 		pip = next;
1501 	}
1502 
1503 	if (online_count == 0) {
1504 		if (standby_count == 0) {
1505 			state = MDI_CLIENT_STATE_FAILED;
1506 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1507 			    " ct = %p\n", (void *)ct));
1508 		} else if (standby_count == 1) {
1509 			state = MDI_CLIENT_STATE_DEGRADED;
1510 		} else {
1511 			state = MDI_CLIENT_STATE_OPTIMAL;
1512 		}
1513 	} else if (online_count == 1) {
1514 		if (standby_count == 0) {
1515 			state = MDI_CLIENT_STATE_DEGRADED;
1516 		} else {
1517 			state = MDI_CLIENT_STATE_OPTIMAL;
1518 		}
1519 	} else {
1520 		state = MDI_CLIENT_STATE_OPTIMAL;
1521 	}
1522 	return (state);
1523 }
1524 
1525 /*
1526  * i_mdi_client2devinfo():
1527  *		Utility function
1528  */
1529 dev_info_t *
1530 i_mdi_client2devinfo(mdi_client_t *ct)
1531 {
1532 	return (ct->ct_dip);
1533 }
1534 
1535 /*
1536  * mdi_client_path2_devinfo():
1537  * 		Given the parent devinfo and child devfs pathname, search for
1538  *		a valid devfs node handle.
1539  */
1540 dev_info_t *
1541 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1542 {
1543 	dev_info_t 	*cdip = NULL;
1544 	dev_info_t 	*ndip = NULL;
1545 	char		*temp_pathname;
1546 	int		circular;
1547 
1548 	/*
1549 	 * Allocate temp buffer
1550 	 */
1551 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1552 
1553 	/*
1554 	 * Lock parent against changes
1555 	 */
1556 	ndi_devi_enter(vdip, &circular);
1557 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1558 	while ((cdip = ndip) != NULL) {
1559 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1560 
1561 		*temp_pathname = '\0';
1562 		(void) ddi_pathname(cdip, temp_pathname);
1563 		if (strcmp(temp_pathname, pathname) == 0) {
1564 			break;
1565 		}
1566 	}
1567 	/*
1568 	 * Release devinfo lock
1569 	 */
1570 	ndi_devi_exit(vdip, circular);
1571 
1572 	/*
1573 	 * Free the temp buffer
1574 	 */
1575 	kmem_free(temp_pathname, MAXPATHLEN);
1576 	return (cdip);
1577 }
1578 
1579 /*
1580  * mdi_client_get_path_count():
1581  * 		Utility function to get number of path information nodes
1582  *		associated with a given client device.
1583  */
1584 int
1585 mdi_client_get_path_count(dev_info_t *cdip)
1586 {
1587 	mdi_client_t	*ct;
1588 	int		count = 0;
1589 
1590 	ct = i_devi_get_client(cdip);
1591 	if (ct != NULL) {
1592 		count = ct->ct_path_count;
1593 	}
1594 	return (count);
1595 }
1596 
1597 
1598 /*
1599  * i_mdi_get_hash_key():
1600  * 		Create a hash using strings as keys
1601  *
1602  */
1603 static int
1604 i_mdi_get_hash_key(char *str)
1605 {
1606 	uint32_t	g, hash = 0;
1607 	char		*p;
1608 
1609 	for (p = str; *p != '\0'; p++) {
1610 		g = *p;
1611 		hash += g;
1612 	}
1613 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1614 }
1615 
1616 /*
1617  * mdi_get_lb_policy():
1618  * 		Get current load balancing policy for a given client device
1619  */
1620 client_lb_t
1621 mdi_get_lb_policy(dev_info_t *cdip)
1622 {
1623 	client_lb_t	lb = LOAD_BALANCE_NONE;
1624 	mdi_client_t	*ct;
1625 
1626 	ct = i_devi_get_client(cdip);
1627 	if (ct != NULL) {
1628 		lb = ct->ct_lb;
1629 	}
1630 	return (lb);
1631 }
1632 
1633 /*
1634  * mdi_set_lb_region_size():
1635  * 		Set current region size for the load-balance
1636  */
1637 int
1638 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1639 {
1640 	mdi_client_t	*ct;
1641 	int		rv = MDI_FAILURE;
1642 
1643 	ct = i_devi_get_client(cdip);
1644 	if (ct != NULL && ct->ct_lb_args != NULL) {
1645 		ct->ct_lb_args->region_size = region_size;
1646 		rv = MDI_SUCCESS;
1647 	}
1648 	return (rv);
1649 }
1650 
1651 /*
1652  * mdi_Set_lb_policy():
1653  * 		Set current load balancing policy for a given client device
1654  */
1655 int
1656 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1657 {
1658 	mdi_client_t	*ct;
1659 	int		rv = MDI_FAILURE;
1660 
1661 	ct = i_devi_get_client(cdip);
1662 	if (ct != NULL) {
1663 		ct->ct_lb = lb;
1664 		rv = MDI_SUCCESS;
1665 	}
1666 	return (rv);
1667 }
1668 
1669 /*
1670  * mdi_failover():
1671  *		failover function called by the vHCI drivers to initiate
1672  *		a failover operation.  This is typically due to non-availability
1673  *		of online paths to route I/O requests.  Failover can be
1674  *		triggered through user application also.
1675  *
1676  *		The vHCI driver calls mdi_failover() to initiate a failover
1677  *		operation. mdi_failover() calls back into the vHCI driver's
1678  *		vo_failover() entry point to perform the actual failover
1679  *		operation.  The reason for requiring the vHCI driver to
1680  *		initiate failover by calling mdi_failover(), instead of directly
1681  *		executing vo_failover() itself, is to ensure that the mdi
1682  *		framework can keep track of the client state properly.
1683  *		Additionally, mdi_failover() provides as a convenience the
1684  *		option of performing the failover operation synchronously or
1685  *		asynchronously
1686  *
1687  *		Upon successful completion of the failover operation, the
1688  *		paths that were previously ONLINE will be in the STANDBY state,
1689  *		and the newly activated paths will be in the ONLINE state.
1690  *
1691  *		The flags modifier determines whether the activation is done
1692  *		synchronously: MDI_FAILOVER_SYNC
1693  * Return Values:
1694  *		MDI_SUCCESS
1695  *		MDI_FAILURE
1696  *		MDI_BUSY
1697  */
1698 /*ARGSUSED*/
1699 int
1700 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1701 {
1702 	int			rv;
1703 	mdi_client_t		*ct;
1704 
1705 	ct = i_devi_get_client(cdip);
1706 	ASSERT(ct != NULL);
1707 	if (ct == NULL) {
1708 		/* cdip is not a valid client device. Nothing more to do. */
1709 		return (MDI_FAILURE);
1710 	}
1711 
1712 	MDI_CLIENT_LOCK(ct);
1713 
1714 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1715 		/* A path to the client is being freed */
1716 		MDI_CLIENT_UNLOCK(ct);
1717 		return (MDI_BUSY);
1718 	}
1719 
1720 
1721 	if (MDI_CLIENT_IS_FAILED(ct)) {
1722 		/*
1723 		 * Client is in failed state. Nothing more to do.
1724 		 */
1725 		MDI_CLIENT_UNLOCK(ct);
1726 		return (MDI_FAILURE);
1727 	}
1728 
1729 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1730 		/*
1731 		 * Failover is already in progress; return BUSY
1732 		 */
1733 		MDI_CLIENT_UNLOCK(ct);
1734 		return (MDI_BUSY);
1735 	}
1736 	/*
1737 	 * Make sure that mdi_pathinfo node state changes are processed.
1738 	 * We do not allow failovers to progress while client path state
1739 	 * changes are in progress
1740 	 */
1741 	if (ct->ct_unstable) {
1742 		if (flags == MDI_FAILOVER_ASYNC) {
1743 			MDI_CLIENT_UNLOCK(ct);
1744 			return (MDI_BUSY);
1745 		} else {
1746 			while (ct->ct_unstable)
1747 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1748 		}
1749 	}
1750 
1751 	/*
1752 	 * Client device is in stable state. Before proceeding, perform sanity
1753 	 * checks again.
1754 	 */
1755 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1756 	    (!i_ddi_devi_attached(ct->ct_dip))) {
1757 		/*
1758 		 * Client is in failed state. Nothing more to do.
1759 		 */
1760 		MDI_CLIENT_UNLOCK(ct);
1761 		return (MDI_FAILURE);
1762 	}
1763 
1764 	/*
1765 	 * Set the client state as failover in progress.
1766 	 */
1767 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1768 	ct->ct_failover_flags = flags;
1769 	MDI_CLIENT_UNLOCK(ct);
1770 
1771 	if (flags == MDI_FAILOVER_ASYNC) {
1772 		/*
1773 		 * Submit the initiate failover request via CPR safe
1774 		 * taskq threads.
1775 		 */
1776 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1777 		    ct, KM_SLEEP);
1778 		return (MDI_ACCEPT);
1779 	} else {
1780 		/*
1781 		 * Synchronous failover mode.  Typically invoked from the user
1782 		 * land.
1783 		 */
1784 		rv = i_mdi_failover(ct);
1785 	}
1786 	return (rv);
1787 }
1788 
1789 /*
1790  * i_mdi_failover():
1791  *		internal failover function. Invokes vHCI drivers failover
1792  *		callback function and process the failover status
1793  * Return Values:
1794  *		None
1795  *
1796  * Note: A client device in failover state can not be detached or freed.
1797  */
1798 static int
1799 i_mdi_failover(void *arg)
1800 {
1801 	int		rv = MDI_SUCCESS;
1802 	mdi_client_t	*ct = (mdi_client_t *)arg;
1803 	mdi_vhci_t	*vh = ct->ct_vhci;
1804 
1805 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1806 
1807 	if (vh->vh_ops->vo_failover != NULL) {
1808 		/*
1809 		 * Call vHCI drivers callback routine
1810 		 */
1811 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1812 		    ct->ct_failover_flags);
1813 	}
1814 
1815 	MDI_CLIENT_LOCK(ct);
1816 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1817 
1818 	/*
1819 	 * Save the failover return status
1820 	 */
1821 	ct->ct_failover_status = rv;
1822 
1823 	/*
1824 	 * As a result of failover, client status would have been changed.
1825 	 * Update the client state and wake up anyone waiting on this client
1826 	 * device.
1827 	 */
1828 	i_mdi_client_update_state(ct);
1829 
1830 	cv_broadcast(&ct->ct_failover_cv);
1831 	MDI_CLIENT_UNLOCK(ct);
1832 	return (rv);
1833 }
1834 
1835 /*
1836  * Load balancing is logical block.
1837  * IOs within the range described by region_size
1838  * would go on the same path. This would improve the
1839  * performance by cache-hit on some of the RAID devices.
1840  * Search only for online paths(At some point we
1841  * may want to balance across target ports).
1842  * If no paths are found then default to round-robin.
1843  */
1844 static int
1845 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1846 {
1847 	int		path_index = -1;
1848 	int		online_path_count = 0;
1849 	int		online_nonpref_path_count = 0;
1850 	int 		region_size = ct->ct_lb_args->region_size;
1851 	mdi_pathinfo_t	*pip;
1852 	mdi_pathinfo_t	*next;
1853 	int		preferred, path_cnt;
1854 
1855 	pip = ct->ct_path_head;
1856 	while (pip) {
1857 		MDI_PI_LOCK(pip);
1858 		if (MDI_PI(pip)->pi_state ==
1859 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1860 			online_path_count++;
1861 		} else if (MDI_PI(pip)->pi_state ==
1862 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1863 			online_nonpref_path_count++;
1864 		}
1865 		next = (mdi_pathinfo_t *)
1866 		    MDI_PI(pip)->pi_client_link;
1867 		MDI_PI_UNLOCK(pip);
1868 		pip = next;
1869 	}
1870 	/* if found any online/preferred then use this type */
1871 	if (online_path_count > 0) {
1872 		path_cnt = online_path_count;
1873 		preferred = 1;
1874 	} else if (online_nonpref_path_count > 0) {
1875 		path_cnt = online_nonpref_path_count;
1876 		preferred = 0;
1877 	} else {
1878 		path_cnt = 0;
1879 	}
1880 	if (path_cnt) {
1881 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1882 		pip = ct->ct_path_head;
1883 		while (pip && path_index != -1) {
1884 			MDI_PI_LOCK(pip);
1885 			if (path_index == 0 &&
1886 			    (MDI_PI(pip)->pi_state ==
1887 			    MDI_PATHINFO_STATE_ONLINE) &&
1888 				MDI_PI(pip)->pi_preferred == preferred) {
1889 				MDI_PI_HOLD(pip);
1890 				MDI_PI_UNLOCK(pip);
1891 				*ret_pip = pip;
1892 				return (MDI_SUCCESS);
1893 			}
1894 			path_index --;
1895 			next = (mdi_pathinfo_t *)
1896 			    MDI_PI(pip)->pi_client_link;
1897 			MDI_PI_UNLOCK(pip);
1898 			pip = next;
1899 		}
1900 		if (pip == NULL) {
1901 			MDI_DEBUG(4, (CE_NOTE, NULL,
1902 			    "!lba %llx, no pip !!\n",
1903 				bp->b_lblkno));
1904 		} else {
1905 			MDI_DEBUG(4, (CE_NOTE, NULL,
1906 			    "!lba %llx, no pip for path_index, "
1907 			    "pip %p\n", bp->b_lblkno, (void *)pip));
1908 		}
1909 	}
1910 	return (MDI_FAILURE);
1911 }
1912 
1913 /*
1914  * mdi_select_path():
1915  *		select a path to access a client device.
1916  *
1917  *		mdi_select_path() function is called by the vHCI drivers to
1918  *		select a path to route the I/O request to.  The caller passes
1919  *		the block I/O data transfer structure ("buf") as one of the
1920  *		parameters.  The mpxio framework uses the buf structure
1921  *		contents to maintain per path statistics (total I/O size /
1922  *		count pending).  If more than one online paths are available to
1923  *		select, the framework automatically selects a suitable path
1924  *		for routing I/O request. If a failover operation is active for
1925  *		this client device the call shall be failed with MDI_BUSY error
1926  *		code.
1927  *
1928  *		By default this function returns a suitable path in online
1929  *		state based on the current load balancing policy.  Currently
1930  *		we support LOAD_BALANCE_NONE (Previously selected online path
1931  *		will continue to be used till the path is usable) and
1932  *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is
 *		configured through the vHCI driver's configuration file
 *		(driver.conf).
1936  *
1937  *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the third argument depends
1939  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
1940  *		then the argument is the "path instance" of the path to select.
1941  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
1942  *		"start_pip". A non NULL "start_pip" is the starting point to
1943  *		walk and find the next appropriate path.  The following values
1944  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
1945  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
1946  *		STANDBY path).
1947  *
1948  *		The non-standard behavior is used by the scsi_vhci driver,
1949  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1950  *		attach of client devices (to avoid an unnecessary failover
1951  *		when the STANDBY path comes up first), during failover
1952  *		(to activate a STANDBY path as ONLINE).
1953  *
 *		The selected path is returned in a mdi_hold_path() state
1955  *		(pi_ref_cnt). Caller should release the hold by calling
1956  *		mdi_rele_path().
1957  *
1958  * Return Values:
1959  *		MDI_SUCCESS	- Completed successfully
1960  *		MDI_BUSY 	- Client device is busy failing over
1961  *		MDI_NOPATH	- Client device is online, but no valid path are
1962  *				  available to access this client device
1963  *		MDI_FAILURE	- Invalid client device or state
1964  *		MDI_DEVI_ONLINING
1965  *				- Client device (struct dev_info state) is in
1966  *				  onlining state.
1967  */
1968 
1969 /*ARGSUSED*/
1970 int
1971 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1972     void *arg, mdi_pathinfo_t **ret_pip)
1973 {
1974 	mdi_client_t	*ct;
1975 	mdi_pathinfo_t	*pip;
1976 	mdi_pathinfo_t	*next;
1977 	mdi_pathinfo_t	*head;
1978 	mdi_pathinfo_t	*start;
1979 	client_lb_t	lbp;	/* load balancing policy */
1980 	int		sb = 1;	/* standard behavior */
1981 	int		preferred = 1;	/* preferred path */
1982 	int		cond, cont = 1;
1983 	int		retry = 0;
1984 	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
1985 	int		path_instance;	/* request specific path instance */
1986 
1987 	/* determine type of arg based on flags */
1988 	if (flags & MDI_SELECT_PATH_INSTANCE) {
1989 		flags &= ~MDI_SELECT_PATH_INSTANCE;
1990 		path_instance = (int)(intptr_t)arg;
1991 		start_pip = NULL;
1992 	} else {
1993 		path_instance = 0;
1994 		start_pip = (mdi_pathinfo_t *)arg;
1995 	}
1996 
1997 	if (flags != 0) {
1998 		/*
1999 		 * disable default behavior
2000 		 */
2001 		sb = 0;
2002 	}
2003 
2004 	*ret_pip = NULL;
2005 	ct = i_devi_get_client(cdip);
2006 	if (ct == NULL) {
2007 		/* mdi extensions are NULL, Nothing more to do */
2008 		return (MDI_FAILURE);
2009 	}
2010 
2011 	MDI_CLIENT_LOCK(ct);
2012 
2013 	if (sb) {
2014 		if (MDI_CLIENT_IS_FAILED(ct)) {
2015 			/*
2016 			 * Client is not ready to accept any I/O requests.
2017 			 * Fail this request.
2018 			 */
2019 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
2020 			    "client state offline ct = %p\n", (void *)ct));
2021 			MDI_CLIENT_UNLOCK(ct);
2022 			return (MDI_FAILURE);
2023 		}
2024 
2025 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2026 			/*
2027 			 * Check for Failover is in progress. If so tell the
2028 			 * caller that this device is busy.
2029 			 */
2030 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
2031 			    "client failover in progress ct = %p\n",
2032 			    (void *)ct));
2033 			MDI_CLIENT_UNLOCK(ct);
2034 			return (MDI_BUSY);
2035 		}
2036 
2037 		/*
2038 		 * Check to see whether the client device is attached.
2039 		 * If not so, let the vHCI driver manually select a path
2040 		 * (standby) and let the probe/attach process to continue.
2041 		 */
2042 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2043 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
2044 			    "ct = %p\n", (void *)ct));
2045 			MDI_CLIENT_UNLOCK(ct);
2046 			return (MDI_DEVI_ONLINING);
2047 		}
2048 	}
2049 
2050 	/*
2051 	 * Cache in the client list head.  If head of the list is NULL
2052 	 * return MDI_NOPATH
2053 	 */
2054 	head = ct->ct_path_head;
2055 	if (head == NULL) {
2056 		MDI_CLIENT_UNLOCK(ct);
2057 		return (MDI_NOPATH);
2058 	}
2059 
2060 	/* Caller is specifying a specific pathinfo path by path_instance */
2061 	if (path_instance) {
2062 		/* search for pathinfo with correct path_instance */
2063 		for (pip = head;
2064 		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
2065 		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2066 			;
2067 
2068 		/* If path can't be selected then MDI_FAILURE is returned. */
2069 		if (pip == NULL) {
2070 			MDI_CLIENT_UNLOCK(ct);
2071 			return (MDI_FAILURE);
2072 		}
2073 
2074 		/* verify state of path */
2075 		MDI_PI_LOCK(pip);
2076 		if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) {
2077 			MDI_PI_UNLOCK(pip);
2078 			MDI_CLIENT_UNLOCK(ct);
2079 			return (MDI_FAILURE);
2080 		}
2081 
2082 		/*
2083 		 * Return the path in hold state. Caller should release the
2084 		 * lock by calling mdi_rele_path()
2085 		 */
2086 		MDI_PI_HOLD(pip);
2087 		MDI_PI_UNLOCK(pip);
2088 		ct->ct_path_last = pip;
2089 		*ret_pip = pip;
2090 		MDI_CLIENT_UNLOCK(ct);
2091 		return (MDI_SUCCESS);
2092 	}
2093 
2094 	/*
2095 	 * for non default behavior, bypass current
2096 	 * load balancing policy and always use LOAD_BALANCE_RR
2097 	 * except that the start point will be adjusted based
2098 	 * on the provided start_pip
2099 	 */
2100 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2101 
2102 	switch (lbp) {
2103 	case LOAD_BALANCE_NONE:
2104 		/*
2105 		 * Load balancing is None  or Alternate path mode
2106 		 * Start looking for a online mdi_pathinfo node starting from
2107 		 * last known selected path
2108 		 */
2109 		preferred = 1;
2110 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
2111 		if (pip == NULL) {
2112 			pip = head;
2113 		}
2114 		start = pip;
2115 		do {
2116 			MDI_PI_LOCK(pip);
2117 			/*
2118 			 * No need to explicitly check if the path is disabled.
2119 			 * Since we are checking for state == ONLINE and the
2120 			 * same veriable is used for DISABLE/ENABLE information.
2121 			 */
2122 			if ((MDI_PI(pip)->pi_state  ==
2123 				MDI_PATHINFO_STATE_ONLINE) &&
2124 				preferred == MDI_PI(pip)->pi_preferred) {
2125 				/*
2126 				 * Return the path in hold state. Caller should
2127 				 * release the lock by calling mdi_rele_path()
2128 				 */
2129 				MDI_PI_HOLD(pip);
2130 				MDI_PI_UNLOCK(pip);
2131 				ct->ct_path_last = pip;
2132 				*ret_pip = pip;
2133 				MDI_CLIENT_UNLOCK(ct);
2134 				return (MDI_SUCCESS);
2135 			}
2136 
2137 			/*
2138 			 * Path is busy.
2139 			 */
2140 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2141 			    MDI_PI_IS_TRANSIENT(pip))
2142 				retry = 1;
2143 			/*
2144 			 * Keep looking for a next available online path
2145 			 */
2146 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2147 			if (next == NULL) {
2148 				next = head;
2149 			}
2150 			MDI_PI_UNLOCK(pip);
2151 			pip = next;
2152 			if (start == pip && preferred) {
2153 				preferred = 0;
2154 			} else if (start == pip && !preferred) {
2155 				cont = 0;
2156 			}
2157 		} while (cont);
2158 		break;
2159 
2160 	case LOAD_BALANCE_LBA:
2161 		/*
2162 		 * Make sure we are looking
2163 		 * for an online path. Otherwise, if it is for a STANDBY
2164 		 * path request, it will go through and fetch an ONLINE
2165 		 * path which is not desirable.
2166 		 */
2167 		if ((ct->ct_lb_args != NULL) &&
2168 			    (ct->ct_lb_args->region_size) && bp &&
2169 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2170 			if (i_mdi_lba_lb(ct, ret_pip, bp)
2171 				    == MDI_SUCCESS) {
2172 				MDI_CLIENT_UNLOCK(ct);
2173 				return (MDI_SUCCESS);
2174 			}
2175 		}
2176 		/*  FALLTHROUGH */
2177 	case LOAD_BALANCE_RR:
2178 		/*
2179 		 * Load balancing is Round Robin. Start looking for a online
2180 		 * mdi_pathinfo node starting from last known selected path
2181 		 * as the start point.  If override flags are specified,
2182 		 * process accordingly.
2183 		 * If the search is already in effect(start_pip not null),
2184 		 * then lets just use the same path preference to continue the
2185 		 * traversal.
2186 		 */
2187 
2188 		if (start_pip != NULL) {
2189 			preferred = MDI_PI(start_pip)->pi_preferred;
2190 		} else {
2191 			preferred = 1;
2192 		}
2193 
2194 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2195 		if (start == NULL) {
2196 			pip = head;
2197 		} else {
2198 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2199 			if (pip == NULL) {
2200 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2201 					/*
2202 					 * Return since we hit the end of list
2203 					 */
2204 					MDI_CLIENT_UNLOCK(ct);
2205 					return (MDI_NOPATH);
2206 				}
2207 
2208 				if (!sb) {
2209 					if (preferred == 0) {
2210 						/*
2211 						 * Looks like we have completed
2212 						 * the traversal as preferred
2213 						 * value is 0. Time to bail out.
2214 						 */
2215 						*ret_pip = NULL;
2216 						MDI_CLIENT_UNLOCK(ct);
2217 						return (MDI_NOPATH);
2218 					} else {
2219 						/*
2220 						 * Looks like we reached the
2221 						 * end of the list. Lets enable
2222 						 * traversal of non preferred
2223 						 * paths.
2224 						 */
2225 						preferred = 0;
2226 					}
2227 				}
2228 				pip = head;
2229 			}
2230 		}
2231 		start = pip;
2232 		do {
2233 			MDI_PI_LOCK(pip);
2234 			if (sb) {
2235 				cond = ((MDI_PI(pip)->pi_state ==
2236 				    MDI_PATHINFO_STATE_ONLINE &&
2237 					MDI_PI(pip)->pi_preferred ==
2238 						preferred) ? 1 : 0);
2239 			} else {
2240 				if (flags == MDI_SELECT_ONLINE_PATH) {
2241 					cond = ((MDI_PI(pip)->pi_state ==
2242 					    MDI_PATHINFO_STATE_ONLINE &&
2243 						MDI_PI(pip)->pi_preferred ==
2244 						preferred) ? 1 : 0);
2245 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2246 					cond = ((MDI_PI(pip)->pi_state ==
2247 					    MDI_PATHINFO_STATE_STANDBY &&
2248 						MDI_PI(pip)->pi_preferred ==
2249 						preferred) ? 1 : 0);
2250 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2251 				    MDI_SELECT_STANDBY_PATH)) {
2252 					cond = (((MDI_PI(pip)->pi_state ==
2253 					    MDI_PATHINFO_STATE_ONLINE ||
2254 					    (MDI_PI(pip)->pi_state ==
2255 					    MDI_PATHINFO_STATE_STANDBY)) &&
2256 						MDI_PI(pip)->pi_preferred ==
2257 						preferred) ? 1 : 0);
2258 				} else if (flags ==
2259 					(MDI_SELECT_STANDBY_PATH |
2260 					MDI_SELECT_ONLINE_PATH |
2261 					MDI_SELECT_USER_DISABLE_PATH)) {
2262 					cond = (((MDI_PI(pip)->pi_state ==
2263 					    MDI_PATHINFO_STATE_ONLINE ||
2264 					    (MDI_PI(pip)->pi_state ==
2265 					    MDI_PATHINFO_STATE_STANDBY) ||
2266 						(MDI_PI(pip)->pi_state ==
2267 					    (MDI_PATHINFO_STATE_ONLINE|
2268 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2269 						(MDI_PI(pip)->pi_state ==
2270 					    (MDI_PATHINFO_STATE_STANDBY |
2271 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2272 						MDI_PI(pip)->pi_preferred ==
2273 						preferred) ? 1 : 0);
2274 				} else if (flags ==
2275 				    (MDI_SELECT_STANDBY_PATH |
2276 				    MDI_SELECT_ONLINE_PATH |
2277 				    MDI_SELECT_NO_PREFERRED)) {
2278 					cond = (((MDI_PI(pip)->pi_state ==
2279 					    MDI_PATHINFO_STATE_ONLINE) ||
2280 					    (MDI_PI(pip)->pi_state ==
2281 					    MDI_PATHINFO_STATE_STANDBY))
2282 					    ? 1 : 0);
2283 				} else {
2284 					cond = 0;
2285 				}
2286 			}
2287 			/*
2288 			 * No need to explicitly check if the path is disabled.
2289 			 * Since we are checking for state == ONLINE and the
2290 			 * same veriable is used for DISABLE/ENABLE information.
2291 			 */
2292 			if (cond) {
2293 				/*
2294 				 * Return the path in hold state. Caller should
2295 				 * release the lock by calling mdi_rele_path()
2296 				 */
2297 				MDI_PI_HOLD(pip);
2298 				MDI_PI_UNLOCK(pip);
2299 				if (sb)
2300 					ct->ct_path_last = pip;
2301 				*ret_pip = pip;
2302 				MDI_CLIENT_UNLOCK(ct);
2303 				return (MDI_SUCCESS);
2304 			}
2305 			/*
2306 			 * Path is busy.
2307 			 */
2308 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2309 			    MDI_PI_IS_TRANSIENT(pip))
2310 				retry = 1;
2311 
2312 			/*
2313 			 * Keep looking for a next available online path
2314 			 */
2315 do_again:
2316 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2317 			if (next == NULL) {
2318 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2319 					/*
2320 					 * Bail out since we hit the end of list
2321 					 */
2322 					MDI_PI_UNLOCK(pip);
2323 					break;
2324 				}
2325 
2326 				if (!sb) {
2327 					if (preferred == 1) {
2328 						/*
2329 						 * Looks like we reached the
2330 						 * end of the list. Lets enable
2331 						 * traversal of non preferred
2332 						 * paths.
2333 						 */
2334 						preferred = 0;
2335 						next = head;
2336 					} else {
2337 						/*
2338 						 * We have done both the passes
2339 						 * Preferred as well as for
2340 						 * Non-preferred. Bail out now.
2341 						 */
2342 						cont = 0;
2343 					}
2344 				} else {
2345 					/*
2346 					 * Standard behavior case.
2347 					 */
2348 					next = head;
2349 				}
2350 			}
2351 			MDI_PI_UNLOCK(pip);
2352 			if (cont == 0) {
2353 				break;
2354 			}
2355 			pip = next;
2356 
2357 			if (!sb) {
2358 				/*
2359 				 * We need to handle the selection of
2360 				 * non-preferred path in the following
2361 				 * case:
2362 				 *
2363 				 * +------+   +------+   +------+   +-----+
2364 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2365 				 * +------+   +------+   +------+   +-----+
2366 				 *
2367 				 * If we start the search with B, we need to
2368 				 * skip beyond B to pick C which is non -
2369 				 * preferred in the second pass. The following
2370 				 * test, if true, will allow us to skip over
2371 				 * the 'start'(B in the example) to select
2372 				 * other non preferred elements.
2373 				 */
2374 				if ((start_pip != NULL) && (start_pip == pip) &&
2375 				    (MDI_PI(start_pip)->pi_preferred
2376 				    != preferred)) {
2377 					/*
2378 					 * try again after going past the start
2379 					 * pip
2380 					 */
2381 					MDI_PI_LOCK(pip);
2382 					goto do_again;
2383 				}
2384 			} else {
2385 				/*
2386 				 * Standard behavior case
2387 				 */
2388 				if (start == pip && preferred) {
2389 					/* look for nonpreferred paths */
2390 					preferred = 0;
2391 				} else if (start == pip && !preferred) {
2392 					/*
2393 					 * Exit condition
2394 					 */
2395 					cont = 0;
2396 				}
2397 			}
2398 		} while (cont);
2399 		break;
2400 	}
2401 
2402 	MDI_CLIENT_UNLOCK(ct);
2403 	if (retry == 1) {
2404 		return (MDI_BUSY);
2405 	} else {
2406 		return (MDI_NOPATH);
2407 	}
2408 }
2409 
2410 /*
2411  * For a client, return the next available path to any phci
2412  *
2413  * Note:
2414  *		Caller should hold the branch's devinfo node to get a consistent
2415  *		snap shot of the mdi_pathinfo nodes.
2416  *
2417  *		Please note that even the list is stable the mdi_pathinfo
2418  *		node state and properties are volatile.  The caller should lock
2419  *		and unlock the nodes by calling mdi_pi_lock() and
2420  *		mdi_pi_unlock() functions to get a stable properties.
2421  *
2422  *		If there is a need to use the nodes beyond the hold of the
2423  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2424  *		need to be held against unexpected removal by calling
2425  *		mdi_hold_path() and should be released by calling
2426  *		mdi_rele_path() on completion.
2427  */
2428 mdi_pathinfo_t *
2429 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2430 {
2431 	mdi_client_t *ct;
2432 
2433 	if (!MDI_CLIENT(ct_dip))
2434 		return (NULL);
2435 
2436 	/*
2437 	 * Walk through client link
2438 	 */
2439 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2440 	ASSERT(ct != NULL);
2441 
2442 	if (pip == NULL)
2443 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2444 
2445 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2446 }
2447 
2448 /*
2449  * For a phci, return the next available path to any client
2450  * Note: ditto mdi_get_next_phci_path()
2451  */
2452 mdi_pathinfo_t *
2453 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2454 {
2455 	mdi_phci_t *ph;
2456 
2457 	if (!MDI_PHCI(ph_dip))
2458 		return (NULL);
2459 
2460 	/*
2461 	 * Walk through pHCI link
2462 	 */
2463 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2464 	ASSERT(ph != NULL);
2465 
2466 	if (pip == NULL)
2467 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2468 
2469 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2470 }
2471 
2472 /*
2473  * mdi_hold_path():
2474  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2475  * Return Values:
2476  *		None
2477  */
2478 void
2479 mdi_hold_path(mdi_pathinfo_t *pip)
2480 {
2481 	if (pip) {
2482 		MDI_PI_LOCK(pip);
2483 		MDI_PI_HOLD(pip);
2484 		MDI_PI_UNLOCK(pip);
2485 	}
2486 }
2487 
2488 
2489 /*
2490  * mdi_rele_path():
2491  *		Release the mdi_pathinfo node which was selected
2492  *		through mdi_select_path() mechanism or manually held by
2493  *		calling mdi_hold_path().
2494  * Return Values:
2495  *		None
2496  */
2497 void
2498 mdi_rele_path(mdi_pathinfo_t *pip)
2499 {
2500 	if (pip) {
2501 		MDI_PI_LOCK(pip);
2502 		MDI_PI_RELE(pip);
2503 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2504 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2505 		}
2506 		MDI_PI_UNLOCK(pip);
2507 	}
2508 }
2509 
2510 /*
2511  * mdi_pi_lock():
2512  * 		Lock the mdi_pathinfo node.
2513  * Note:
2514  *		The caller should release the lock by calling mdi_pi_unlock()
2515  */
2516 void
2517 mdi_pi_lock(mdi_pathinfo_t *pip)
2518 {
2519 	ASSERT(pip != NULL);
2520 	if (pip) {
2521 		MDI_PI_LOCK(pip);
2522 	}
2523 }
2524 
2525 
2526 /*
2527  * mdi_pi_unlock():
2528  * 		Unlock the mdi_pathinfo node.
2529  * Note:
2530  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2531  */
2532 void
2533 mdi_pi_unlock(mdi_pathinfo_t *pip)
2534 {
2535 	ASSERT(pip != NULL);
2536 	if (pip) {
2537 		MDI_PI_UNLOCK(pip);
2538 	}
2539 }
2540 
2541 /*
2542  * mdi_pi_find():
2543  *		Search the list of mdi_pathinfo nodes attached to the
2544  *		pHCI/Client device node whose path address matches "paddr".
2545  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2546  *		found.
2547  * Return Values:
2548  *		mdi_pathinfo node handle
2549  *		NULL
2550  * Notes:
2551  *		Caller need not hold any locks to call this function.
2552  */
2553 mdi_pathinfo_t *
2554 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2555 {
2556 	mdi_phci_t		*ph;
2557 	mdi_vhci_t		*vh;
2558 	mdi_client_t		*ct;
2559 	mdi_pathinfo_t		*pip = NULL;
2560 
2561 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
2562 	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
2563 	if ((pdip == NULL) || (paddr == NULL)) {
2564 		return (NULL);
2565 	}
2566 	ph = i_devi_get_phci(pdip);
2567 	if (ph == NULL) {
2568 		/*
2569 		 * Invalid pHCI device, Nothing more to do.
2570 		 */
2571 		MDI_DEBUG(2, (CE_WARN, pdip,
2572 		    "!mdi_pi_find: invalid phci"));
2573 		return (NULL);
2574 	}
2575 
2576 	vh = ph->ph_vhci;
2577 	if (vh == NULL) {
2578 		/*
2579 		 * Invalid vHCI device, Nothing more to do.
2580 		 */
2581 		MDI_DEBUG(2, (CE_WARN, pdip,
2582 		    "!mdi_pi_find: invalid vhci"));
2583 		return (NULL);
2584 	}
2585 
2586 	/*
2587 	 * Look for pathinfo node identified by paddr.
2588 	 */
2589 	if (caddr == NULL) {
2590 		/*
2591 		 * Find a mdi_pathinfo node under pHCI list for a matching
2592 		 * unit address.
2593 		 */
2594 		MDI_PHCI_LOCK(ph);
2595 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2596 			MDI_DEBUG(2, (CE_WARN, pdip,
2597 			    "!mdi_pi_find: offline phci %p", (void *)ph));
2598 			MDI_PHCI_UNLOCK(ph);
2599 			return (NULL);
2600 		}
2601 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2602 
2603 		while (pip != NULL) {
2604 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2605 				break;
2606 			}
2607 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2608 		}
2609 		MDI_PHCI_UNLOCK(ph);
2610 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
2611 		    (void *)pip));
2612 		return (pip);
2613 	}
2614 
2615 	/*
2616 	 * XXX - Is the rest of the code in this function really necessary?
2617 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2618 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2619 	 * whether the search is based on the pathinfo nodes attached to
2620 	 * the pHCI or the client node, the result will be the same.
2621 	 */
2622 
2623 	/*
2624 	 * Find the client device corresponding to 'caddr'
2625 	 */
2626 	MDI_VHCI_CLIENT_LOCK(vh);
2627 
2628 	/*
2629 	 * XXX - Passing NULL to the following function works as long as the
2630 	 * the client addresses (caddr) are unique per vhci basis.
2631 	 */
2632 	ct = i_mdi_client_find(vh, NULL, caddr);
2633 	if (ct == NULL) {
2634 		/*
2635 		 * Client not found, Obviously mdi_pathinfo node has not been
2636 		 * created yet.
2637 		 */
2638 		MDI_VHCI_CLIENT_UNLOCK(vh);
2639 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
2640 		    "found for caddr %s", caddr ? caddr : "NULL"));
2641 		return (NULL);
2642 	}
2643 
2644 	/*
2645 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2646 	 * pHCI and paddr
2647 	 */
2648 	MDI_CLIENT_LOCK(ct);
2649 
2650 	/*
2651 	 * Release the global mutex as it is no more needed. Note: We always
2652 	 * respect the locking order while acquiring.
2653 	 */
2654 	MDI_VHCI_CLIENT_UNLOCK(vh);
2655 
2656 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2657 	while (pip != NULL) {
2658 		/*
2659 		 * Compare the unit address
2660 		 */
2661 		if ((MDI_PI(pip)->pi_phci == ph) &&
2662 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2663 			break;
2664 		}
2665 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2666 	}
2667 	MDI_CLIENT_UNLOCK(ct);
2668 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
2669 	return (pip);
2670 }
2671 
2672 /*
2673  * mdi_pi_alloc():
2674  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2675  *		The mdi_pathinfo node returned by this function identifies a
2676  *		unique device path is capable of having properties attached
2677  *		and passed to mdi_pi_online() to fully attach and online the
2678  *		path and client device node.
2679  *		The mdi_pathinfo node returned by this function must be
2680  *		destroyed using mdi_pi_free() if the path is no longer
2681  *		operational or if the caller fails to attach a client device
2682  *		node when calling mdi_pi_online(). The framework will not free
2683  *		the resources allocated.
2684  *		This function can be called from both interrupt and kernel
2685  *		contexts.  DDI_NOSLEEP flag should be used while calling
2686  *		from interrupt contexts.
2687  * Return Values:
2688  *		MDI_SUCCESS
2689  *		MDI_FAILURE
2690  *		MDI_NOMEM
2691  */
2692 /*ARGSUSED*/
2693 int
2694 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2695     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2696 {
2697 	mdi_vhci_t	*vh;
2698 	mdi_phci_t	*ph;
2699 	mdi_client_t	*ct;
2700 	mdi_pathinfo_t	*pip = NULL;
2701 	dev_info_t	*cdip;
2702 	int		rv = MDI_NOMEM;
2703 	int		path_allocated = 0;
2704 
2705 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
2706 	    cname ? cname : "NULL", caddr ? caddr : "NULL",
2707 	    paddr ? paddr : "NULL"));
2708 
2709 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2710 	    ret_pip == NULL) {
2711 		/* Nothing more to do */
2712 		return (MDI_FAILURE);
2713 	}
2714 
2715 	*ret_pip = NULL;
2716 
2717 	/* No allocations on detaching pHCI */
2718 	if (DEVI_IS_DETACHING(pdip)) {
2719 		/* Invalid pHCI device, return failure */
2720 		MDI_DEBUG(1, (CE_WARN, pdip,
2721 		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
2722 		return (MDI_FAILURE);
2723 	}
2724 
2725 	ph = i_devi_get_phci(pdip);
2726 	ASSERT(ph != NULL);
2727 	if (ph == NULL) {
2728 		/* Invalid pHCI device, return failure */
2729 		MDI_DEBUG(1, (CE_WARN, pdip,
2730 		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
2731 		return (MDI_FAILURE);
2732 	}
2733 
2734 	MDI_PHCI_LOCK(ph);
2735 	vh = ph->ph_vhci;
2736 	if (vh == NULL) {
2737 		/* Invalid vHCI device, return failure */
2738 		MDI_DEBUG(1, (CE_WARN, pdip,
2739 		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
2740 		MDI_PHCI_UNLOCK(ph);
2741 		return (MDI_FAILURE);
2742 	}
2743 
2744 	if (MDI_PHCI_IS_READY(ph) == 0) {
2745 		/*
2746 		 * Do not allow new node creation when pHCI is in
2747 		 * offline/suspended states
2748 		 */
2749 		MDI_DEBUG(1, (CE_WARN, pdip,
2750 		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
2751 		MDI_PHCI_UNLOCK(ph);
2752 		return (MDI_BUSY);
2753 	}
2754 	MDI_PHCI_UNSTABLE(ph);
2755 	MDI_PHCI_UNLOCK(ph);
2756 
2757 	/* look for a matching client, create one if not found */
2758 	MDI_VHCI_CLIENT_LOCK(vh);
2759 	ct = i_mdi_client_find(vh, cname, caddr);
2760 	if (ct == NULL) {
2761 		ct = i_mdi_client_alloc(vh, cname, caddr);
2762 		ASSERT(ct != NULL);
2763 	}
2764 
2765 	if (ct->ct_dip == NULL) {
2766 		/*
2767 		 * Allocate a devinfo node
2768 		 */
2769 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2770 		    compatible, ncompatible);
2771 		if (ct->ct_dip == NULL) {
2772 			(void) i_mdi_client_free(vh, ct);
2773 			goto fail;
2774 		}
2775 	}
2776 	cdip = ct->ct_dip;
2777 
2778 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2779 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2780 
2781 	MDI_CLIENT_LOCK(ct);
2782 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2783 	while (pip != NULL) {
2784 		/*
2785 		 * Compare the unit address
2786 		 */
2787 		if ((MDI_PI(pip)->pi_phci == ph) &&
2788 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2789 			break;
2790 		}
2791 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2792 	}
2793 	MDI_CLIENT_UNLOCK(ct);
2794 
2795 	if (pip == NULL) {
2796 		/*
2797 		 * This is a new path for this client device.  Allocate and
2798 		 * initialize a new pathinfo node
2799 		 */
2800 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2801 		ASSERT(pip != NULL);
2802 		path_allocated = 1;
2803 	}
2804 	rv = MDI_SUCCESS;
2805 
2806 fail:
2807 	/*
2808 	 * Release the global mutex.
2809 	 */
2810 	MDI_VHCI_CLIENT_UNLOCK(vh);
2811 
2812 	/*
2813 	 * Mark the pHCI as stable
2814 	 */
2815 	MDI_PHCI_LOCK(ph);
2816 	MDI_PHCI_STABLE(ph);
2817 	MDI_PHCI_UNLOCK(ph);
2818 	*ret_pip = pip;
2819 
2820 	MDI_DEBUG(2, (CE_NOTE, pdip,
2821 	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));
2822 
2823 	if (path_allocated)
2824 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2825 
2826 	return (rv);
2827 }
2828 
2829 /*ARGSUSED*/
2830 int
2831 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2832     int flags, mdi_pathinfo_t **ret_pip)
2833 {
2834 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2835 	    flags, ret_pip));
2836 }
2837 
2838 /*
2839  * i_mdi_pi_alloc():
2840  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2841  * Return Values:
2842  *		mdi_pathinfo
2843  */
2844 /*ARGSUSED*/
2845 static mdi_pathinfo_t *
2846 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2847 {
2848 	mdi_pathinfo_t	*pip;
2849 	int		ct_circular;
2850 	int		ph_circular;
2851 	static char	path[MAXPATHLEN];
2852 	char		*path_persistent;
2853 	int		path_instance;
2854 	mod_hash_val_t	hv;
2855 
2856 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2857 
2858 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2859 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2860 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2861 	    MDI_PATHINFO_STATE_TRANSIENT;
2862 
2863 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2864 		MDI_PI_SET_USER_DISABLE(pip);
2865 
2866 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2867 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2868 
2869 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2870 		MDI_PI_SET_DRV_DISABLE(pip);
2871 
2872 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2873 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2874 	MDI_PI(pip)->pi_client = ct;
2875 	MDI_PI(pip)->pi_phci = ph;
2876 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2877 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2878 
2879         /*
2880 	 * We form the "path" to the pathinfo node, and see if we have
2881 	 * already allocated a 'path_instance' for that "path".  If so,
2882 	 * we use the already allocated 'path_instance'.  If not, we
2883 	 * allocate a new 'path_instance' and associate it with a copy of
2884 	 * the "path" string (which is never freed). The association
2885 	 * between a 'path_instance' this "path" string persists until
2886 	 * reboot.
2887 	 */
2888         mutex_enter(&mdi_pathmap_mutex);
2889 	(void) ddi_pathname(ph->ph_dip, path);
2890 	(void) sprintf(path + strlen(path), "/%s@%s",
2891 	    ddi_node_name(ct->ct_dip), MDI_PI(pip)->pi_addr);
2892         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2893                 path_instance = (uint_t)(intptr_t)hv;
2894         } else {
2895 		/* allocate a new 'path_instance' and persistent "path" */
2896 		path_instance = mdi_pathmap_instance++;
2897 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2898                 (void) mod_hash_insert(mdi_pathmap_bypath,
2899                     (mod_hash_key_t)path_persistent,
2900                     (mod_hash_val_t)(intptr_t)path_instance);
2901 		(void) mod_hash_insert(mdi_pathmap_byinstance,
2902 		    (mod_hash_key_t)(intptr_t)path_instance,
2903 		    (mod_hash_val_t)path_persistent);
2904         }
2905         mutex_exit(&mdi_pathmap_mutex);
2906 	MDI_PI(pip)->pi_path_instance = path_instance;
2907 
2908 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2909 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2910 	MDI_PI(pip)->pi_pprivate = NULL;
2911 	MDI_PI(pip)->pi_cprivate = NULL;
2912 	MDI_PI(pip)->pi_vprivate = NULL;
2913 	MDI_PI(pip)->pi_client_link = NULL;
2914 	MDI_PI(pip)->pi_phci_link = NULL;
2915 	MDI_PI(pip)->pi_ref_cnt = 0;
2916 	MDI_PI(pip)->pi_kstats = NULL;
2917 	MDI_PI(pip)->pi_preferred = 1;
2918 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2919 
2920 	/*
2921 	 * Lock both dev_info nodes against changes in parallel.
2922 	 *
2923 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
2924 	 * This atypical operation is done to synchronize pathinfo nodes
2925 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
2926 	 * the pathinfo nodes are children of the Client.
2927 	 */
2928 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2929 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2930 
2931 	i_mdi_phci_add_path(ph, pip);
2932 	i_mdi_client_add_path(ct, pip);
2933 
2934 	ndi_devi_exit(ph->ph_dip, ph_circular);
2935 	ndi_devi_exit(ct->ct_dip, ct_circular);
2936 
2937 	return (pip);
2938 }
2939 
2940 /*
2941  * mdi_pi_pathname_by_instance():
2942  *	Lookup of "path" by 'path_instance'. Return "path".
2943  *	NOTE: returned "path" remains valid forever (until reboot).
2944  */
2945 char *
2946 mdi_pi_pathname_by_instance(int path_instance)
2947 {
2948 	char		*path;
2949 	mod_hash_val_t	hv;
2950 
2951 	/* mdi_pathmap lookup of "path" by 'path_instance' */
2952 	mutex_enter(&mdi_pathmap_mutex);
2953 	if (mod_hash_find(mdi_pathmap_byinstance,
2954 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
2955 		path = (char *)hv;
2956 	else
2957 		path = NULL;
2958 	mutex_exit(&mdi_pathmap_mutex);
2959 	return (path);
2960 }
2961 
2962 /*
2963  * i_mdi_phci_add_path():
2964  * 		Add a mdi_pathinfo node to pHCI list.
2965  * Notes:
2966  *		Caller should per-pHCI mutex
2967  */
2968 static void
2969 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2970 {
2971 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2972 
2973 	MDI_PHCI_LOCK(ph);
2974 	if (ph->ph_path_head == NULL) {
2975 		ph->ph_path_head = pip;
2976 	} else {
2977 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2978 	}
2979 	ph->ph_path_tail = pip;
2980 	ph->ph_path_count++;
2981 	MDI_PHCI_UNLOCK(ph);
2982 }
2983 
2984 /*
2985  * i_mdi_client_add_path():
2986  *		Add mdi_pathinfo node to client list
2987  */
2988 static void
2989 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2990 {
2991 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2992 
2993 	MDI_CLIENT_LOCK(ct);
2994 	if (ct->ct_path_head == NULL) {
2995 		ct->ct_path_head = pip;
2996 	} else {
2997 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2998 	}
2999 	ct->ct_path_tail = pip;
3000 	ct->ct_path_count++;
3001 	MDI_CLIENT_UNLOCK(ct);
3002 }
3003 
3004 /*
3005  * mdi_pi_free():
3006  *		Free the mdi_pathinfo node and also client device node if this
3007  *		is the last path to the device
3008  * Return Values:
3009  *		MDI_SUCCESS
3010  *		MDI_FAILURE
3011  *		MDI_BUSY
3012  */
3013 /*ARGSUSED*/
3014 int
3015 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3016 {
3017 	int		rv = MDI_FAILURE;
3018 	mdi_vhci_t	*vh;
3019 	mdi_phci_t	*ph;
3020 	mdi_client_t	*ct;
3021 	int		(*f)();
3022 	int		client_held = 0;
3023 
3024 	MDI_PI_LOCK(pip);
3025 	ph = MDI_PI(pip)->pi_phci;
3026 	ASSERT(ph != NULL);
3027 	if (ph == NULL) {
3028 		/*
3029 		 * Invalid pHCI device, return failure
3030 		 */
3031 		MDI_DEBUG(1, (CE_WARN, NULL,
3032 		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
3033 		MDI_PI_UNLOCK(pip);
3034 		return (MDI_FAILURE);
3035 	}
3036 
3037 	vh = ph->ph_vhci;
3038 	ASSERT(vh != NULL);
3039 	if (vh == NULL) {
3040 		/* Invalid pHCI device, return failure */
3041 		MDI_DEBUG(1, (CE_WARN, NULL,
3042 		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
3043 		MDI_PI_UNLOCK(pip);
3044 		return (MDI_FAILURE);
3045 	}
3046 
3047 	ct = MDI_PI(pip)->pi_client;
3048 	ASSERT(ct != NULL);
3049 	if (ct == NULL) {
3050 		/*
3051 		 * Invalid Client device, return failure
3052 		 */
3053 		MDI_DEBUG(1, (CE_WARN, NULL,
3054 		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
3055 		MDI_PI_UNLOCK(pip);
3056 		return (MDI_FAILURE);
3057 	}
3058 
3059 	/*
3060 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
3061 	 * if the node state is either offline or init and the reference count
3062 	 * is zero.
3063 	 */
3064 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3065 	    MDI_PI_IS_INITING(pip))) {
3066 		/*
3067 		 * Node is busy
3068 		 */
3069 		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3070 		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
3071 		MDI_PI_UNLOCK(pip);
3072 		return (MDI_BUSY);
3073 	}
3074 
3075 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3076 		/*
3077 		 * Give a chance for pending I/Os to complete.
3078 		 */
3079 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
3080 		    "%d cmds still pending on path: %p\n",
3081 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3082 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3083 		    &MDI_PI(pip)->pi_mutex,
3084 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3085 			/*
3086 			 * The timeout time reached without ref_cnt being zero
3087 			 * being signaled.
3088 			 */
3089 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
3090 			    "!mdi_pi_free: "
3091 			    "Timeout reached on path %p without the cond\n",
3092 			    (void *)pip));
3093 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
3094 			    "!mdi_pi_free: "
3095 			    "%d cmds still pending on path: %p\n",
3096 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3097 			MDI_PI_UNLOCK(pip);
3098 			return (MDI_BUSY);
3099 		}
3100 	}
3101 	if (MDI_PI(pip)->pi_pm_held) {
3102 		client_held = 1;
3103 	}
3104 	MDI_PI_UNLOCK(pip);
3105 
3106 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3107 
3108 	MDI_CLIENT_LOCK(ct);
3109 
3110 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3111 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3112 
3113 	/*
3114 	 * Wait till failover is complete before removing this node.
3115 	 */
3116 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3117 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3118 
3119 	MDI_CLIENT_UNLOCK(ct);
3120 	MDI_VHCI_CLIENT_LOCK(vh);
3121 	MDI_CLIENT_LOCK(ct);
3122 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3123 
3124 	if (!MDI_PI_IS_INITING(pip)) {
3125 		f = vh->vh_ops->vo_pi_uninit;
3126 		if (f != NULL) {
3127 			rv = (*f)(vh->vh_dip, pip, 0);
3128 		}
3129 	}
3130 	/*
3131 	 * If vo_pi_uninit() completed successfully.
3132 	 */
3133 	if (rv == MDI_SUCCESS) {
3134 		if (client_held) {
3135 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
3136 			    "i_mdi_pm_rele_client\n"));
3137 			i_mdi_pm_rele_client(ct, 1);
3138 		}
3139 		i_mdi_pi_free(ph, pip, ct);
3140 		if (ct->ct_path_count == 0) {
3141 			/*
3142 			 * Client lost its last path.
3143 			 * Clean up the client device
3144 			 */
3145 			MDI_CLIENT_UNLOCK(ct);
3146 			(void) i_mdi_client_free(ct->ct_vhci, ct);
3147 			MDI_VHCI_CLIENT_UNLOCK(vh);
3148 			return (rv);
3149 		}
3150 	}
3151 	MDI_CLIENT_UNLOCK(ct);
3152 	MDI_VHCI_CLIENT_UNLOCK(vh);
3153 
3154 	if (rv == MDI_FAILURE)
3155 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3156 
3157 	return (rv);
3158 }
3159 
3160 /*
3161  * i_mdi_pi_free():
3162  *		Free the mdi_pathinfo node
3163  */
3164 static void
3165 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3166 {
3167 	int	ct_circular;
3168 	int	ph_circular;
3169 
3170 	ASSERT(MDI_CLIENT_LOCKED(ct));
3171 
3172 	/*
3173 	 * remove any per-path kstats
3174 	 */
3175 	i_mdi_pi_kstat_destroy(pip);
3176 
3177 	/* See comments in i_mdi_pi_alloc() */
3178 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3179 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3180 
3181 	i_mdi_client_remove_path(ct, pip);
3182 	i_mdi_phci_remove_path(ph, pip);
3183 
3184 	ndi_devi_exit(ph->ph_dip, ph_circular);
3185 	ndi_devi_exit(ct->ct_dip, ct_circular);
3186 
3187 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3188 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3189 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3190 	if (MDI_PI(pip)->pi_addr) {
3191 		kmem_free(MDI_PI(pip)->pi_addr,
3192 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3193 		MDI_PI(pip)->pi_addr = NULL;
3194 	}
3195 
3196 	if (MDI_PI(pip)->pi_prop) {
3197 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3198 		MDI_PI(pip)->pi_prop = NULL;
3199 	}
3200 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3201 }
3202 
3203 
3204 /*
3205  * i_mdi_phci_remove_path():
3206  * 		Remove a mdi_pathinfo node from pHCI list.
3207  * Notes:
3208  *		Caller should hold per-pHCI mutex
3209  */
3210 static void
3211 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3212 {
3213 	mdi_pathinfo_t	*prev = NULL;
3214 	mdi_pathinfo_t	*path = NULL;
3215 
3216 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3217 
3218 	MDI_PHCI_LOCK(ph);
3219 	path = ph->ph_path_head;
3220 	while (path != NULL) {
3221 		if (path == pip) {
3222 			break;
3223 		}
3224 		prev = path;
3225 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3226 	}
3227 
3228 	if (path) {
3229 		ph->ph_path_count--;
3230 		if (prev) {
3231 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3232 		} else {
3233 			ph->ph_path_head =
3234 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3235 		}
3236 		if (ph->ph_path_tail == path) {
3237 			ph->ph_path_tail = prev;
3238 		}
3239 	}
3240 
3241 	/*
3242 	 * Clear the pHCI link
3243 	 */
3244 	MDI_PI(pip)->pi_phci_link = NULL;
3245 	MDI_PI(pip)->pi_phci = NULL;
3246 	MDI_PHCI_UNLOCK(ph);
3247 }
3248 
3249 /*
3250  * i_mdi_client_remove_path():
3251  * 		Remove a mdi_pathinfo node from client path list.
3252  */
3253 static void
3254 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3255 {
3256 	mdi_pathinfo_t	*prev = NULL;
3257 	mdi_pathinfo_t	*path;
3258 
3259 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3260 
3261 	ASSERT(MDI_CLIENT_LOCKED(ct));
3262 	path = ct->ct_path_head;
3263 	while (path != NULL) {
3264 		if (path == pip) {
3265 			break;
3266 		}
3267 		prev = path;
3268 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3269 	}
3270 
3271 	if (path) {
3272 		ct->ct_path_count--;
3273 		if (prev) {
3274 			MDI_PI(prev)->pi_client_link =
3275 			    MDI_PI(path)->pi_client_link;
3276 		} else {
3277 			ct->ct_path_head =
3278 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3279 		}
3280 		if (ct->ct_path_tail == path) {
3281 			ct->ct_path_tail = prev;
3282 		}
3283 		if (ct->ct_path_last == path) {
3284 			ct->ct_path_last = ct->ct_path_head;
3285 		}
3286 	}
3287 	MDI_PI(pip)->pi_client_link = NULL;
3288 	MDI_PI(pip)->pi_client = NULL;
3289 }
3290 
/*
 * i_mdi_pi_state_change():
 *		Common worker that moves a mdi_pathinfo node to the requested
 *		state (ONLINE, STANDBY, FAULT or OFFLINE).  It is the backend
 *		of mdi_pi_online(), mdi_pi_standby(), mdi_pi_fault() and
 *		mdi_pi_offline().
 *
 *		Outline:
 *		  1. Validate the pHCI, vHCI and client back pointers.
 *		  2. If the path is still INITING, call the vHCI's
 *		     vo_pi_init entry point (when registered).
 *		  3. Refuse the transition if the pHCI is not ready;
 *		     otherwise mark the pHCI unstable.
 *		  4. Wait for any transient path state and for any client
 *		     failover in progress to clear.
 *		  5. Mark the path with the transient form of the target
 *		     state and call the vHCI's vo_pi_state_change entry
 *		     point (when registered).
 *		  6. On success, once the client is stable, update the
 *		     client state and online/offline the client devinfo
 *		     node as required.
 *		  7. Mark the pHCI stable again.
 *
 * Arguments:
 *		pip	- path being changed
 *		state	- requested MDI_PATHINFO_STATE_* value
 *		flag	- NDI flags; NDI_DEVI_REMOVE is examined here and the
 *			  whole value is passed to vo_pi_state_change
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_BUSY	(pHCI not ready, or devinfo operation busy)
 *		MDI_FAILURE
 *		A non-success value from vo_pi_state_change is returned as is.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point.  The path mutex is dropped
	 * across the callback; on callback failure we return with it still
	 * dropped.
	 */

	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Undone by MDI_PHCI_STABLE() at state_change_exit. */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Don't offline the client dev_info node unless we have
		 * no available paths left at all.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (ct->ct_path_count == 1)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code.  Neither the
				 * client nor the path lock is held here, so
				 * jump straight to the pHCI cleanup.
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI's state change entry point with no MDI locks held.
	 * If none is registered, rv keeps its current (success) value.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	/*
	 * Commit the new state on success, or fall back to the previous
	 * state on failure.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Changing the mdi_pathinfo node state will impact
			 * the client state.  Update the client and dev_info
			 * node state accordingly.  From here until the
			 * conversion switch below, rv holds an NDI_* code.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3610 
3611 /*
3612  * mdi_pi_online():
3613  *		Place the path_info node in the online state.  The path is
3614  *		now available to be selected by mdi_select_path() for
3615  *		transporting I/O requests to client devices.
3616  * Return Values:
3617  *		MDI_SUCCESS
3618  *		MDI_FAILURE
3619  */
3620 int
3621 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3622 {
3623 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3624 	int		client_held = 0;
3625 	int		rv;
3626 	int		se_flag;
3627 	int		kmem_flag;
3628 
3629 	ASSERT(ct != NULL);
3630 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3631 	if (rv != MDI_SUCCESS)
3632 		return (rv);
3633 
3634 	MDI_PI_LOCK(pip);
3635 	if (MDI_PI(pip)->pi_pm_held == 0) {
3636 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3637 		    "i_mdi_pm_hold_pip %p\n", (void *)pip));
3638 		i_mdi_pm_hold_pip(pip);
3639 		client_held = 1;
3640 	}
3641 	MDI_PI_UNLOCK(pip);
3642 
3643 	if (client_held) {
3644 		MDI_CLIENT_LOCK(ct);
3645 		if (ct->ct_power_cnt == 0) {
3646 			rv = i_mdi_power_all_phci(ct);
3647 		}
3648 
3649 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3650 		    "i_mdi_pm_hold_client %p\n", (void *)ct));
3651 		i_mdi_pm_hold_client(ct, 1);
3652 		MDI_CLIENT_UNLOCK(ct);
3653 	}
3654 
3655 	/* determine interrupt context */
3656 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3657 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3658 
3659 	/* A new path is online.  Invalidate DINFOCACHE snap shot. */
3660 	i_ddi_di_cache_invalidate(kmem_flag);
3661 
3662 	return (rv);
3663 }
3664 
3665 /*
3666  * mdi_pi_standby():
3667  *		Place the mdi_pathinfo node in standby state
3668  *
3669  * Return Values:
3670  *		MDI_SUCCESS
3671  *		MDI_FAILURE
3672  */
3673 int
3674 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3675 {
3676 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3677 }
3678 
3679 /*
3680  * mdi_pi_fault():
3681  *		Place the mdi_pathinfo node in fault'ed state
3682  * Return Values:
3683  *		MDI_SUCCESS
3684  *		MDI_FAILURE
3685  */
3686 int
3687 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3688 {
3689 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3690 }
3691 
3692 /*
3693  * mdi_pi_offline():
3694  *		Offline a mdi_pathinfo node.
3695  * Return Values:
3696  *		MDI_SUCCESS
3697  *		MDI_FAILURE
3698  */
3699 int
3700 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3701 {
3702 	int	ret, client_held = 0;
3703 	mdi_client_t	*ct;
3704 	int		se_flag;
3705 	int		kmem_flag;
3706 
3707 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3708 
3709 	if (ret == MDI_SUCCESS) {
3710 		MDI_PI_LOCK(pip);
3711 		if (MDI_PI(pip)->pi_pm_held) {
3712 			client_held = 1;
3713 		}
3714 		MDI_PI_UNLOCK(pip);
3715 
3716 		if (client_held) {
3717 			ct = MDI_PI(pip)->pi_client;
3718 			MDI_CLIENT_LOCK(ct);
3719 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3720 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3721 			i_mdi_pm_rele_client(ct, 1);
3722 			MDI_CLIENT_UNLOCK(ct);
3723 		}
3724 
3725 		/* determine interrupt context */
3726 		se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3727 		kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3728 
3729 		/* pathinfo is offlined. update DINFOCACHE. */
3730 		i_ddi_di_cache_invalidate(kmem_flag);
3731 	}
3732 
3733 	return (ret);
3734 }
3735 
3736 /*
3737  * i_mdi_pi_offline():
3738  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3739  */
3740 static int
3741 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3742 {
3743 	dev_info_t	*vdip = NULL;
3744 	mdi_vhci_t	*vh = NULL;
3745 	mdi_client_t	*ct = NULL;
3746 	int		(*f)();
3747 	int		rv;
3748 
3749 	MDI_PI_LOCK(pip);
3750 	ct = MDI_PI(pip)->pi_client;
3751 	ASSERT(ct != NULL);
3752 
3753 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3754 		/*
3755 		 * Give a chance for pending I/Os to complete.
3756 		 */
3757 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3758 		    "%d cmds still pending on path: %p\n",
3759 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3760 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3761 		    &MDI_PI(pip)->pi_mutex,
3762 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3763 			/*
3764 			 * The timeout time reached without ref_cnt being zero
3765 			 * being signaled.
3766 			 */
3767 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3768 			    "Timeout reached on path %p without the cond\n",
3769 			    (void *)pip));
3770 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3771 			    "%d cmds still pending on path: %p\n",
3772 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3773 		}
3774 	}
3775 	vh = ct->ct_vhci;
3776 	vdip = vh->vh_dip;
3777 
3778 	/*
3779 	 * Notify vHCI that has registered this event
3780 	 */
3781 	ASSERT(vh->vh_ops);
3782 	f = vh->vh_ops->vo_pi_state_change;
3783 
3784 	if (f != NULL) {
3785 		MDI_PI_UNLOCK(pip);
3786 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3787 		    flags)) != MDI_SUCCESS) {
3788 			MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3789 			    "!vo_path_offline failed "
3790 			    "vdip %p, pip %p", (void *)vdip, (void *)pip));
3791 		}
3792 		MDI_PI_LOCK(pip);
3793 	}
3794 
3795 	/*
3796 	 * Set the mdi_pathinfo node state and clear the transient condition
3797 	 */
3798 	MDI_PI_SET_OFFLINE(pip);
3799 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3800 	MDI_PI_UNLOCK(pip);
3801 
3802 	MDI_CLIENT_LOCK(ct);
3803 	if (rv == MDI_SUCCESS) {
3804 		if (ct->ct_unstable == 0) {
3805 			dev_info_t	*cdip = ct->ct_dip;
3806 
3807 			/*
3808 			 * Onlining the mdi_pathinfo node will impact the
3809 			 * client state Update the client and dev_info node
3810 			 * state accordingly
3811 			 */
3812 			i_mdi_client_update_state(ct);
3813 			rv = NDI_SUCCESS;
3814 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3815 				if (cdip &&
3816 				    (i_ddi_node_state(cdip) >=
3817 				    DS_INITIALIZED)) {
3818 					MDI_CLIENT_UNLOCK(ct);
3819 					rv = ndi_devi_offline(cdip, 0);
3820 					MDI_CLIENT_LOCK(ct);
3821 					if (rv != NDI_SUCCESS) {
3822 						/*
3823 						 * ndi_devi_offline failed.
3824 						 * Reset client flags to
3825 						 * online.
3826 						 */
3827 						MDI_DEBUG(4, (CE_WARN, cdip,
3828 						    "!ndi_devi_offline: failed "
3829 						    " Error: %x", rv));
3830 						MDI_CLIENT_SET_ONLINE(ct);
3831 					}
3832 				}
3833 			}
3834 			/*
3835 			 * Convert to MDI error code
3836 			 */
3837 			switch (rv) {
3838 			case NDI_SUCCESS:
3839 				rv = MDI_SUCCESS;
3840 				break;
3841 			case NDI_BUSY:
3842 				rv = MDI_BUSY;
3843 				break;
3844 			default:
3845 				rv = MDI_FAILURE;
3846 				break;
3847 			}
3848 		}
3849 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3850 		i_mdi_report_path_state(ct, pip);
3851 	}
3852 
3853 	MDI_CLIENT_UNLOCK(ct);
3854 
3855 	/*
3856 	 * Change in the mdi_pathinfo node state will impact the client state
3857 	 */
3858 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3859 	    (void *)ct, (void *)pip));
3860 	return (rv);
3861 }
3862 
3863 
3864 /*
3865  * mdi_pi_get_addr():
3866  *		Get the unit address associated with a mdi_pathinfo node
3867  *
3868  * Return Values:
3869  *		char *
3870  */
3871 char *
3872 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3873 {
3874 	if (pip == NULL)
3875 		return (NULL);
3876 
3877 	return (MDI_PI(pip)->pi_addr);
3878 }
3879 
3880 /*
3881  * mdi_pi_get_path_instance():
3882  *		Get the 'path_instance' of a mdi_pathinfo node
3883  *
3884  * Return Values:
3885  *		path_instance
3886  */
3887 int
3888 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
3889 {
3890 	if (pip == NULL)
3891 		return (0);
3892 
3893 	return (MDI_PI(pip)->pi_path_instance);
3894 }
3895 
3896 /*
3897  * mdi_pi_pathname():
3898  *		Return pointer to path to pathinfo node.
3899  */
3900 char *
3901 mdi_pi_pathname(mdi_pathinfo_t *pip)
3902 {
3903 	if (pip == NULL)
3904 		return (NULL);
3905 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
3906 }
3907 
3908 char *
3909 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
3910 {
3911 	char *obp_path = NULL;
3912 	if ((pip == NULL) || (path == NULL))
3913 		return (NULL);
3914 
3915 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
3916 		(void) strcpy(path, obp_path);
3917 		(void) mdi_prop_free(obp_path);
3918 	} else {
3919 		path = NULL;
3920 	}
3921 	return (path);
3922 }
3923 
3924 int
3925 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
3926 {
3927 	dev_info_t *pdip;
3928 	char obp_path[MAXPATHLEN];
3929 
3930 	if (pip == NULL)
3931 		return (MDI_FAILURE);
3932 	bzero(obp_path, sizeof (obp_path));
3933 
3934 	pdip = mdi_pi_get_phci(pip);
3935 	if (pdip == NULL)
3936 		return (MDI_FAILURE);
3937 
3938 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
3939 		(void) ddi_pathname(pdip, obp_path);
3940 	}
3941 
3942 	if (component) {
3943 		(void) strncat(obp_path, "/", sizeof (obp_path));
3944 		(void) strncat(obp_path, component, sizeof (obp_path));
3945 	}
3946 
3947 	return (mdi_prop_update_string(pip, "obp-path", obp_path));
3948 }
3949 
3950 /*
3951  * mdi_pi_get_client():
3952  *		Get the client devinfo associated with a mdi_pathinfo node
3953  *
3954  * Return Values:
3955  *		Handle to client device dev_info node
3956  */
3957 dev_info_t *
3958 mdi_pi_get_client(mdi_pathinfo_t *pip)
3959 {
3960 	dev_info_t	*dip = NULL;
3961 	if (pip) {
3962 		dip = MDI_PI(pip)->pi_client->ct_dip;
3963 	}
3964 	return (dip);
3965 }
3966 
3967 /*
3968  * mdi_pi_get_phci():
3969  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3970  * Return Values:
3971  *		Handle to dev_info node
3972  */
3973 dev_info_t *
3974 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3975 {
3976 	dev_info_t	*dip = NULL;
3977 	if (pip) {
3978 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3979 	}
3980 	return (dip);
3981 }
3982 
3983 /*
3984  * mdi_pi_get_client_private():
3985  *		Get the client private information associated with the
3986  *		mdi_pathinfo node
3987  */
3988 void *
3989 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3990 {
3991 	void *cprivate = NULL;
3992 	if (pip) {
3993 		cprivate = MDI_PI(pip)->pi_cprivate;
3994 	}
3995 	return (cprivate);
3996 }
3997 
3998 /*
3999  * mdi_pi_set_client_private():
4000  *		Set the client private information in the mdi_pathinfo node
4001  */
4002 void
4003 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4004 {
4005 	if (pip) {
4006 		MDI_PI(pip)->pi_cprivate = priv;
4007 	}
4008 }
4009 
4010 /*
4011  * mdi_pi_get_phci_private():
4012  *		Get the pHCI private information associated with the
4013  *		mdi_pathinfo node
4014  */
4015 caddr_t
4016 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4017 {
4018 	caddr_t	pprivate = NULL;
4019 	if (pip) {
4020 		pprivate = MDI_PI(pip)->pi_pprivate;
4021 	}
4022 	return (pprivate);
4023 }
4024 
4025 /*
4026  * mdi_pi_set_phci_private():
4027  *		Set the pHCI private information in the mdi_pathinfo node
4028  */
4029 void
4030 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4031 {
4032 	if (pip) {
4033 		MDI_PI(pip)->pi_pprivate = priv;
4034 	}
4035 }
4036 
4037 /*
4038  * mdi_pi_get_state():
4039  *		Get the mdi_pathinfo node state. Transient states are internal
4040  *		and not provided to the users
4041  */
4042 mdi_pathinfo_state_t
4043 mdi_pi_get_state(mdi_pathinfo_t *pip)
4044 {
4045 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4046 
4047 	if (pip) {
4048 		if (MDI_PI_IS_TRANSIENT(pip)) {
4049 			/*
4050 			 * mdi_pathinfo is in state transition.  Return the
4051 			 * last good state.
4052 			 */
4053 			state = MDI_PI_OLD_STATE(pip);
4054 		} else {
4055 			state = MDI_PI_STATE(pip);
4056 		}
4057 	}
4058 	return (state);
4059 }
4060 
4061 /*
4062  * Note that the following function needs to be the new interface for
4063  * mdi_pi_get_state when mpxio gets integrated to ON.
4064  */
4065 int
4066 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4067 		uint32_t *ext_state)
4068 {
4069 	*state = MDI_PATHINFO_STATE_INIT;
4070 
4071 	if (pip) {
4072 		if (MDI_PI_IS_TRANSIENT(pip)) {
4073 			/*
4074 			 * mdi_pathinfo is in state transition.  Return the
4075 			 * last good state.
4076 			 */
4077 			*state = MDI_PI_OLD_STATE(pip);
4078 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
4079 		} else {
4080 			*state = MDI_PI_STATE(pip);
4081 			*ext_state = MDI_PI_EXT_STATE(pip);
4082 		}
4083 	}
4084 	return (MDI_SUCCESS);
4085 }
4086 
4087 /*
4088  * mdi_pi_get_preferred:
4089  *	Get the preferred path flag
4090  */
4091 int
4092 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4093 {
4094 	if (pip) {
4095 		return (MDI_PI(pip)->pi_preferred);
4096 	}
4097 	return (0);
4098 }
4099 
4100 /*
4101  * mdi_pi_set_preferred:
4102  *	Set the preferred path flag
4103  */
4104 void
4105 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4106 {
4107 	if (pip) {
4108 		MDI_PI(pip)->pi_preferred = preferred;
4109 	}
4110 }
4111 
4112 /*
4113  * mdi_pi_set_state():
4114  *		Set the mdi_pathinfo node state
4115  */
4116 void
4117 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4118 {
4119 	uint32_t	ext_state;
4120 
4121 	if (pip) {
4122 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4123 		MDI_PI(pip)->pi_state = state;
4124 		MDI_PI(pip)->pi_state |= ext_state;
4125 	}
4126 }
4127 
4128 /*
4129  * Property functions:
4130  */
4131 int
4132 i_map_nvlist_error_to_mdi(int val)
4133 {
4134 	int rv;
4135 
4136 	switch (val) {
4137 	case 0:
4138 		rv = DDI_PROP_SUCCESS;
4139 		break;
4140 	case EINVAL:
4141 	case ENOTSUP:
4142 		rv = DDI_PROP_INVAL_ARG;
4143 		break;
4144 	case ENOMEM:
4145 		rv = DDI_PROP_NO_MEMORY;
4146 		break;
4147 	default:
4148 		rv = DDI_PROP_NOT_FOUND;
4149 		break;
4150 	}
4151 	return (rv);
4152 }
4153 
4154 /*
4155  * mdi_pi_get_next_prop():
4156  * 		Property walk function.  The caller should hold mdi_pi_lock()
4157  *		and release by calling mdi_pi_unlock() at the end of walk to
4158  *		get a consistent value.
4159  */
4160 nvpair_t *
4161 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4162 {
4163 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4164 		return (NULL);
4165 	}
4166 	ASSERT(MDI_PI_LOCKED(pip));
4167 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4168 }
4169 
4170 /*
4171  * mdi_prop_remove():
4172  * 		Remove the named property from the named list.
4173  */
4174 int
4175 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4176 {
4177 	if (pip == NULL) {
4178 		return (DDI_PROP_NOT_FOUND);
4179 	}
4180 	ASSERT(!MDI_PI_LOCKED(pip));
4181 	MDI_PI_LOCK(pip);
4182 	if (MDI_PI(pip)->pi_prop == NULL) {
4183 		MDI_PI_UNLOCK(pip);
4184 		return (DDI_PROP_NOT_FOUND);
4185 	}
4186 	if (name) {
4187 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4188 	} else {
4189 		char		nvp_name[MAXNAMELEN];
4190 		nvpair_t	*nvp;
4191 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4192 		while (nvp) {
4193 			nvpair_t	*next;
4194 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4195 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
4196 			    nvpair_name(nvp));
4197 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4198 			    nvp_name);
4199 			nvp = next;
4200 		}
4201 	}
4202 	MDI_PI_UNLOCK(pip);
4203 	return (DDI_PROP_SUCCESS);
4204 }
4205 
4206 /*
4207  * mdi_prop_size():
4208  * 		Get buffer size needed to pack the property data.
4209  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
4210  *		buffer size.
4211  */
4212 int
4213 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4214 {
4215 	int	rv;
4216 	size_t	bufsize;
4217 
4218 	*buflenp = 0;
4219 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4220 		return (DDI_PROP_NOT_FOUND);
4221 	}
4222 	ASSERT(MDI_PI_LOCKED(pip));
4223 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4224 	    &bufsize, NV_ENCODE_NATIVE);
4225 	*buflenp = bufsize;
4226 	return (i_map_nvlist_error_to_mdi(rv));
4227 }
4228 
4229 /*
4230  * mdi_prop_pack():
4231  * 		pack the property list.  The caller should hold the
4232  *		mdi_pathinfo_t node to get a consistent data
4233  */
4234 int
4235 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4236 {
4237 	int	rv;
4238 	size_t	bufsize;
4239 
4240 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4241 		return (DDI_PROP_NOT_FOUND);
4242 	}
4243 
4244 	ASSERT(MDI_PI_LOCKED(pip));
4245 
4246 	bufsize = buflen;
4247 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4248 	    NV_ENCODE_NATIVE, KM_SLEEP);
4249 
4250 	return (i_map_nvlist_error_to_mdi(rv));
4251 }
4252 
4253 /*
4254  * mdi_prop_update_byte():
4255  *		Create/Update a byte property
4256  */
4257 int
4258 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4259 {
4260 	int rv;
4261 
4262 	if (pip == NULL) {
4263 		return (DDI_PROP_INVAL_ARG);
4264 	}
4265 	ASSERT(!MDI_PI_LOCKED(pip));
4266 	MDI_PI_LOCK(pip);
4267 	if (MDI_PI(pip)->pi_prop == NULL) {
4268 		MDI_PI_UNLOCK(pip);
4269 		return (DDI_PROP_NOT_FOUND);
4270 	}
4271 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4272 	MDI_PI_UNLOCK(pip);
4273 	return (i_map_nvlist_error_to_mdi(rv));
4274 }
4275 
4276 /*
4277  * mdi_prop_update_byte_array():
4278  *		Create/Update a byte array property
4279  */
4280 int
4281 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4282     uint_t nelements)
4283 {
4284 	int rv;
4285 
4286 	if (pip == NULL) {
4287 		return (DDI_PROP_INVAL_ARG);
4288 	}
4289 	ASSERT(!MDI_PI_LOCKED(pip));
4290 	MDI_PI_LOCK(pip);
4291 	if (MDI_PI(pip)->pi_prop == NULL) {
4292 		MDI_PI_UNLOCK(pip);
4293 		return (DDI_PROP_NOT_FOUND);
4294 	}
4295 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4296 	MDI_PI_UNLOCK(pip);
4297 	return (i_map_nvlist_error_to_mdi(rv));
4298 }
4299 
4300 /*
4301  * mdi_prop_update_int():
4302  *		Create/Update a 32 bit integer property
4303  */
4304 int
4305 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4306 {
4307 	int rv;
4308 
4309 	if (pip == NULL) {
4310 		return (DDI_PROP_INVAL_ARG);
4311 	}
4312 	ASSERT(!MDI_PI_LOCKED(pip));
4313 	MDI_PI_LOCK(pip);
4314 	if (MDI_PI(pip)->pi_prop == NULL) {
4315 		MDI_PI_UNLOCK(pip);
4316 		return (DDI_PROP_NOT_FOUND);
4317 	}
4318 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4319 	MDI_PI_UNLOCK(pip);
4320 	return (i_map_nvlist_error_to_mdi(rv));
4321 }
4322 
4323 /*
4324  * mdi_prop_update_int64():
4325  *		Create/Update a 64 bit integer property
4326  */
4327 int
4328 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4329 {
4330 	int rv;
4331 
4332 	if (pip == NULL) {
4333 		return (DDI_PROP_INVAL_ARG);
4334 	}
4335 	ASSERT(!MDI_PI_LOCKED(pip));
4336 	MDI_PI_LOCK(pip);
4337 	if (MDI_PI(pip)->pi_prop == NULL) {
4338 		MDI_PI_UNLOCK(pip);
4339 		return (DDI_PROP_NOT_FOUND);
4340 	}
4341 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4342 	MDI_PI_UNLOCK(pip);
4343 	return (i_map_nvlist_error_to_mdi(rv));
4344 }
4345 
4346 /*
4347  * mdi_prop_update_int_array():
4348  *		Create/Update a int array property
4349  */
4350 int
4351 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4352 	    uint_t nelements)
4353 {
4354 	int rv;
4355 
4356 	if (pip == NULL) {
4357 		return (DDI_PROP_INVAL_ARG);
4358 	}
4359 	ASSERT(!MDI_PI_LOCKED(pip));
4360 	MDI_PI_LOCK(pip);
4361 	if (MDI_PI(pip)->pi_prop == NULL) {
4362 		MDI_PI_UNLOCK(pip);
4363 		return (DDI_PROP_NOT_FOUND);
4364 	}
4365 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4366 	    nelements);
4367 	MDI_PI_UNLOCK(pip);
4368 	return (i_map_nvlist_error_to_mdi(rv));
4369 }
4370 
4371 /*
4372  * mdi_prop_update_string():
4373  *		Create/Update a string property
4374  */
4375 int
4376 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4377 {
4378 	int rv;
4379 
4380 	if (pip == NULL) {
4381 		return (DDI_PROP_INVAL_ARG);
4382 	}
4383 	ASSERT(!MDI_PI_LOCKED(pip));
4384 	MDI_PI_LOCK(pip);
4385 	if (MDI_PI(pip)->pi_prop == NULL) {
4386 		MDI_PI_UNLOCK(pip);
4387 		return (DDI_PROP_NOT_FOUND);
4388 	}
4389 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4390 	MDI_PI_UNLOCK(pip);
4391 	return (i_map_nvlist_error_to_mdi(rv));
4392 }
4393 
4394 /*
4395  * mdi_prop_update_string_array():
4396  *		Create/Update a string array property
4397  */
4398 int
4399 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4400     uint_t nelements)
4401 {
4402 	int rv;
4403 
4404 	if (pip == NULL) {
4405 		return (DDI_PROP_INVAL_ARG);
4406 	}
4407 	ASSERT(!MDI_PI_LOCKED(pip));
4408 	MDI_PI_LOCK(pip);
4409 	if (MDI_PI(pip)->pi_prop == NULL) {
4410 		MDI_PI_UNLOCK(pip);
4411 		return (DDI_PROP_NOT_FOUND);
4412 	}
4413 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4414 	    nelements);
4415 	MDI_PI_UNLOCK(pip);
4416 	return (i_map_nvlist_error_to_mdi(rv));
4417 }
4418 
4419 /*
4420  * mdi_prop_lookup_byte():
4421  * 		Look for byte property identified by name.  The data returned
4422  *		is the actual property and valid as long as mdi_pathinfo_t node
4423  *		is alive.
4424  */
4425 int
4426 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4427 {
4428 	int rv;
4429 
4430 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4431 		return (DDI_PROP_NOT_FOUND);
4432 	}
4433 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4434 	return (i_map_nvlist_error_to_mdi(rv));
4435 }
4436 
4437 
4438 /*
4439  * mdi_prop_lookup_byte_array():
4440  * 		Look for byte array property identified by name.  The data
4441  *		returned is the actual property and valid as long as
4442  *		mdi_pathinfo_t node is alive.
4443  */
4444 int
4445 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4446     uint_t *nelements)
4447 {
4448 	int rv;
4449 
4450 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4451 		return (DDI_PROP_NOT_FOUND);
4452 	}
4453 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4454 	    nelements);
4455 	return (i_map_nvlist_error_to_mdi(rv));
4456 }
4457 
4458 /*
4459  * mdi_prop_lookup_int():
4460  * 		Look for int property identified by name.  The data returned
4461  *		is the actual property and valid as long as mdi_pathinfo_t
4462  *		node is alive.
4463  */
4464 int
4465 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4466 {
4467 	int rv;
4468 
4469 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4470 		return (DDI_PROP_NOT_FOUND);
4471 	}
4472 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4473 	return (i_map_nvlist_error_to_mdi(rv));
4474 }
4475 
4476 /*
4477  * mdi_prop_lookup_int64():
4478  * 		Look for int64 property identified by name.  The data returned
4479  *		is the actual property and valid as long as mdi_pathinfo_t node
4480  *		is alive.
4481  */
4482 int
4483 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4484 {
4485 	int rv;
4486 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4487 		return (DDI_PROP_NOT_FOUND);
4488 	}
4489 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4490 	return (i_map_nvlist_error_to_mdi(rv));
4491 }
4492 
4493 /*
4494  * mdi_prop_lookup_int_array():
4495  * 		Look for int array property identified by name.  The data
4496  *		returned is the actual property and valid as long as
4497  *		mdi_pathinfo_t node is alive.
4498  */
4499 int
4500 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4501     uint_t *nelements)
4502 {
4503 	int rv;
4504 
4505 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4506 		return (DDI_PROP_NOT_FOUND);
4507 	}
4508 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4509 	    (int32_t **)data, nelements);
4510 	return (i_map_nvlist_error_to_mdi(rv));
4511 }
4512 
4513 /*
4514  * mdi_prop_lookup_string():
4515  * 		Look for string property identified by name.  The data
4516  *		returned is the actual property and valid as long as
4517  *		mdi_pathinfo_t node is alive.
4518  */
4519 int
4520 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4521 {
4522 	int rv;
4523 
4524 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4525 		return (DDI_PROP_NOT_FOUND);
4526 	}
4527 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4528 	return (i_map_nvlist_error_to_mdi(rv));
4529 }
4530 
4531 /*
4532  * mdi_prop_lookup_string_array():
4533  * 		Look for string array property identified by name.  The data
4534  *		returned is the actual property and valid as long as
4535  *		mdi_pathinfo_t node is alive.
4536  */
4537 int
4538 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4539     uint_t *nelements)
4540 {
4541 	int rv;
4542 
4543 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4544 		return (DDI_PROP_NOT_FOUND);
4545 	}
4546 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4547 	    nelements);
4548 	return (i_map_nvlist_error_to_mdi(rv));
4549 }
4550 
4551 /*
4552  * mdi_prop_free():
4553  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4554  *		functions return the pointer to actual property data and not a
4555  *		copy of it.  So the data returned is valid as long as
4556  *		mdi_pathinfo_t node is valid.
4557  */
4558 /*ARGSUSED*/
4559 int
4560 mdi_prop_free(void *data)
4561 {
4562 	return (DDI_PROP_SUCCESS);
4563 }
4564 
4565 /*ARGSUSED*/
4566 static void
4567 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4568 {
4569 	char		*phci_path, *ct_path;
4570 	char		*ct_status;
4571 	char		*status;
4572 	dev_info_t	*dip = ct->ct_dip;
4573 	char		lb_buf[64];
4574 
4575 	ASSERT(MDI_CLIENT_LOCKED(ct));
4576 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4577 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4578 		return;
4579 	}
4580 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4581 		ct_status = "optimal";
4582 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4583 		ct_status = "degraded";
4584 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4585 		ct_status = "failed";
4586 	} else {
4587 		ct_status = "unknown";
4588 	}
4589 
4590 	if (MDI_PI_IS_OFFLINE(pip)) {
4591 		status = "offline";
4592 	} else if (MDI_PI_IS_ONLINE(pip)) {
4593 		status = "online";
4594 	} else if (MDI_PI_IS_STANDBY(pip)) {
4595 		status = "standby";
4596 	} else if (MDI_PI_IS_FAULT(pip)) {
4597 		status = "faulted";
4598 	} else {
4599 		status = "unknown";
4600 	}
4601 
4602 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4603 		(void) snprintf(lb_buf, sizeof (lb_buf),
4604 		    "%s, region-size: %d", mdi_load_balance_lba,
4605 			ct->ct_lb_args->region_size);
4606 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4607 		(void) snprintf(lb_buf, sizeof (lb_buf),
4608 		    "%s", mdi_load_balance_none);
4609 	} else {
4610 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4611 		    mdi_load_balance_rr);
4612 	}
4613 
4614 	if (dip) {
4615 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4616 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4617 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4618 		    "path %s (%s%d) to target address: %s is %s"
4619 		    " Load balancing: %s\n",
4620 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4621 		    ddi_get_instance(dip), ct_status,
4622 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4623 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4624 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4625 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4626 		kmem_free(phci_path, MAXPATHLEN);
4627 		kmem_free(ct_path, MAXPATHLEN);
4628 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4629 	}
4630 }
4631 
#ifdef	DEBUG
/*
 * i_mdi_log():
 *		Debug logging helper.  Formats the caller's message, prefixes
 *		it with "mdi: " and, when a dev_info node is given, with
 *		"<node name><instance>: ", and passes it to cmn_err().
 *
 *		A leading '!', '?' or '^' in the formatted message is removed
 *		from the text and re-applied in front of "mdi: " so that it
 *		remains the first character of the cmn_err() format.  When
 *		mdi_debug_logonly is set, '!' is always used.  CE_NOTE is
 *		issued as CE_CONT.  For levels other than CE_NOTE, CE_CONT,
 *		CE_WARN and CE_PANIC no such character is re-applied.
 */
/*PRINTFLIKE3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		devname[MAXNAMELEN];
	char		msg[MAXNAMELEN];
	char		*text = msg;
	char		route = '\0';	/* '!', '?', '^' or none */
	va_list		args;

	if (dip != NULL) {
		(void) snprintf(devname, sizeof (devname), "%s%d: ",
		    ddi_node_name(dip), ddi_get_instance(dip));
	} else {
		devname[0] = '\0';
	}

	va_start(args, fmt);
	(void) vsnprintf(msg, sizeof (msg), fmt, args);
	va_end(args);

	/* peel a routing character off the front of the message */
	if (msg[0] == '!' || msg[0] == '?' || msg[0] == '^') {
		route = msg[0];
		text = &msg[1];
	}

	/* the tunable overrides whatever the message asked for */
	if (mdi_debug_logonly)
		route = '!';

	if (level == CE_NOTE)
		level = CE_CONT;

	/* routing characters are only honoured for these levels */
	if (level != CE_CONT && level != CE_WARN && level != CE_PANIC)
		route = '\0';

	switch (route) {
	case '?':
		cmn_err(level, "?mdi: %s%s", devname, text);
		break;
	case '^':
		cmn_err(level, "^mdi: %s%s", devname, text);
		break;
	case '!':
		cmn_err(level, "!mdi: %s%s", devname, text);
		break;
	default:
		cmn_err(level, "mdi: %s%s", devname, text);
		break;
	}
}
#endif	/* DEBUG */
4707 
4708 void
4709 i_mdi_client_online(dev_info_t *ct_dip)
4710 {
4711 	mdi_client_t	*ct;
4712 
4713 	/*
4714 	 * Client online notification. Mark client state as online
4715 	 * restore our binding with dev_info node
4716 	 */
4717 	ct = i_devi_get_client(ct_dip);
4718 	ASSERT(ct != NULL);
4719 	MDI_CLIENT_LOCK(ct);
4720 	MDI_CLIENT_SET_ONLINE(ct);
4721 	/* catch for any memory leaks */
4722 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4723 	ct->ct_dip = ct_dip;
4724 
4725 	if (ct->ct_power_cnt == 0)
4726 		(void) i_mdi_power_all_phci(ct);
4727 
4728 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4729 	    "i_mdi_pm_hold_client %p\n", (void *)ct));
4730 	i_mdi_pm_hold_client(ct, 1);
4731 
4732 	MDI_CLIENT_UNLOCK(ct);
4733 }
4734 
4735 void
4736 i_mdi_phci_online(dev_info_t *ph_dip)
4737 {
4738 	mdi_phci_t	*ph;
4739 
4740 	/* pHCI online notification. Mark state accordingly */
4741 	ph = i_devi_get_phci(ph_dip);
4742 	ASSERT(ph != NULL);
4743 	MDI_PHCI_LOCK(ph);
4744 	MDI_PHCI_SET_ONLINE(ph);
4745 	MDI_PHCI_UNLOCK(ph);
4746 }
4747 
4748 /*
4749  * mdi_devi_online():
4750  * 		Online notification from NDI framework on pHCI/client
4751  *		device online.
4752  * Return Values:
4753  *		NDI_SUCCESS
4754  *		MDI_FAILURE
4755  */
4756 /*ARGSUSED*/
4757 int
4758 mdi_devi_online(dev_info_t *dip, uint_t flags)
4759 {
4760 	if (MDI_PHCI(dip)) {
4761 		i_mdi_phci_online(dip);
4762 	}
4763 
4764 	if (MDI_CLIENT(dip)) {
4765 		i_mdi_client_online(dip);
4766 	}
4767 	return (NDI_SUCCESS);
4768 }
4769 
4770 /*
4771  * mdi_devi_offline():
4772  * 		Offline notification from NDI framework on pHCI/Client device
4773  *		offline.
4774  *
4775  * Return Values:
4776  *		NDI_SUCCESS
4777  *		NDI_FAILURE
4778  */
4779 /*ARGSUSED*/
4780 int
4781 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4782 {
4783 	int		rv = NDI_SUCCESS;
4784 
4785 	if (MDI_CLIENT(dip)) {
4786 		rv = i_mdi_client_offline(dip, flags);
4787 		if (rv != NDI_SUCCESS)
4788 			return (rv);
4789 	}
4790 
4791 	if (MDI_PHCI(dip)) {
4792 		rv = i_mdi_phci_offline(dip, flags);
4793 
4794 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4795 			/* set client back online */
4796 			i_mdi_client_online(dip);
4797 		}
4798 	}
4799 
4800 	return (rv);
4801 }
4802 
4803 /*ARGSUSED*/
4804 static int
4805 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4806 {
4807 	int		rv = NDI_SUCCESS;
4808 	mdi_phci_t	*ph;
4809 	mdi_client_t	*ct;
4810 	mdi_pathinfo_t	*pip;
4811 	mdi_pathinfo_t	*next;
4812 	mdi_pathinfo_t	*failed_pip = NULL;
4813 	dev_info_t	*cdip;
4814 
4815 	/*
4816 	 * pHCI component offline notification
4817 	 * Make sure that this pHCI instance is free to be offlined.
4818 	 * If it is OK to proceed, Offline and remove all the child
4819 	 * mdi_pathinfo nodes.  This process automatically offlines
4820 	 * corresponding client devices, for which this pHCI provides
4821 	 * critical services.
4822 	 */
4823 	ph = i_devi_get_phci(dip);
4824 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
4825 	    (void *)dip, (void *)ph));
4826 	if (ph == NULL) {
4827 		return (rv);
4828 	}
4829 
4830 	MDI_PHCI_LOCK(ph);
4831 
4832 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4833 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
4834 		    (void *)ph));
4835 		MDI_PHCI_UNLOCK(ph);
4836 		return (NDI_SUCCESS);
4837 	}
4838 
4839 	/*
4840 	 * Check to see if the pHCI can be offlined
4841 	 */
4842 	if (ph->ph_unstable) {
4843 		MDI_DEBUG(1, (CE_WARN, dip,
4844 		    "!One or more target devices are in transient "
4845 		    "state. This device can not be removed at "
4846 		    "this moment. Please try again later."));
4847 		MDI_PHCI_UNLOCK(ph);
4848 		return (NDI_BUSY);
4849 	}
4850 
4851 	pip = ph->ph_path_head;
4852 	while (pip != NULL) {
4853 		MDI_PI_LOCK(pip);
4854 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4855 
4856 		/*
4857 		 * The mdi_pathinfo state is OK. Check the client state.
4858 		 * If failover in progress fail the pHCI from offlining
4859 		 */
4860 		ct = MDI_PI(pip)->pi_client;
4861 		i_mdi_client_lock(ct, pip);
4862 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4863 		    (ct->ct_unstable)) {
4864 			/*
4865 			 * Failover is in progress, Fail the DR
4866 			 */
4867 			MDI_DEBUG(1, (CE_WARN, dip,
4868 			    "!pHCI device (%s%d) is Busy. %s",
4869 			    ddi_driver_name(dip), ddi_get_instance(dip),
4870 			    "This device can not be removed at "
4871 			    "this moment. Please try again later."));
4872 			MDI_PI_UNLOCK(pip);
4873 			i_mdi_client_unlock(ct);
4874 			MDI_PHCI_UNLOCK(ph);
4875 			return (NDI_BUSY);
4876 		}
4877 		MDI_PI_UNLOCK(pip);
4878 
4879 		/*
4880 		 * Check to see of we are removing the last path of this
4881 		 * client device...
4882 		 */
4883 		cdip = ct->ct_dip;
4884 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4885 		    (i_mdi_client_compute_state(ct, ph) ==
4886 		    MDI_CLIENT_STATE_FAILED)) {
4887 			i_mdi_client_unlock(ct);
4888 			MDI_PHCI_UNLOCK(ph);
4889 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4890 				/*
4891 				 * ndi_devi_offline() failed.
4892 				 * This pHCI provides the critical path
4893 				 * to one or more client devices.
4894 				 * Return busy.
4895 				 */
4896 				MDI_PHCI_LOCK(ph);
4897 				MDI_DEBUG(1, (CE_WARN, dip,
4898 				    "!pHCI device (%s%d) is Busy. %s",
4899 				    ddi_driver_name(dip), ddi_get_instance(dip),
4900 				    "This device can not be removed at "
4901 				    "this moment. Please try again later."));
4902 				failed_pip = pip;
4903 				break;
4904 			} else {
4905 				MDI_PHCI_LOCK(ph);
4906 				pip = next;
4907 			}
4908 		} else {
4909 			i_mdi_client_unlock(ct);
4910 			pip = next;
4911 		}
4912 	}
4913 
4914 	if (failed_pip) {
4915 		pip = ph->ph_path_head;
4916 		while (pip != failed_pip) {
4917 			MDI_PI_LOCK(pip);
4918 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4919 			ct = MDI_PI(pip)->pi_client;
4920 			i_mdi_client_lock(ct, pip);
4921 			cdip = ct->ct_dip;
4922 			switch (MDI_CLIENT_STATE(ct)) {
4923 			case MDI_CLIENT_STATE_OPTIMAL:
4924 			case MDI_CLIENT_STATE_DEGRADED:
4925 				if (cdip) {
4926 					MDI_PI_UNLOCK(pip);
4927 					i_mdi_client_unlock(ct);
4928 					MDI_PHCI_UNLOCK(ph);
4929 					(void) ndi_devi_online(cdip, 0);
4930 					MDI_PHCI_LOCK(ph);
4931 					pip = next;
4932 					continue;
4933 				}
4934 				break;
4935 
4936 			case MDI_CLIENT_STATE_FAILED:
4937 				if (cdip) {
4938 					MDI_PI_UNLOCK(pip);
4939 					i_mdi_client_unlock(ct);
4940 					MDI_PHCI_UNLOCK(ph);
4941 					(void) ndi_devi_offline(cdip, 0);
4942 					MDI_PHCI_LOCK(ph);
4943 					pip = next;
4944 					continue;
4945 				}
4946 				break;
4947 			}
4948 			MDI_PI_UNLOCK(pip);
4949 			i_mdi_client_unlock(ct);
4950 			pip = next;
4951 		}
4952 		MDI_PHCI_UNLOCK(ph);
4953 		return (NDI_BUSY);
4954 	}
4955 
4956 	/*
4957 	 * Mark the pHCI as offline
4958 	 */
4959 	MDI_PHCI_SET_OFFLINE(ph);
4960 
4961 	/*
4962 	 * Mark the child mdi_pathinfo nodes as transient
4963 	 */
4964 	pip = ph->ph_path_head;
4965 	while (pip != NULL) {
4966 		MDI_PI_LOCK(pip);
4967 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4968 		MDI_PI_SET_OFFLINING(pip);
4969 		MDI_PI_UNLOCK(pip);
4970 		pip = next;
4971 	}
4972 	MDI_PHCI_UNLOCK(ph);
4973 	/*
4974 	 * Give a chance for any pending commands to execute
4975 	 */
4976 	delay(1);
4977 	MDI_PHCI_LOCK(ph);
4978 	pip = ph->ph_path_head;
4979 	while (pip != NULL) {
4980 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4981 		(void) i_mdi_pi_offline(pip, flags);
4982 		MDI_PI_LOCK(pip);
4983 		ct = MDI_PI(pip)->pi_client;
4984 		if (!MDI_PI_IS_OFFLINE(pip)) {
4985 			MDI_DEBUG(1, (CE_WARN, dip,
4986 			    "!pHCI device (%s%d) is Busy. %s",
4987 			    ddi_driver_name(dip), ddi_get_instance(dip),
4988 			    "This device can not be removed at "
4989 			    "this moment. Please try again later."));
4990 			MDI_PI_UNLOCK(pip);
4991 			MDI_PHCI_SET_ONLINE(ph);
4992 			MDI_PHCI_UNLOCK(ph);
4993 			return (NDI_BUSY);
4994 		}
4995 		MDI_PI_UNLOCK(pip);
4996 		pip = next;
4997 	}
4998 	MDI_PHCI_UNLOCK(ph);
4999 
5000 	return (rv);
5001 }
5002 
5003 void
5004 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5005 {
5006 	mdi_phci_t	*ph;
5007 	mdi_client_t	*ct;
5008 	mdi_pathinfo_t	*pip;
5009 	mdi_pathinfo_t	*next;
5010 	dev_info_t	*cdip;
5011 
5012 	if (!MDI_PHCI(dip))
5013 		return;
5014 
5015 	ph = i_devi_get_phci(dip);
5016 	if (ph == NULL) {
5017 		return;
5018 	}
5019 
5020 	MDI_PHCI_LOCK(ph);
5021 
5022 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5023 		/* has no last path */
5024 		MDI_PHCI_UNLOCK(ph);
5025 		return;
5026 	}
5027 
5028 	pip = ph->ph_path_head;
5029 	while (pip != NULL) {
5030 		MDI_PI_LOCK(pip);
5031 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5032 
5033 		ct = MDI_PI(pip)->pi_client;
5034 		i_mdi_client_lock(ct, pip);
5035 		MDI_PI_UNLOCK(pip);
5036 
5037 		cdip = ct->ct_dip;
5038 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5039 		    (i_mdi_client_compute_state(ct, ph) ==
5040 		    MDI_CLIENT_STATE_FAILED)) {
5041 			/* Last path. Mark client dip as retiring */
5042 			i_mdi_client_unlock(ct);
5043 			MDI_PHCI_UNLOCK(ph);
5044 			(void) e_ddi_mark_retiring(cdip, cons_array);
5045 			MDI_PHCI_LOCK(ph);
5046 			pip = next;
5047 		} else {
5048 			i_mdi_client_unlock(ct);
5049 			pip = next;
5050 		}
5051 	}
5052 
5053 	MDI_PHCI_UNLOCK(ph);
5054 
5055 	return;
5056 }
5057 
5058 void
5059 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5060 {
5061 	mdi_phci_t	*ph;
5062 	mdi_client_t	*ct;
5063 	mdi_pathinfo_t	*pip;
5064 	mdi_pathinfo_t	*next;
5065 	dev_info_t	*cdip;
5066 
5067 	if (!MDI_PHCI(dip))
5068 		return;
5069 
5070 	ph = i_devi_get_phci(dip);
5071 	if (ph == NULL)
5072 		return;
5073 
5074 	MDI_PHCI_LOCK(ph);
5075 
5076 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5077 		MDI_PHCI_UNLOCK(ph);
5078 		/* not last path */
5079 		return;
5080 	}
5081 
5082 	if (ph->ph_unstable) {
5083 		MDI_PHCI_UNLOCK(ph);
5084 		/* can't check for constraints */
5085 		*constraint = 0;
5086 		return;
5087 	}
5088 
5089 	pip = ph->ph_path_head;
5090 	while (pip != NULL) {
5091 		MDI_PI_LOCK(pip);
5092 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5093 
5094 		/*
5095 		 * The mdi_pathinfo state is OK. Check the client state.
5096 		 * If failover in progress fail the pHCI from offlining
5097 		 */
5098 		ct = MDI_PI(pip)->pi_client;
5099 		i_mdi_client_lock(ct, pip);
5100 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5101 		    (ct->ct_unstable)) {
5102 			/*
5103 			 * Failover is in progress, can't check for constraints
5104 			 */
5105 			MDI_PI_UNLOCK(pip);
5106 			i_mdi_client_unlock(ct);
5107 			MDI_PHCI_UNLOCK(ph);
5108 			*constraint = 0;
5109 			return;
5110 		}
5111 		MDI_PI_UNLOCK(pip);
5112 
5113 		/*
5114 		 * Check to see of we are retiring the last path of this
5115 		 * client device...
5116 		 */
5117 		cdip = ct->ct_dip;
5118 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5119 		    (i_mdi_client_compute_state(ct, ph) ==
5120 		    MDI_CLIENT_STATE_FAILED)) {
5121 			i_mdi_client_unlock(ct);
5122 			MDI_PHCI_UNLOCK(ph);
5123 			(void) e_ddi_retire_notify(cdip, constraint);
5124 			MDI_PHCI_LOCK(ph);
5125 			pip = next;
5126 		} else {
5127 			i_mdi_client_unlock(ct);
5128 			pip = next;
5129 		}
5130 	}
5131 
5132 	MDI_PHCI_UNLOCK(ph);
5133 
5134 	return;
5135 }
5136 
5137 /*
5138  * offline the path(s) hanging off the PHCI. If the
5139  * last path to any client, check that constraints
5140  * have been applied.
5141  */
5142 void
5143 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
5144 {
5145 	mdi_phci_t	*ph;
5146 	mdi_client_t	*ct;
5147 	mdi_pathinfo_t	*pip;
5148 	mdi_pathinfo_t	*next;
5149 	dev_info_t	*cdip;
5150 	int		unstable = 0;
5151 	int		constraint;
5152 
5153 	if (!MDI_PHCI(dip))
5154 		return;
5155 
5156 	ph = i_devi_get_phci(dip);
5157 	if (ph == NULL) {
5158 		/* no last path and no pips */
5159 		return;
5160 	}
5161 
5162 	MDI_PHCI_LOCK(ph);
5163 
5164 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5165 		MDI_PHCI_UNLOCK(ph);
5166 		/* no last path and no pips */
5167 		return;
5168 	}
5169 
5170 	/*
5171 	 * Check to see if the pHCI can be offlined
5172 	 */
5173 	if (ph->ph_unstable) {
5174 		unstable = 1;
5175 	}
5176 
5177 	pip = ph->ph_path_head;
5178 	while (pip != NULL) {
5179 		MDI_PI_LOCK(pip);
5180 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5181 
5182 		/*
5183 		 * if failover in progress fail the pHCI from offlining
5184 		 */
5185 		ct = MDI_PI(pip)->pi_client;
5186 		i_mdi_client_lock(ct, pip);
5187 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5188 		    (ct->ct_unstable)) {
5189 			unstable = 1;
5190 		}
5191 		MDI_PI_UNLOCK(pip);
5192 
5193 		/*
5194 		 * Check to see of we are removing the last path of this
5195 		 * client device...
5196 		 */
5197 		cdip = ct->ct_dip;
5198 		if (!phci_only && cdip &&
5199 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5200 		    (i_mdi_client_compute_state(ct, ph) ==
5201 		    MDI_CLIENT_STATE_FAILED)) {
5202 			i_mdi_client_unlock(ct);
5203 			MDI_PHCI_UNLOCK(ph);
5204 			/*
5205 			 * We don't retire clients we just retire the
5206 			 * path to a client. If it is the last path
5207 			 * to a client, constraints are checked and
5208 			 * if we pass the last path is offlined. MPXIO will
5209 			 * then fail all I/Os to the client. Since we don't
5210 			 * want to retire the client on a path error
5211 			 * set constraint = 0 so that the client dip
5212 			 * is not retired.
5213 			 */
5214 			constraint = 0;
5215 			(void) e_ddi_retire_finalize(cdip, &constraint);
5216 			MDI_PHCI_LOCK(ph);
5217 			pip = next;
5218 		} else {
5219 			i_mdi_client_unlock(ct);
5220 			pip = next;
5221 		}
5222 	}
5223 
5224 	/*
5225 	 * Cannot offline pip(s)
5226 	 */
5227 	if (unstable) {
5228 		cmn_err(CE_WARN, "PHCI in transient state, cannot "
5229 		    "retire, dip = %p", (void *)dip);
5230 		MDI_PHCI_UNLOCK(ph);
5231 		return;
5232 	}
5233 
5234 	/*
5235 	 * Mark the pHCI as offline
5236 	 */
5237 	MDI_PHCI_SET_OFFLINE(ph);
5238 
5239 	/*
5240 	 * Mark the child mdi_pathinfo nodes as transient
5241 	 */
5242 	pip = ph->ph_path_head;
5243 	while (pip != NULL) {
5244 		MDI_PI_LOCK(pip);
5245 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5246 		MDI_PI_SET_OFFLINING(pip);
5247 		MDI_PI_UNLOCK(pip);
5248 		pip = next;
5249 	}
5250 	MDI_PHCI_UNLOCK(ph);
5251 	/*
5252 	 * Give a chance for any pending commands to execute
5253 	 */
5254 	delay(1);
5255 	MDI_PHCI_LOCK(ph);
5256 	pip = ph->ph_path_head;
5257 	while (pip != NULL) {
5258 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5259 		(void) i_mdi_pi_offline(pip, 0);
5260 		MDI_PI_LOCK(pip);
5261 		ct = MDI_PI(pip)->pi_client;
5262 		if (!MDI_PI_IS_OFFLINE(pip)) {
5263 			cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
5264 			    "PHCI dip = %p", (void *)dip);
5265 			MDI_PI_UNLOCK(pip);
5266 			MDI_PHCI_SET_ONLINE(ph);
5267 			MDI_PHCI_UNLOCK(ph);
5268 			return;
5269 		}
5270 		MDI_PI_UNLOCK(pip);
5271 		pip = next;
5272 	}
5273 	MDI_PHCI_UNLOCK(ph);
5274 
5275 	return;
5276 }
5277 
5278 void
5279 mdi_phci_unretire(dev_info_t *dip)
5280 {
5281 	ASSERT(MDI_PHCI(dip));
5282 
5283 	/*
5284 	 * Online the phci
5285 	 */
5286 	i_mdi_phci_online(dip);
5287 }
5288 
5289 /*ARGSUSED*/
5290 static int
5291 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5292 {
5293 	int		rv = NDI_SUCCESS;
5294 	mdi_client_t	*ct;
5295 
5296 	/*
5297 	 * Client component to go offline.  Make sure that we are
5298 	 * not in failing over state and update client state
5299 	 * accordingly
5300 	 */
5301 	ct = i_devi_get_client(dip);
5302 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n",
5303 	    (void *)dip, (void *)ct));
5304 	if (ct != NULL) {
5305 		MDI_CLIENT_LOCK(ct);
5306 		if (ct->ct_unstable) {
5307 			/*
5308 			 * One or more paths are in transient state,
5309 			 * Dont allow offline of a client device
5310 			 */
5311 			MDI_DEBUG(1, (CE_WARN, dip,
5312 			    "!One or more paths to this device is "
5313 			    "in transient state. This device can not "
5314 			    "be removed at this moment. "
5315 			    "Please try again later."));
5316 			MDI_CLIENT_UNLOCK(ct);
5317 			return (NDI_BUSY);
5318 		}
5319 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5320 			/*
5321 			 * Failover is in progress, Dont allow DR of
5322 			 * a client device
5323 			 */
5324 			MDI_DEBUG(1, (CE_WARN, dip,
5325 			    "!Client device (%s%d) is Busy. %s",
5326 			    ddi_driver_name(dip), ddi_get_instance(dip),
5327 			    "This device can not be removed at "
5328 			    "this moment. Please try again later."));
5329 			MDI_CLIENT_UNLOCK(ct);
5330 			return (NDI_BUSY);
5331 		}
5332 		MDI_CLIENT_SET_OFFLINE(ct);
5333 
5334 		/*
5335 		 * Unbind our relationship with the dev_info node
5336 		 */
5337 		if (flags & NDI_DEVI_REMOVE) {
5338 			ct->ct_dip = NULL;
5339 		}
5340 		MDI_CLIENT_UNLOCK(ct);
5341 	}
5342 	return (rv);
5343 }
5344 
5345 /*
5346  * mdi_pre_attach():
5347  *		Pre attach() notification handler
5348  */
5349 /*ARGSUSED*/
5350 int
5351 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5352 {
5353 	/* don't support old DDI_PM_RESUME */
5354 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5355 	    (cmd == DDI_PM_RESUME))
5356 		return (DDI_FAILURE);
5357 
5358 	return (DDI_SUCCESS);
5359 }
5360 
5361 /*
5362  * mdi_post_attach():
5363  *		Post attach() notification handler
5364  */
5365 /*ARGSUSED*/
5366 void
5367 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5368 {
5369 	mdi_phci_t	*ph;
5370 	mdi_client_t	*ct;
5371 	mdi_vhci_t	*vh;
5372 
5373 	if (MDI_PHCI(dip)) {
5374 		ph = i_devi_get_phci(dip);
5375 		ASSERT(ph != NULL);
5376 
5377 		MDI_PHCI_LOCK(ph);
5378 		switch (cmd) {
5379 		case DDI_ATTACH:
5380 			MDI_DEBUG(2, (CE_NOTE, dip,
5381 			    "!pHCI post_attach: called %p\n", (void *)ph));
5382 			if (error == DDI_SUCCESS) {
5383 				MDI_PHCI_SET_ATTACH(ph);
5384 			} else {
5385 				MDI_DEBUG(1, (CE_NOTE, dip,
5386 				    "!pHCI post_attach: failed error=%d\n",
5387 				    error));
5388 				MDI_PHCI_SET_DETACH(ph);
5389 			}
5390 			break;
5391 
5392 		case DDI_RESUME:
5393 			MDI_DEBUG(2, (CE_NOTE, dip,
5394 			    "!pHCI post_resume: called %p\n", (void *)ph));
5395 			if (error == DDI_SUCCESS) {
5396 				MDI_PHCI_SET_RESUME(ph);
5397 			} else {
5398 				MDI_DEBUG(1, (CE_NOTE, dip,
5399 				    "!pHCI post_resume: failed error=%d\n",
5400 				    error));
5401 				MDI_PHCI_SET_SUSPEND(ph);
5402 			}
5403 			break;
5404 		}
5405 		MDI_PHCI_UNLOCK(ph);
5406 	}
5407 
5408 	if (MDI_CLIENT(dip)) {
5409 		ct = i_devi_get_client(dip);
5410 		ASSERT(ct != NULL);
5411 
5412 		MDI_CLIENT_LOCK(ct);
5413 		switch (cmd) {
5414 		case DDI_ATTACH:
5415 			MDI_DEBUG(2, (CE_NOTE, dip,
5416 			    "!Client post_attach: called %p\n", (void *)ct));
5417 			if (error != DDI_SUCCESS) {
5418 				MDI_DEBUG(1, (CE_NOTE, dip,
5419 				    "!Client post_attach: failed error=%d\n",
5420 				    error));
5421 				MDI_CLIENT_SET_DETACH(ct);
5422 				MDI_DEBUG(4, (CE_WARN, dip,
5423 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
5424 				i_mdi_pm_reset_client(ct);
5425 				break;
5426 			}
5427 
5428 			/*
5429 			 * Client device has successfully attached, inform
5430 			 * the vhci.
5431 			 */
5432 			vh = ct->ct_vhci;
5433 			if (vh->vh_ops->vo_client_attached)
5434 				(*vh->vh_ops->vo_client_attached)(dip);
5435 
5436 			MDI_CLIENT_SET_ATTACH(ct);
5437 			break;
5438 
5439 		case DDI_RESUME:
5440 			MDI_DEBUG(2, (CE_NOTE, dip,
5441 			    "!Client post_attach: called %p\n", (void *)ct));
5442 			if (error == DDI_SUCCESS) {
5443 				MDI_CLIENT_SET_RESUME(ct);
5444 			} else {
5445 				MDI_DEBUG(1, (CE_NOTE, dip,
5446 				    "!Client post_resume: failed error=%d\n",
5447 				    error));
5448 				MDI_CLIENT_SET_SUSPEND(ct);
5449 			}
5450 			break;
5451 		}
5452 		MDI_CLIENT_UNLOCK(ct);
5453 	}
5454 }
5455 
5456 /*
5457  * mdi_pre_detach():
5458  *		Pre detach notification handler
5459  */
5460 /*ARGSUSED*/
5461 int
5462 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5463 {
5464 	int rv = DDI_SUCCESS;
5465 
5466 	if (MDI_CLIENT(dip)) {
5467 		(void) i_mdi_client_pre_detach(dip, cmd);
5468 	}
5469 
5470 	if (MDI_PHCI(dip)) {
5471 		rv = i_mdi_phci_pre_detach(dip, cmd);
5472 	}
5473 
5474 	return (rv);
5475 }
5476 
5477 /*ARGSUSED*/
5478 static int
5479 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5480 {
5481 	int		rv = DDI_SUCCESS;
5482 	mdi_phci_t	*ph;
5483 	mdi_client_t	*ct;
5484 	mdi_pathinfo_t	*pip;
5485 	mdi_pathinfo_t	*failed_pip = NULL;
5486 	mdi_pathinfo_t	*next;
5487 
5488 	ph = i_devi_get_phci(dip);
5489 	if (ph == NULL) {
5490 		return (rv);
5491 	}
5492 
5493 	MDI_PHCI_LOCK(ph);
5494 	switch (cmd) {
5495 	case DDI_DETACH:
5496 		MDI_DEBUG(2, (CE_NOTE, dip,
5497 		    "!pHCI pre_detach: called %p\n", (void *)ph));
5498 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
5499 			/*
5500 			 * mdi_pathinfo nodes are still attached to
5501 			 * this pHCI. Fail the detach for this pHCI.
5502 			 */
5503 			MDI_DEBUG(2, (CE_WARN, dip,
5504 			    "!pHCI pre_detach: "
5505 			    "mdi_pathinfo nodes are still attached "
5506 			    "%p\n", (void *)ph));
5507 			rv = DDI_FAILURE;
5508 			break;
5509 		}
5510 		MDI_PHCI_SET_DETACH(ph);
5511 		break;
5512 
5513 	case DDI_SUSPEND:
5514 		/*
5515 		 * pHCI is getting suspended.  Since mpxio client
5516 		 * devices may not be suspended at this point, to avoid
5517 		 * a potential stack overflow, it is important to suspend
5518 		 * client devices before pHCI can be suspended.
5519 		 */
5520 
5521 		MDI_DEBUG(2, (CE_NOTE, dip,
5522 		    "!pHCI pre_suspend: called %p\n", (void *)ph));
5523 		/*
5524 		 * Suspend all the client devices accessible through this pHCI
5525 		 */
5526 		pip = ph->ph_path_head;
5527 		while (pip != NULL && rv == DDI_SUCCESS) {
5528 			dev_info_t *cdip;
5529 			MDI_PI_LOCK(pip);
5530 			next =
5531 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5532 			ct = MDI_PI(pip)->pi_client;
5533 			i_mdi_client_lock(ct, pip);
5534 			cdip = ct->ct_dip;
5535 			MDI_PI_UNLOCK(pip);
5536 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5537 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5538 				i_mdi_client_unlock(ct);
5539 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5540 				    DDI_SUCCESS) {
5541 					/*
5542 					 * Suspend of one of the client
5543 					 * device has failed.
5544 					 */
5545 					MDI_DEBUG(1, (CE_WARN, dip,
5546 					    "!Suspend of device (%s%d) failed.",
5547 					    ddi_driver_name(cdip),
5548 					    ddi_get_instance(cdip)));
5549 					failed_pip = pip;
5550 					break;
5551 				}
5552 			} else {
5553 				i_mdi_client_unlock(ct);
5554 			}
5555 			pip = next;
5556 		}
5557 
5558 		if (rv == DDI_SUCCESS) {
5559 			/*
5560 			 * Suspend of client devices is complete. Proceed
5561 			 * with pHCI suspend.
5562 			 */
5563 			MDI_PHCI_SET_SUSPEND(ph);
5564 		} else {
5565 			/*
5566 			 * Revert back all the suspended client device states
5567 			 * to converse.
5568 			 */
5569 			pip = ph->ph_path_head;
5570 			while (pip != failed_pip) {
5571 				dev_info_t *cdip;
5572 				MDI_PI_LOCK(pip);
5573 				next =
5574 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5575 				ct = MDI_PI(pip)->pi_client;
5576 				i_mdi_client_lock(ct, pip);
5577 				cdip = ct->ct_dip;
5578 				MDI_PI_UNLOCK(pip);
5579 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5580 					i_mdi_client_unlock(ct);
5581 					(void) devi_attach(cdip, DDI_RESUME);
5582 				} else {
5583 					i_mdi_client_unlock(ct);
5584 				}
5585 				pip = next;
5586 			}
5587 		}
5588 		break;
5589 
5590 	default:
5591 		rv = DDI_FAILURE;
5592 		break;
5593 	}
5594 	MDI_PHCI_UNLOCK(ph);
5595 	return (rv);
5596 }
5597 
5598 /*ARGSUSED*/
5599 static int
5600 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5601 {
5602 	int		rv = DDI_SUCCESS;
5603 	mdi_client_t	*ct;
5604 
5605 	ct = i_devi_get_client(dip);
5606 	if (ct == NULL) {
5607 		return (rv);
5608 	}
5609 
5610 	MDI_CLIENT_LOCK(ct);
5611 	switch (cmd) {
5612 	case DDI_DETACH:
5613 		MDI_DEBUG(2, (CE_NOTE, dip,
5614 		    "!Client pre_detach: called %p\n", (void *)ct));
5615 		MDI_CLIENT_SET_DETACH(ct);
5616 		break;
5617 
5618 	case DDI_SUSPEND:
5619 		MDI_DEBUG(2, (CE_NOTE, dip,
5620 		    "!Client pre_suspend: called %p\n", (void *)ct));
5621 		MDI_CLIENT_SET_SUSPEND(ct);
5622 		break;
5623 
5624 	default:
5625 		rv = DDI_FAILURE;
5626 		break;
5627 	}
5628 	MDI_CLIENT_UNLOCK(ct);
5629 	return (rv);
5630 }
5631 
5632 /*
5633  * mdi_post_detach():
5634  *		Post detach notification handler
5635  */
5636 /*ARGSUSED*/
5637 void
5638 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5639 {
5640 	/*
5641 	 * Detach/Suspend of mpxio component failed. Update our state
5642 	 * too
5643 	 */
5644 	if (MDI_PHCI(dip))
5645 		i_mdi_phci_post_detach(dip, cmd, error);
5646 
5647 	if (MDI_CLIENT(dip))
5648 		i_mdi_client_post_detach(dip, cmd, error);
5649 }
5650 
5651 /*ARGSUSED*/
5652 static void
5653 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5654 {
5655 	mdi_phci_t	*ph;
5656 
5657 	/*
5658 	 * Detach/Suspend of phci component failed. Update our state
5659 	 * too
5660 	 */
5661 	ph = i_devi_get_phci(dip);
5662 	if (ph == NULL) {
5663 		return;
5664 	}
5665 
5666 	MDI_PHCI_LOCK(ph);
5667 	/*
5668 	 * Detach of pHCI failed. Restore back converse
5669 	 * state
5670 	 */
5671 	switch (cmd) {
5672 	case DDI_DETACH:
5673 		MDI_DEBUG(2, (CE_NOTE, dip,
5674 		    "!pHCI post_detach: called %p\n", (void *)ph));
5675 		if (error != DDI_SUCCESS)
5676 			MDI_PHCI_SET_ATTACH(ph);
5677 		break;
5678 
5679 	case DDI_SUSPEND:
5680 		MDI_DEBUG(2, (CE_NOTE, dip,
5681 		    "!pHCI post_suspend: called %p\n", (void *)ph));
5682 		if (error != DDI_SUCCESS)
5683 			MDI_PHCI_SET_RESUME(ph);
5684 		break;
5685 	}
5686 	MDI_PHCI_UNLOCK(ph);
5687 }
5688 
5689 /*ARGSUSED*/
5690 static void
5691 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5692 {
5693 	mdi_client_t	*ct;
5694 
5695 	ct = i_devi_get_client(dip);
5696 	if (ct == NULL) {
5697 		return;
5698 	}
5699 	MDI_CLIENT_LOCK(ct);
5700 	/*
5701 	 * Detach of Client failed. Restore back converse
5702 	 * state
5703 	 */
5704 	switch (cmd) {
5705 	case DDI_DETACH:
5706 		MDI_DEBUG(2, (CE_NOTE, dip,
5707 		    "!Client post_detach: called %p\n", (void *)ct));
5708 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5709 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5710 			    "i_mdi_pm_rele_client\n"));
5711 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5712 		} else {
5713 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5714 			    "i_mdi_pm_reset_client\n"));
5715 			i_mdi_pm_reset_client(ct);
5716 		}
5717 		if (error != DDI_SUCCESS)
5718 			MDI_CLIENT_SET_ATTACH(ct);
5719 		break;
5720 
5721 	case DDI_SUSPEND:
5722 		MDI_DEBUG(2, (CE_NOTE, dip,
5723 		    "!Client post_suspend: called %p\n", (void *)ct));
5724 		if (error != DDI_SUCCESS)
5725 			MDI_CLIENT_SET_RESUME(ct);
5726 		break;
5727 	}
5728 	MDI_CLIENT_UNLOCK(ct);
5729 }
5730 
5731 int
5732 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
5733 {
5734 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
5735 }
5736 
5737 /*
5738  * create and install per-path (client - pHCI) statistics
5739  * I/O stats supported: nread, nwritten, reads, and writes
5740  * Error stats - hard errors, soft errors, & transport errors
5741  */
5742 int
5743 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
5744 {
5745 	kstat_t			*kiosp, *kerrsp;
5746 	struct pi_errs		*nsp;
5747 	struct mdi_pi_kstats	*mdi_statp;
5748 
5749 	if (MDI_PI(pip)->pi_kstats != NULL)
5750 		return (MDI_SUCCESS);
5751 
5752 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5753 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
5754 		return (MDI_FAILURE);
5755 	}
5756 
5757 	(void) strcat(ksname, ",err");
5758 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5759 	    KSTAT_TYPE_NAMED,
5760 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5761 	if (kerrsp == NULL) {
5762 		kstat_delete(kiosp);
5763 		return (MDI_FAILURE);
5764 	}
5765 
5766 	nsp = (struct pi_errs *)kerrsp->ks_data;
5767 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5768 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5769 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5770 	    KSTAT_DATA_UINT32);
5771 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5772 	    KSTAT_DATA_UINT32);
5773 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5774 	    KSTAT_DATA_UINT32);
5775 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5776 	    KSTAT_DATA_UINT32);
5777 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5778 	    KSTAT_DATA_UINT32);
5779 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5780 	    KSTAT_DATA_UINT32);
5781 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5782 	    KSTAT_DATA_UINT32);
5783 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5784 
5785 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5786 	mdi_statp->pi_kstat_ref = 1;
5787 	mdi_statp->pi_kstat_iostats = kiosp;
5788 	mdi_statp->pi_kstat_errstats = kerrsp;
5789 	kstat_install(kiosp);
5790 	kstat_install(kerrsp);
5791 	MDI_PI(pip)->pi_kstats = mdi_statp;
5792 	return (MDI_SUCCESS);
5793 }
5794 
5795 /*
5796  * destroy per-path properties
5797  */
5798 static void
5799 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5800 {
5801 
5802 	struct mdi_pi_kstats *mdi_statp;
5803 
5804 	if (MDI_PI(pip)->pi_kstats == NULL)
5805 		return;
5806 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5807 		return;
5808 
5809 	MDI_PI(pip)->pi_kstats = NULL;
5810 
5811 	/*
5812 	 * the kstat may be shared between multiple pathinfo nodes
5813 	 * decrement this pathinfo's usage, removing the kstats
5814 	 * themselves when the last pathinfo reference is removed.
5815 	 */
5816 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5817 	if (--mdi_statp->pi_kstat_ref != 0)
5818 		return;
5819 
5820 	kstat_delete(mdi_statp->pi_kstat_iostats);
5821 	kstat_delete(mdi_statp->pi_kstat_errstats);
5822 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5823 }
5824 
5825 /*
5826  * update I/O paths KSTATS
5827  */
5828 void
5829 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5830 {
5831 	kstat_t *iostatp;
5832 	size_t xfer_cnt;
5833 
5834 	ASSERT(pip != NULL);
5835 
5836 	/*
5837 	 * I/O can be driven across a path prior to having path
5838 	 * statistics available, i.e. probe(9e).
5839 	 */
5840 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5841 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5842 		xfer_cnt = bp->b_bcount - bp->b_resid;
5843 		if (bp->b_flags & B_READ) {
5844 			KSTAT_IO_PTR(iostatp)->reads++;
5845 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5846 		} else {
5847 			KSTAT_IO_PTR(iostatp)->writes++;
5848 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5849 		}
5850 	}
5851 }
5852 
5853 /*
5854  * Enable the path(specific client/target/initiator)
5855  * Enabling a path means that MPxIO may select the enabled path for routing
5856  * future I/O requests, subject to other path state constraints.
5857  */
5858 int
5859 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
5860 {
5861 	mdi_phci_t	*ph;
5862 
5863 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5864 	if (ph == NULL) {
5865 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5866 			" failed. pip: %p ph = NULL\n", (void *)pip));
5867 		return (MDI_FAILURE);
5868 	}
5869 
5870 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
5871 		MDI_ENABLE_OP);
5872 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5873 		" Returning success pip = %p. ph = %p\n",
5874 		(void *)pip, (void *)ph));
5875 	return (MDI_SUCCESS);
5876 
5877 }
5878 
5879 /*
5880  * Disable the path (specific client/target/initiator)
5881  * Disabling a path means that MPxIO will not select the disabled path for
5882  * routing any new I/O requests.
5883  */
5884 int
5885 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
5886 {
5887 	mdi_phci_t	*ph;
5888 
5889 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5890 	if (ph == NULL) {
5891 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5892 			" failed. pip: %p ph = NULL\n", (void *)pip));
5893 		return (MDI_FAILURE);
5894 	}
5895 
5896 	(void) i_mdi_enable_disable_path(pip,
5897 			ph->ph_vhci, flags, MDI_DISABLE_OP);
5898 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5899 		"Returning success pip = %p. ph = %p",
5900 		(void *)pip, (void *)ph));
5901 	return (MDI_SUCCESS);
5902 }
5903 
5904 /*
5905  * disable the path to a particular pHCI (pHCI specified in the phci_path
5906  * argument) for a particular client (specified in the client_path argument).
5907  * Disabling a path means that MPxIO will not select the disabled path for
5908  * routing any new I/O requests.
5909  * NOTE: this will be removed once the NWS files are changed to use the new
5910  * mdi_{enable,disable}_path interfaces
5911  */
5912 int
5913 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5914 {
5915 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5916 }
5917 
5918 /*
5919  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5920  * argument) for a particular client (specified in the client_path argument).
5921  * Enabling a path means that MPxIO may select the enabled path for routing
5922  * future I/O requests, subject to other path state constraints.
5923  * NOTE: this will be removed once the NWS files are changed to use the new
5924  * mdi_{enable,disable}_path interfaces
5925  */
5926 
5927 int
5928 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5929 {
5930 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5931 }
5932 
5933 /*
5934  * Common routine for doing enable/disable.
5935  */
5936 static mdi_pathinfo_t *
5937 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
5938 		int op)
5939 {
5940 	int		sync_flag = 0;
5941 	int		rv;
5942 	mdi_pathinfo_t 	*next;
5943 	int		(*f)() = NULL;
5944 
5945 	f = vh->vh_ops->vo_pi_state_change;
5946 
5947 	sync_flag = (flags << 8) & 0xf00;
5948 
5949 	/*
5950 	 * Do a callback into the mdi consumer to let it
5951 	 * know that path is about to get enabled/disabled.
5952 	 */
5953 	if (f != NULL) {
5954 		rv = (*f)(vh->vh_dip, pip, 0,
5955 			MDI_PI_EXT_STATE(pip),
5956 			MDI_EXT_STATE_CHANGE | sync_flag |
5957 			op | MDI_BEFORE_STATE_CHANGE);
5958 		if (rv != MDI_SUCCESS) {
5959 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5960 			"!vo_pi_state_change: failed rv = %x", rv));
5961 		}
5962 	}
5963 	MDI_PI_LOCK(pip);
5964 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5965 
5966 	switch (flags) {
5967 		case USER_DISABLE:
5968 			if (op == MDI_DISABLE_OP) {
5969 				MDI_PI_SET_USER_DISABLE(pip);
5970 			} else {
5971 				MDI_PI_SET_USER_ENABLE(pip);
5972 			}
5973 			break;
5974 		case DRIVER_DISABLE:
5975 			if (op == MDI_DISABLE_OP) {
5976 				MDI_PI_SET_DRV_DISABLE(pip);
5977 			} else {
5978 				MDI_PI_SET_DRV_ENABLE(pip);
5979 			}
5980 			break;
5981 		case DRIVER_DISABLE_TRANSIENT:
5982 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
5983 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5984 			} else {
5985 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5986 			}
5987 			break;
5988 	}
5989 	MDI_PI_UNLOCK(pip);
5990 	/*
5991 	 * Do a callback into the mdi consumer to let it
5992 	 * know that path is now enabled/disabled.
5993 	 */
5994 	if (f != NULL) {
5995 		rv = (*f)(vh->vh_dip, pip, 0,
5996 			MDI_PI_EXT_STATE(pip),
5997 			MDI_EXT_STATE_CHANGE | sync_flag |
5998 			op | MDI_AFTER_STATE_CHANGE);
5999 		if (rv != MDI_SUCCESS) {
6000 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
6001 			"!vo_pi_state_change: failed rv = %x", rv));
6002 		}
6003 	}
6004 	return (next);
6005 }
6006 
6007 /*
6008  * Common routine for doing enable/disable.
6009  * NOTE: this will be removed once the NWS files are changed to use the new
6010  * mdi_{enable,disable}_path has been putback
6011  */
6012 int
6013 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6014 {
6015 
6016 	mdi_phci_t	*ph;
6017 	mdi_vhci_t	*vh = NULL;
6018 	mdi_client_t	*ct;
6019 	mdi_pathinfo_t	*next, *pip;
6020 	int		found_it;
6021 
6022 	ph = i_devi_get_phci(pdip);
6023 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6024 		"Op = %d pdip = %p cdip = %p\n", op, (void *)pdip,
6025 		(void *)cdip));
6026 	if (ph == NULL) {
6027 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
6028 			"Op %d failed. ph = NULL\n", op));
6029 		return (MDI_FAILURE);
6030 	}
6031 
6032 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6033 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6034 			"Op Invalid operation = %d\n", op));
6035 		return (MDI_FAILURE);
6036 	}
6037 
6038 	vh = ph->ph_vhci;
6039 
6040 	if (cdip == NULL) {
6041 		/*
6042 		 * Need to mark the Phci as enabled/disabled.
6043 		 */
6044 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6045 		"Op %d for the phci\n", op));
6046 		MDI_PHCI_LOCK(ph);
6047 		switch (flags) {
6048 			case USER_DISABLE:
6049 				if (op == MDI_DISABLE_OP) {
6050 					MDI_PHCI_SET_USER_DISABLE(ph);
6051 				} else {
6052 					MDI_PHCI_SET_USER_ENABLE(ph);
6053 				}
6054 				break;
6055 			case DRIVER_DISABLE:
6056 				if (op == MDI_DISABLE_OP) {
6057 					MDI_PHCI_SET_DRV_DISABLE(ph);
6058 				} else {
6059 					MDI_PHCI_SET_DRV_ENABLE(ph);
6060 				}
6061 				break;
6062 			case DRIVER_DISABLE_TRANSIENT:
6063 				if (op == MDI_DISABLE_OP) {
6064 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6065 				} else {
6066 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6067 				}
6068 				break;
6069 			default:
6070 				MDI_PHCI_UNLOCK(ph);
6071 				MDI_DEBUG(1, (CE_NOTE, NULL,
6072 				"!i_mdi_pi_enable_disable:"
6073 				" Invalid flag argument= %d\n", flags));
6074 		}
6075 
6076 		/*
6077 		 * Phci has been disabled. Now try to enable/disable
6078 		 * path info's to each client.
6079 		 */
6080 		pip = ph->ph_path_head;
6081 		while (pip != NULL) {
6082 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6083 		}
6084 		MDI_PHCI_UNLOCK(ph);
6085 	} else {
6086 
6087 		/*
6088 		 * Disable a specific client.
6089 		 */
6090 		ct = i_devi_get_client(cdip);
6091 		if (ct == NULL) {
6092 			MDI_DEBUG(1, (CE_NOTE, NULL,
6093 			"!i_mdi_pi_enable_disable:"
6094 			" failed. ct = NULL operation = %d\n", op));
6095 			return (MDI_FAILURE);
6096 		}
6097 
6098 		MDI_CLIENT_LOCK(ct);
6099 		pip = ct->ct_path_head;
6100 		found_it = 0;
6101 		while (pip != NULL) {
6102 			MDI_PI_LOCK(pip);
6103 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6104 			if (MDI_PI(pip)->pi_phci == ph) {
6105 				MDI_PI_UNLOCK(pip);
6106 				found_it = 1;
6107 				break;
6108 			}
6109 			MDI_PI_UNLOCK(pip);
6110 			pip = next;
6111 		}
6112 
6113 
6114 		MDI_CLIENT_UNLOCK(ct);
6115 		if (found_it == 0) {
6116 			MDI_DEBUG(1, (CE_NOTE, NULL,
6117 			"!i_mdi_pi_enable_disable:"
6118 			" failed. Could not find corresponding pip\n"));
6119 			return (MDI_FAILURE);
6120 		}
6121 
6122 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
6123 	}
6124 
6125 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6126 		"Op %d Returning success pdip = %p cdip = %p\n",
6127 		op, (void *)pdip, (void *)cdip));
6128 	return (MDI_SUCCESS);
6129 }
6130 
6131 /*
6132  * Ensure phci powered up
6133  */
6134 static void
6135 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6136 {
6137 	dev_info_t	*ph_dip;
6138 
6139 	ASSERT(pip != NULL);
6140 	ASSERT(MDI_PI_LOCKED(pip));
6141 
6142 	if (MDI_PI(pip)->pi_pm_held) {
6143 		return;
6144 	}
6145 
6146 	ph_dip = mdi_pi_get_phci(pip);
6147 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n",
6148 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
6149 	if (ph_dip == NULL) {
6150 		return;
6151 	}
6152 
6153 	MDI_PI_UNLOCK(pip);
6154 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
6155 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6156 
6157 	pm_hold_power(ph_dip);
6158 
6159 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
6160 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6161 	MDI_PI_LOCK(pip);
6162 
6163 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6164 	if (DEVI(ph_dip)->devi_pm_info)
6165 		MDI_PI(pip)->pi_pm_held = 1;
6166 }
6167 
6168 /*
6169  * Allow phci powered down
6170  */
6171 static void
6172 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6173 {
6174 	dev_info_t	*ph_dip = NULL;
6175 
6176 	ASSERT(pip != NULL);
6177 	ASSERT(MDI_PI_LOCKED(pip));
6178 
6179 	if (MDI_PI(pip)->pi_pm_held == 0) {
6180 		return;
6181 	}
6182 
6183 	ph_dip = mdi_pi_get_phci(pip);
6184 	ASSERT(ph_dip != NULL);
6185 
6186 	MDI_PI_UNLOCK(pip);
6187 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n",
6188 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
6189 
6190 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
6191 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6192 	pm_rele_power(ph_dip);
6193 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
6194 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6195 
6196 	MDI_PI_LOCK(pip);
6197 	MDI_PI(pip)->pi_pm_held = 0;
6198 }
6199 
6200 static void
6201 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6202 {
6203 	ASSERT(MDI_CLIENT_LOCKED(ct));
6204 
6205 	ct->ct_power_cnt += incr;
6206 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p "
6207 	    "ct_power_cnt = %d incr = %d\n", (void *)ct,
6208 	    ct->ct_power_cnt, incr));
6209 	ASSERT(ct->ct_power_cnt >= 0);
6210 }
6211 
6212 static void
6213 i_mdi_rele_all_phci(mdi_client_t *ct)
6214 {
6215 	mdi_pathinfo_t  *pip;
6216 
6217 	ASSERT(MDI_CLIENT_LOCKED(ct));
6218 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6219 	while (pip != NULL) {
6220 		mdi_hold_path(pip);
6221 		MDI_PI_LOCK(pip);
6222 		i_mdi_pm_rele_pip(pip);
6223 		MDI_PI_UNLOCK(pip);
6224 		mdi_rele_path(pip);
6225 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6226 	}
6227 }
6228 
6229 static void
6230 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6231 {
6232 	ASSERT(MDI_CLIENT_LOCKED(ct));
6233 
6234 	if (i_ddi_devi_attached(ct->ct_dip)) {
6235 		ct->ct_power_cnt -= decr;
6236 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
6237 		    "ct_power_cnt = %d decr = %d\n",
6238 		    (void *)ct, ct->ct_power_cnt, decr));
6239 	}
6240 
6241 	ASSERT(ct->ct_power_cnt >= 0);
6242 	if (ct->ct_power_cnt == 0) {
6243 		i_mdi_rele_all_phci(ct);
6244 		return;
6245 	}
6246 }
6247 
6248 static void
6249 i_mdi_pm_reset_client(mdi_client_t *ct)
6250 {
6251 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
6252 	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
6253 	ASSERT(MDI_CLIENT_LOCKED(ct));
6254 	ct->ct_power_cnt = 0;
6255 	i_mdi_rele_all_phci(ct);
6256 	ct->ct_powercnt_config = 0;
6257 	ct->ct_powercnt_unconfig = 0;
6258 	ct->ct_powercnt_reset = 1;
6259 }
6260 
6261 static int
6262 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6263 {
6264 	int		ret;
6265 	dev_info_t	*ph_dip;
6266 
6267 	MDI_PI_LOCK(pip);
6268 	i_mdi_pm_hold_pip(pip);
6269 
6270 	ph_dip = mdi_pi_get_phci(pip);
6271 	MDI_PI_UNLOCK(pip);
6272 
6273 	/* bring all components of phci to full power */
6274 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6275 	    "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip),
6276 	    ddi_get_instance(ph_dip), (void *)pip));
6277 
6278 	ret = pm_powerup(ph_dip);
6279 
6280 	if (ret == DDI_FAILURE) {
6281 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6282 		    "pm_powerup FAILED for %s%d %p\n",
6283 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip),
6284 		    (void *)pip));
6285 
6286 		MDI_PI_LOCK(pip);
6287 		i_mdi_pm_rele_pip(pip);
6288 		MDI_PI_UNLOCK(pip);
6289 		return (MDI_FAILURE);
6290 	}
6291 
6292 	return (MDI_SUCCESS);
6293 }
6294 
6295 static int
6296 i_mdi_power_all_phci(mdi_client_t *ct)
6297 {
6298 	mdi_pathinfo_t  *pip;
6299 	int		succeeded = 0;
6300 
6301 	ASSERT(MDI_CLIENT_LOCKED(ct));
6302 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6303 	while (pip != NULL) {
6304 		/*
6305 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6306 		 * or MDI_PATHINFO_STATE_OFFLINE.
6307 		 */
6308 		if (MDI_PI_IS_INIT(pip) ||
6309 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6310 			mdi_hold_path(pip);
6311 			MDI_CLIENT_UNLOCK(ct);
6312 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6313 				succeeded = 1;
6314 
6315 			ASSERT(ct == MDI_PI(pip)->pi_client);
6316 			MDI_CLIENT_LOCK(ct);
6317 			mdi_rele_path(pip);
6318 		}
6319 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6320 	}
6321 
6322 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6323 }
6324 
6325 /*
6326  * mdi_bus_power():
6327  *		1. Place the phci(s) into powered up state so that
6328  *		   client can do power management
6329  *		2. Ensure phci powered up as client power managing
6330  * Return Values:
6331  *		MDI_SUCCESS
6332  *		MDI_FAILURE
6333  */
6334 int
6335 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6336     void *arg, void *result)
6337 {
6338 	int			ret = MDI_SUCCESS;
6339 	pm_bp_child_pwrchg_t	*bpc;
6340 	mdi_client_t		*ct;
6341 	dev_info_t		*cdip;
6342 	pm_bp_has_changed_t	*bphc;
6343 
6344 	/*
6345 	 * BUS_POWER_NOINVOL not supported
6346 	 */
6347 	if (op == BUS_POWER_NOINVOL)
6348 		return (MDI_FAILURE);
6349 
6350 	/*
6351 	 * ignore other OPs.
6352 	 * return quickly to save cou cycles on the ct processing
6353 	 */
6354 	switch (op) {
6355 	case BUS_POWER_PRE_NOTIFICATION:
6356 	case BUS_POWER_POST_NOTIFICATION:
6357 		bpc = (pm_bp_child_pwrchg_t *)arg;
6358 		cdip = bpc->bpc_dip;
6359 		break;
6360 	case BUS_POWER_HAS_CHANGED:
6361 		bphc = (pm_bp_has_changed_t *)arg;
6362 		cdip = bphc->bphc_dip;
6363 		break;
6364 	default:
6365 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6366 	}
6367 
6368 	ASSERT(MDI_CLIENT(cdip));
6369 
6370 	ct = i_devi_get_client(cdip);
6371 	if (ct == NULL)
6372 		return (MDI_FAILURE);
6373 
6374 	/*
6375 	 * wait till the mdi_pathinfo node state change are processed
6376 	 */
6377 	MDI_CLIENT_LOCK(ct);
6378 	switch (op) {
6379 	case BUS_POWER_PRE_NOTIFICATION:
6380 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
6381 		    "BUS_POWER_PRE_NOTIFICATION:"
6382 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
6383 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6384 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6385 
6386 		/* serialize power level change per client */
6387 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6388 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6389 
6390 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
6391 
6392 		if (ct->ct_power_cnt == 0) {
6393 			ret = i_mdi_power_all_phci(ct);
6394 		}
6395 
6396 		/*
6397 		 * if new_level > 0:
6398 		 *	- hold phci(s)
6399 		 *	- power up phci(s) if not already
6400 		 * ignore power down
6401 		 */
6402 		if (bpc->bpc_nlevel > 0) {
6403 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6404 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6405 				    "mdi_bus_power i_mdi_pm_hold_client\n"));
6406 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6407 			}
6408 		}
6409 		break;
6410 	case BUS_POWER_POST_NOTIFICATION:
6411 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
6412 		    "BUS_POWER_POST_NOTIFICATION:"
6413 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
6414 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6415 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6416 		    *(int *)result));
6417 
6418 		if (*(int *)result == DDI_SUCCESS) {
6419 			if (bpc->bpc_nlevel > 0) {
6420 				MDI_CLIENT_SET_POWER_UP(ct);
6421 			} else {
6422 				MDI_CLIENT_SET_POWER_DOWN(ct);
6423 			}
6424 		}
6425 
6426 		/* release the hold we did in pre-notification */
6427 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6428 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
6429 			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6430 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
6431 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6432 		}
6433 
6434 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6435 			/* another thread might started attaching */
6436 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6437 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6438 				    "mdi_bus_power i_mdi_pm_rele_client\n"));
6439 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6440 			/* detaching has been taken care in pm_post_unconfig */
6441 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6442 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6443 				    "mdi_bus_power i_mdi_pm_reset_client\n"));
6444 				i_mdi_pm_reset_client(ct);
6445 			}
6446 		}
6447 
6448 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6449 		cv_broadcast(&ct->ct_powerchange_cv);
6450 
6451 		break;
6452 
6453 	/* need to do more */
6454 	case BUS_POWER_HAS_CHANGED:
6455 		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
6456 		    "BUS_POWER_HAS_CHANGED:"
6457 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
6458 		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6459 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6460 
6461 		if (bphc->bphc_nlevel > 0 &&
6462 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
6463 			if (ct->ct_power_cnt == 0) {
6464 				ret = i_mdi_power_all_phci(ct);
6465 			}
6466 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
6467 			    "mdi_bus_power i_mdi_pm_hold_client\n"));
6468 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
6469 		}
6470 
6471 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6472 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
6473 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
6474 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6475 		}
6476 		break;
6477 	}
6478 
6479 	MDI_CLIENT_UNLOCK(ct);
6480 	return (ret);
6481 }
6482 
6483 static int
6484 i_mdi_pm_pre_config_one(dev_info_t *child)
6485 {
6486 	int		ret = MDI_SUCCESS;
6487 	mdi_client_t	*ct;
6488 
6489 	ct = i_devi_get_client(child);
6490 	if (ct == NULL)
6491 		return (MDI_FAILURE);
6492 
6493 	MDI_CLIENT_LOCK(ct);
6494 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6495 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6496 
6497 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6498 		MDI_CLIENT_UNLOCK(ct);
6499 		MDI_DEBUG(4, (CE_NOTE, child,
6500 		    "i_mdi_pm_pre_config_one already configured\n"));
6501 		return (MDI_SUCCESS);
6502 	}
6503 
6504 	if (ct->ct_powercnt_config) {
6505 		MDI_CLIENT_UNLOCK(ct);
6506 		MDI_DEBUG(4, (CE_NOTE, child,
6507 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
6508 		return (MDI_SUCCESS);
6509 	}
6510 
6511 	if (ct->ct_power_cnt == 0) {
6512 		ret = i_mdi_power_all_phci(ct);
6513 	}
6514 	MDI_DEBUG(4, (CE_NOTE, child,
6515 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
6516 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6517 	ct->ct_powercnt_config = 1;
6518 	ct->ct_powercnt_reset = 0;
6519 	MDI_CLIENT_UNLOCK(ct);
6520 	return (ret);
6521 }
6522 
6523 static int
6524 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6525 {
6526 	int			ret = MDI_SUCCESS;
6527 	dev_info_t		*cdip;
6528 	int			circ;
6529 
6530 	ASSERT(MDI_VHCI(vdip));
6531 
6532 	/* ndi_devi_config_one */
6533 	if (child) {
6534 		ASSERT(DEVI_BUSY_OWNED(vdip));
6535 		return (i_mdi_pm_pre_config_one(child));
6536 	}
6537 
6538 	/* devi_config_common */
6539 	ndi_devi_enter(vdip, &circ);
6540 	cdip = ddi_get_child(vdip);
6541 	while (cdip) {
6542 		dev_info_t *next = ddi_get_next_sibling(cdip);
6543 
6544 		ret = i_mdi_pm_pre_config_one(cdip);
6545 		if (ret != MDI_SUCCESS)
6546 			break;
6547 		cdip = next;
6548 	}
6549 	ndi_devi_exit(vdip, circ);
6550 	return (ret);
6551 }
6552 
6553 static int
6554 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6555 {
6556 	int		ret = MDI_SUCCESS;
6557 	mdi_client_t	*ct;
6558 
6559 	ct = i_devi_get_client(child);
6560 	if (ct == NULL)
6561 		return (MDI_FAILURE);
6562 
6563 	MDI_CLIENT_LOCK(ct);
6564 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6565 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6566 
6567 	if (!i_ddi_devi_attached(ct->ct_dip)) {
6568 		MDI_DEBUG(4, (CE_NOTE, child,
6569 		    "i_mdi_pm_pre_unconfig node detached already\n"));
6570 		MDI_CLIENT_UNLOCK(ct);
6571 		return (MDI_SUCCESS);
6572 	}
6573 
6574 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6575 	    (flags & NDI_AUTODETACH)) {
6576 		MDI_DEBUG(4, (CE_NOTE, child,
6577 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
6578 		MDI_CLIENT_UNLOCK(ct);
6579 		return (MDI_FAILURE);
6580 	}
6581 
6582 	if (ct->ct_powercnt_unconfig) {
6583 		MDI_DEBUG(4, (CE_NOTE, child,
6584 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
6585 		MDI_CLIENT_UNLOCK(ct);
6586 		*held = 1;
6587 		return (MDI_SUCCESS);
6588 	}
6589 
6590 	if (ct->ct_power_cnt == 0) {
6591 		ret = i_mdi_power_all_phci(ct);
6592 	}
6593 	MDI_DEBUG(4, (CE_NOTE, child,
6594 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
6595 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6596 	ct->ct_powercnt_unconfig = 1;
6597 	ct->ct_powercnt_reset = 0;
6598 	MDI_CLIENT_UNLOCK(ct);
6599 	if (ret == MDI_SUCCESS)
6600 		*held = 1;
6601 	return (ret);
6602 }
6603 
6604 static int
6605 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6606     int flags)
6607 {
6608 	int			ret = MDI_SUCCESS;
6609 	dev_info_t		*cdip;
6610 	int			circ;
6611 
6612 	ASSERT(MDI_VHCI(vdip));
6613 	*held = 0;
6614 
6615 	/* ndi_devi_unconfig_one */
6616 	if (child) {
6617 		ASSERT(DEVI_BUSY_OWNED(vdip));
6618 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6619 	}
6620 
6621 	/* devi_unconfig_common */
6622 	ndi_devi_enter(vdip, &circ);
6623 	cdip = ddi_get_child(vdip);
6624 	while (cdip) {
6625 		dev_info_t *next = ddi_get_next_sibling(cdip);
6626 
6627 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6628 		cdip = next;
6629 	}
6630 	ndi_devi_exit(vdip, circ);
6631 
6632 	if (*held)
6633 		ret = MDI_SUCCESS;
6634 
6635 	return (ret);
6636 }
6637 
6638 static void
6639 i_mdi_pm_post_config_one(dev_info_t *child)
6640 {
6641 	mdi_client_t	*ct;
6642 
6643 	ct = i_devi_get_client(child);
6644 	if (ct == NULL)
6645 		return;
6646 
6647 	MDI_CLIENT_LOCK(ct);
6648 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6649 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6650 
6651 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6652 		MDI_DEBUG(4, (CE_NOTE, child,
6653 		    "i_mdi_pm_post_config_one NOT configured\n"));
6654 		MDI_CLIENT_UNLOCK(ct);
6655 		return;
6656 	}
6657 
6658 	/* client has not been updated */
6659 	if (MDI_CLIENT_IS_FAILED(ct)) {
6660 		MDI_DEBUG(4, (CE_NOTE, child,
6661 		    "i_mdi_pm_post_config_one NOT configured\n"));
6662 		MDI_CLIENT_UNLOCK(ct);
6663 		return;
6664 	}
6665 
6666 	/* another thread might have powered it down or detached it */
6667 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6668 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6669 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6670 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6671 		MDI_DEBUG(4, (CE_NOTE, child,
6672 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6673 		i_mdi_pm_reset_client(ct);
6674 	} else {
6675 		mdi_pathinfo_t  *pip, *next;
6676 		int	valid_path_count = 0;
6677 
6678 		MDI_DEBUG(4, (CE_NOTE, child,
6679 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6680 		pip = ct->ct_path_head;
6681 		while (pip != NULL) {
6682 			MDI_PI_LOCK(pip);
6683 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6684 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6685 				valid_path_count ++;
6686 			MDI_PI_UNLOCK(pip);
6687 			pip = next;
6688 		}
6689 		i_mdi_pm_rele_client(ct, valid_path_count);
6690 	}
6691 	ct->ct_powercnt_config = 0;
6692 	MDI_CLIENT_UNLOCK(ct);
6693 }
6694 
6695 static void
6696 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
6697 {
6698 	int		circ;
6699 	dev_info_t	*cdip;
6700 
6701 	ASSERT(MDI_VHCI(vdip));
6702 
6703 	/* ndi_devi_config_one */
6704 	if (child) {
6705 		ASSERT(DEVI_BUSY_OWNED(vdip));
6706 		i_mdi_pm_post_config_one(child);
6707 		return;
6708 	}
6709 
6710 	/* devi_config_common */
6711 	ndi_devi_enter(vdip, &circ);
6712 	cdip = ddi_get_child(vdip);
6713 	while (cdip) {
6714 		dev_info_t *next = ddi_get_next_sibling(cdip);
6715 
6716 		i_mdi_pm_post_config_one(cdip);
6717 		cdip = next;
6718 	}
6719 	ndi_devi_exit(vdip, circ);
6720 }
6721 
6722 static void
6723 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6724 {
6725 	mdi_client_t	*ct;
6726 
6727 	ct = i_devi_get_client(child);
6728 	if (ct == NULL)
6729 		return;
6730 
6731 	MDI_CLIENT_LOCK(ct);
6732 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6733 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6734 
6735 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6736 		MDI_DEBUG(4, (CE_NOTE, child,
6737 		    "i_mdi_pm_post_unconfig NOT held\n"));
6738 		MDI_CLIENT_UNLOCK(ct);
6739 		return;
6740 	}
6741 
6742 	/* failure detaching or another thread just attached it */
6743 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6744 	    i_ddi_devi_attached(ct->ct_dip)) ||
6745 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6746 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6747 		MDI_DEBUG(4, (CE_NOTE, child,
6748 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6749 		i_mdi_pm_reset_client(ct);
6750 	} else {
6751 		mdi_pathinfo_t  *pip, *next;
6752 		int	valid_path_count = 0;
6753 
6754 		MDI_DEBUG(4, (CE_NOTE, child,
6755 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6756 		pip = ct->ct_path_head;
6757 		while (pip != NULL) {
6758 			MDI_PI_LOCK(pip);
6759 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6760 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6761 				valid_path_count ++;
6762 			MDI_PI_UNLOCK(pip);
6763 			pip = next;
6764 		}
6765 		i_mdi_pm_rele_client(ct, valid_path_count);
6766 		ct->ct_powercnt_unconfig = 0;
6767 	}
6768 
6769 	MDI_CLIENT_UNLOCK(ct);
6770 }
6771 
6772 static void
6773 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
6774 {
6775 	int			circ;
6776 	dev_info_t		*cdip;
6777 
6778 	ASSERT(MDI_VHCI(vdip));
6779 
6780 	if (!held) {
6781 		MDI_DEBUG(4, (CE_NOTE, vdip,
6782 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6783 		return;
6784 	}
6785 
6786 	if (child) {
6787 		ASSERT(DEVI_BUSY_OWNED(vdip));
6788 		i_mdi_pm_post_unconfig_one(child);
6789 		return;
6790 	}
6791 
6792 	ndi_devi_enter(vdip, &circ);
6793 	cdip = ddi_get_child(vdip);
6794 	while (cdip) {
6795 		dev_info_t *next = ddi_get_next_sibling(cdip);
6796 
6797 		i_mdi_pm_post_unconfig_one(cdip);
6798 		cdip = next;
6799 	}
6800 	ndi_devi_exit(vdip, circ);
6801 }
6802 
6803 int
6804 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6805 {
6806 	int			circ, ret = MDI_SUCCESS;
6807 	dev_info_t		*client_dip = NULL;
6808 	mdi_client_t		*ct;
6809 
6810 	/*
6811 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6812 	 * Power up pHCI for the named client device.
6813 	 * Note: Before the client is enumerated under vhci by phci,
6814 	 * client_dip can be NULL. Then proceed to power up all the
6815 	 * pHCIs.
6816 	 */
6817 	if (devnm != NULL) {
6818 		ndi_devi_enter(vdip, &circ);
6819 		client_dip = ndi_devi_findchild(vdip, devnm);
6820 	}
6821 
6822 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
6823 	    op, devnm ? devnm : "NULL", (void *)client_dip));
6824 
6825 	switch (op) {
6826 	case MDI_PM_PRE_CONFIG:
6827 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6828 		break;
6829 
6830 	case MDI_PM_PRE_UNCONFIG:
6831 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6832 		    flags);
6833 		break;
6834 
6835 	case MDI_PM_POST_CONFIG:
6836 		i_mdi_pm_post_config(vdip, client_dip);
6837 		break;
6838 
6839 	case MDI_PM_POST_UNCONFIG:
6840 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6841 		break;
6842 
6843 	case MDI_PM_HOLD_POWER:
6844 	case MDI_PM_RELE_POWER:
6845 		ASSERT(args);
6846 
6847 		client_dip = (dev_info_t *)args;
6848 		ASSERT(MDI_CLIENT(client_dip));
6849 
6850 		ct = i_devi_get_client(client_dip);
6851 		MDI_CLIENT_LOCK(ct);
6852 
6853 		if (op == MDI_PM_HOLD_POWER) {
6854 			if (ct->ct_power_cnt == 0) {
6855 				(void) i_mdi_power_all_phci(ct);
6856 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6857 				    "mdi_power i_mdi_pm_hold_client\n"));
6858 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6859 			}
6860 		} else {
6861 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6862 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6863 				    "mdi_power i_mdi_pm_rele_client\n"));
6864 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6865 			} else {
6866 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6867 				    "mdi_power i_mdi_pm_reset_client\n"));
6868 				i_mdi_pm_reset_client(ct);
6869 			}
6870 		}
6871 
6872 		MDI_CLIENT_UNLOCK(ct);
6873 		break;
6874 
6875 	default:
6876 		break;
6877 	}
6878 
6879 	if (devnm)
6880 		ndi_devi_exit(vdip, circ);
6881 
6882 	return (ret);
6883 }
6884 
6885 int
6886 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6887 {
6888 	mdi_vhci_t *vhci;
6889 
6890 	if (!MDI_VHCI(dip))
6891 		return (MDI_FAILURE);
6892 
6893 	if (mdi_class) {
6894 		vhci = DEVI(dip)->devi_mdi_xhci;
6895 		ASSERT(vhci);
6896 		*mdi_class = vhci->vh_class;
6897 	}
6898 
6899 	return (MDI_SUCCESS);
6900 }
6901 
6902 int
6903 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6904 {
6905 	mdi_phci_t *phci;
6906 
6907 	if (!MDI_PHCI(dip))
6908 		return (MDI_FAILURE);
6909 
6910 	if (mdi_class) {
6911 		phci = DEVI(dip)->devi_mdi_xhci;
6912 		ASSERT(phci);
6913 		*mdi_class = phci->ph_vhci->vh_class;
6914 	}
6915 
6916 	return (MDI_SUCCESS);
6917 }
6918 
6919 int
6920 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6921 {
6922 	mdi_client_t *client;
6923 
6924 	if (!MDI_CLIENT(dip))
6925 		return (MDI_FAILURE);
6926 
6927 	if (mdi_class) {
6928 		client = DEVI(dip)->devi_mdi_client;
6929 		ASSERT(client);
6930 		*mdi_class = client->ct_vhci->vh_class;
6931 	}
6932 
6933 	return (MDI_SUCCESS);
6934 }
6935 
6936 void *
6937 mdi_client_get_vhci_private(dev_info_t *dip)
6938 {
6939 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6940 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6941 		mdi_client_t	*ct;
6942 		ct = i_devi_get_client(dip);
6943 		return (ct->ct_vprivate);
6944 	}
6945 	return (NULL);
6946 }
6947 
6948 void
6949 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6950 {
6951 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6952 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6953 		mdi_client_t	*ct;
6954 		ct = i_devi_get_client(dip);
6955 		ct->ct_vprivate = data;
6956 	}
6957 }
6958 /*
6959  * mdi_pi_get_vhci_private():
6960  *		Get the vhci private information associated with the
6961  *		mdi_pathinfo node
6962  */
6963 void *
6964 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6965 {
6966 	caddr_t	vprivate = NULL;
6967 	if (pip) {
6968 		vprivate = MDI_PI(pip)->pi_vprivate;
6969 	}
6970 	return (vprivate);
6971 }
6972 
6973 /*
6974  * mdi_pi_set_vhci_private():
6975  *		Set the vhci private information in the mdi_pathinfo node
6976  */
6977 void
6978 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6979 {
6980 	if (pip) {
6981 		MDI_PI(pip)->pi_vprivate = priv;
6982 	}
6983 }
6984 
6985 /*
6986  * mdi_phci_get_vhci_private():
6987  *		Get the vhci private information associated with the
6988  *		mdi_phci node
6989  */
6990 void *
6991 mdi_phci_get_vhci_private(dev_info_t *dip)
6992 {
6993 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6994 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6995 		mdi_phci_t	*ph;
6996 		ph = i_devi_get_phci(dip);
6997 		return (ph->ph_vprivate);
6998 	}
6999 	return (NULL);
7000 }
7001 
7002 /*
7003  * mdi_phci_set_vhci_private():
7004  *		Set the vhci private information in the mdi_phci node
7005  */
7006 void
7007 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7008 {
7009 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7010 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7011 		mdi_phci_t	*ph;
7012 		ph = i_devi_get_phci(dip);
7013 		ph->ph_vprivate = priv;
7014 	}
7015 }
7016 
7017 /*
7018  * List of vhci class names:
7019  * A vhci class name must be in this list only if the corresponding vhci
7020  * driver intends to use the mdi provided bus config implementation
7021  * (i.e., mdi_vhci_bus_config()).
7022  */
7023 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7024 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
7025 
7026 /*
7027  * During boot time, the on-disk vhci cache for every vhci class is read
7028  * in the form of an nvlist and stored here.
7029  */
7030 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7031 
7032 /* nvpair names in vhci cache nvlist */
7033 #define	MDI_VHCI_CACHE_VERSION	1
7034 #define	MDI_NVPNAME_VERSION	"version"
7035 #define	MDI_NVPNAME_PHCIS	"phcis"
7036 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
7037 
7038 /*
7039  * Given vhci class name, return its on-disk vhci cache filename.
7040  * Memory for the returned filename which includes the full path is allocated
7041  * by this function.
7042  */
7043 static char *
7044 vhclass2vhcache_filename(char *vhclass)
7045 {
7046 	char *filename;
7047 	int len;
7048 	static char *fmt = "/etc/devices/mdi_%s_cache";
7049 
7050 	/*
7051 	 * fmt contains the on-disk vhci cache file name format;
7052 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7053 	 */
7054 
7055 	/* the -1 below is to account for "%s" in the format string */
7056 	len = strlen(fmt) + strlen(vhclass) - 1;
7057 	filename = kmem_alloc(len, KM_SLEEP);
7058 	(void) snprintf(filename, len, fmt, vhclass);
7059 	ASSERT(len == (strlen(filename) + 1));
7060 	return (filename);
7061 }
7062 
7063 /*
7064  * initialize the vhci cache related data structures and read the on-disk
7065  * vhci cached data into memory.
7066  */
7067 static void
7068 setup_vhci_cache(mdi_vhci_t *vh)
7069 {
7070 	mdi_vhci_config_t *vhc;
7071 	mdi_vhci_cache_t *vhcache;
7072 	int i;
7073 	nvlist_t *nvl = NULL;
7074 
7075 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7076 	vh->vh_config = vhc;
7077 	vhcache = &vhc->vhc_vhcache;
7078 
7079 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7080 
7081 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7082 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7083 
7084 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7085 
7086 	/*
7087 	 * Create string hash; same as mod_hash_create_strhash() except that
7088 	 * we use NULL key destructor.
7089 	 */
7090 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7091 	    mdi_bus_config_cache_hash_size,
7092 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
7093 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7094 
7095 	/*
7096 	 * The on-disk vhci cache is read during booting prior to the
7097 	 * lights-out period by mdi_read_devices_files().
7098 	 */
7099 	for (i = 0; i < N_VHCI_CLASSES; i++) {
7100 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7101 			nvl = vhcache_nvl[i];
7102 			vhcache_nvl[i] = NULL;
7103 			break;
7104 		}
7105 	}
7106 
7107 	/*
7108 	 * this is to cover the case of some one manually causing unloading
7109 	 * (or detaching) and reloading (or attaching) of a vhci driver.
7110 	 */
7111 	if (nvl == NULL && modrootloaded)
7112 		nvl = read_on_disk_vhci_cache(vh->vh_class);
7113 
7114 	if (nvl != NULL) {
7115 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7116 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7117 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7118 		else  {
7119 			cmn_err(CE_WARN,
7120 			    "%s: data file corrupted, will recreate\n",
7121 			    vhc->vhc_vhcache_filename);
7122 		}
7123 		rw_exit(&vhcache->vhcache_lock);
7124 		nvlist_free(nvl);
7125 	}
7126 
7127 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7128 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7129 
7130 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7131 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7132 }
7133 
7134 /*
7135  * free all vhci cache related resources
7136  */
7137 static int
7138 destroy_vhci_cache(mdi_vhci_t *vh)
7139 {
7140 	mdi_vhci_config_t *vhc = vh->vh_config;
7141 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7142 	mdi_vhcache_phci_t *cphci, *cphci_next;
7143 	mdi_vhcache_client_t *cct, *cct_next;
7144 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7145 
7146 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7147 		return (MDI_FAILURE);
7148 
7149 	kmem_free(vhc->vhc_vhcache_filename,
7150 	    strlen(vhc->vhc_vhcache_filename) + 1);
7151 
7152 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7153 
7154 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7155 	    cphci = cphci_next) {
7156 		cphci_next = cphci->cphci_next;
7157 		free_vhcache_phci(cphci);
7158 	}
7159 
7160 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7161 		cct_next = cct->cct_next;
7162 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7163 			cpi_next = cpi->cpi_next;
7164 			free_vhcache_pathinfo(cpi);
7165 		}
7166 		free_vhcache_client(cct);
7167 	}
7168 
7169 	rw_destroy(&vhcache->vhcache_lock);
7170 
7171 	mutex_destroy(&vhc->vhc_lock);
7172 	cv_destroy(&vhc->vhc_cv);
7173 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
7174 	return (MDI_SUCCESS);
7175 }
7176 
7177 /*
7178  * Stop all vhci cache related async threads and free their resources.
7179  */
7180 static int
7181 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7182 {
7183 	mdi_async_client_config_t *acc, *acc_next;
7184 
7185 	mutex_enter(&vhc->vhc_lock);
7186 	vhc->vhc_flags |= MDI_VHC_EXIT;
7187 	ASSERT(vhc->vhc_acc_thrcount >= 0);
7188 	cv_broadcast(&vhc->vhc_cv);
7189 
7190 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7191 	    vhc->vhc_acc_thrcount != 0) {
7192 		mutex_exit(&vhc->vhc_lock);
7193 		delay(1);
7194 		mutex_enter(&vhc->vhc_lock);
7195 	}
7196 
7197 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
7198 
7199 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7200 		acc_next = acc->acc_next;
7201 		free_async_client_config(acc);
7202 	}
7203 	vhc->vhc_acc_list_head = NULL;
7204 	vhc->vhc_acc_list_tail = NULL;
7205 	vhc->vhc_acc_count = 0;
7206 
7207 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7208 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7209 		mutex_exit(&vhc->vhc_lock);
7210 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7211 			vhcache_dirty(vhc);
7212 			return (MDI_FAILURE);
7213 		}
7214 	} else
7215 		mutex_exit(&vhc->vhc_lock);
7216 
7217 	if (callb_delete(vhc->vhc_cbid) != 0)
7218 		return (MDI_FAILURE);
7219 
7220 	return (MDI_SUCCESS);
7221 }
7222 
7223 /*
7224  * Stop vhci cache flush thread
7225  */
7226 /* ARGSUSED */
7227 static boolean_t
7228 stop_vhcache_flush_thread(void *arg, int code)
7229 {
7230 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7231 
7232 	mutex_enter(&vhc->vhc_lock);
7233 	vhc->vhc_flags |= MDI_VHC_EXIT;
7234 	cv_broadcast(&vhc->vhc_cv);
7235 
7236 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7237 		mutex_exit(&vhc->vhc_lock);
7238 		delay(1);
7239 		mutex_enter(&vhc->vhc_lock);
7240 	}
7241 
7242 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7243 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7244 		mutex_exit(&vhc->vhc_lock);
7245 		(void) flush_vhcache(vhc, 1);
7246 	} else
7247 		mutex_exit(&vhc->vhc_lock);
7248 
7249 	return (B_TRUE);
7250 }
7251 
7252 /*
7253  * Enqueue the vhcache phci (cphci) at the tail of the list
7254  */
7255 static void
7256 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7257 {
7258 	cphci->cphci_next = NULL;
7259 	if (vhcache->vhcache_phci_head == NULL)
7260 		vhcache->vhcache_phci_head = cphci;
7261 	else
7262 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7263 	vhcache->vhcache_phci_tail = cphci;
7264 }
7265 
7266 /*
7267  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7268  */
7269 static void
7270 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7271     mdi_vhcache_pathinfo_t *cpi)
7272 {
7273 	cpi->cpi_next = NULL;
7274 	if (cct->cct_cpi_head == NULL)
7275 		cct->cct_cpi_head = cpi;
7276 	else
7277 		cct->cct_cpi_tail->cpi_next = cpi;
7278 	cct->cct_cpi_tail = cpi;
7279 }
7280 
7281 /*
7282  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7283  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7284  * flag set come at the beginning of the list. All cpis which have this
7285  * flag set come at the end of the list.
7286  */
7287 static void
7288 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7289     mdi_vhcache_pathinfo_t *newcpi)
7290 {
7291 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7292 
7293 	if (cct->cct_cpi_head == NULL ||
7294 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7295 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7296 	else {
7297 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7298 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7299 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7300 			;
7301 
7302 		if (prev_cpi == NULL)
7303 			cct->cct_cpi_head = newcpi;
7304 		else
7305 			prev_cpi->cpi_next = newcpi;
7306 
7307 		newcpi->cpi_next = cpi;
7308 
7309 		if (cpi == NULL)
7310 			cct->cct_cpi_tail = newcpi;
7311 	}
7312 }
7313 
7314 /*
7315  * Enqueue the vhcache client (cct) at the tail of the list
7316  */
7317 static void
7318 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7319     mdi_vhcache_client_t *cct)
7320 {
7321 	cct->cct_next = NULL;
7322 	if (vhcache->vhcache_client_head == NULL)
7323 		vhcache->vhcache_client_head = cct;
7324 	else
7325 		vhcache->vhcache_client_tail->cct_next = cct;
7326 	vhcache->vhcache_client_tail = cct;
7327 }
7328 
7329 static void
7330 free_string_array(char **str, int nelem)
7331 {
7332 	int i;
7333 
7334 	if (str) {
7335 		for (i = 0; i < nelem; i++) {
7336 			if (str[i])
7337 				kmem_free(str[i], strlen(str[i]) + 1);
7338 		}
7339 		kmem_free(str, sizeof (char *) * nelem);
7340 	}
7341 }
7342 
7343 static void
7344 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7345 {
7346 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7347 	kmem_free(cphci, sizeof (*cphci));
7348 }
7349 
7350 static void
7351 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7352 {
7353 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7354 	kmem_free(cpi, sizeof (*cpi));
7355 }
7356 
7357 static void
7358 free_vhcache_client(mdi_vhcache_client_t *cct)
7359 {
7360 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7361 	kmem_free(cct, sizeof (*cct));
7362 }
7363 
7364 static char *
7365 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7366 {
7367 	char *name_addr;
7368 	int len;
7369 
7370 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7371 	name_addr = kmem_alloc(len, KM_SLEEP);
7372 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7373 
7374 	if (ret_len)
7375 		*ret_len = len;
7376 	return (name_addr);
7377 }
7378 
7379 /*
7380  * Copy the contents of paddrnvl to vhci cache.
7381  * paddrnvl nvlist contains path information for a vhci client.
7382  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7383  */
7384 static void
7385 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7386     mdi_vhcache_client_t *cct)
7387 {
7388 	nvpair_t *nvp = NULL;
7389 	mdi_vhcache_pathinfo_t *cpi;
7390 	uint_t nelem;
7391 	uint32_t *val;
7392 
7393 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7394 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7395 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7396 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7397 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7398 		ASSERT(nelem == 2);
7399 		cpi->cpi_cphci = cphci_list[val[0]];
7400 		cpi->cpi_flags = val[1];
7401 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7402 	}
7403 }
7404 
7405 /*
7406  * Copy the contents of caddrmapnvl to vhci cache.
7407  * caddrmapnvl nvlist contains vhci client address to phci client address
7408  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7409  * this nvlist.
7410  */
7411 static void
7412 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7413     mdi_vhcache_phci_t *cphci_list[])
7414 {
7415 	nvpair_t *nvp = NULL;
7416 	nvlist_t *paddrnvl;
7417 	mdi_vhcache_client_t *cct;
7418 
7419 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7420 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7421 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7422 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7423 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7424 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7425 		/* the client must contain at least one path */
7426 		ASSERT(cct->cct_cpi_head != NULL);
7427 
7428 		enqueue_vhcache_client(vhcache, cct);
7429 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7430 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7431 	}
7432 }
7433 
7434 /*
7435  * Copy the contents of the main nvlist to vhci cache.
7436  *
7437  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7438  * The nvlist contains the mappings between the vhci client addresses and
7439  * their corresponding phci client addresses.
7440  *
7441  * The structure of the nvlist is as follows:
7442  *
7443  * Main nvlist:
7444  *	NAME		TYPE		DATA
7445  *	version		int32		version number
7446  *	phcis		string array	array of phci paths
7447  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7448  *
7449  * structure of c2paddrs_nvl:
7450  *	NAME		TYPE		DATA
7451  *	caddr1		nvlist_t	paddrs_nvl1
7452  *	caddr2		nvlist_t	paddrs_nvl2
7453  *	...
7454  * where caddr1, caddr2, ... are vhci client name and addresses in the
7455  * form of "<clientname>@<clientaddress>".
7456  * (for example: "ssd@2000002037cd9f72");
7457  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7458  *
7459  * structure of paddrs_nvl:
7460  *	NAME		TYPE		DATA
7461  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7462  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7463  *	...
7464  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7465  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7466  * phci-ids are integers that identify PHCIs to which the
7467  * the bus specific address belongs to. These integers are used as an index
7468  * into to the phcis string array in the main nvlist to get the PHCI path.
7469  */
7470 static int
7471 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7472 {
7473 	char **phcis, **phci_namep;
7474 	uint_t nphcis;
7475 	mdi_vhcache_phci_t *cphci, **cphci_list;
7476 	nvlist_t *caddrmapnvl;
7477 	int32_t ver;
7478 	int i;
7479 	size_t cphci_list_size;
7480 
7481 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7482 
7483 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7484 	    ver != MDI_VHCI_CACHE_VERSION)
7485 		return (MDI_FAILURE);
7486 
7487 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7488 	    &nphcis) != 0)
7489 		return (MDI_SUCCESS);
7490 
7491 	ASSERT(nphcis > 0);
7492 
7493 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7494 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7495 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7496 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7497 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7498 		enqueue_vhcache_phci(vhcache, cphci);
7499 		cphci_list[i] = cphci;
7500 	}
7501 
7502 	ASSERT(vhcache->vhcache_phci_head != NULL);
7503 
7504 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7505 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7506 
7507 	kmem_free(cphci_list, cphci_list_size);
7508 	return (MDI_SUCCESS);
7509 }
7510 
7511 /*
7512  * Build paddrnvl for the specified client using the information in the
7513  * vhci cache and add it to the caddrmapnnvl.
7514  * Returns 0 on success, errno on failure.
7515  */
7516 static int
7517 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7518     nvlist_t *caddrmapnvl)
7519 {
7520 	mdi_vhcache_pathinfo_t *cpi;
7521 	nvlist_t *nvl;
7522 	int err;
7523 	uint32_t val[2];
7524 
7525 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7526 
7527 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7528 		return (err);
7529 
7530 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7531 		val[0] = cpi->cpi_cphci->cphci_id;
7532 		val[1] = cpi->cpi_flags;
7533 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7534 		    != 0)
7535 			goto out;
7536 	}
7537 
7538 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7539 out:
7540 	nvlist_free(nvl);
7541 	return (err);
7542 }
7543 
7544 /*
7545  * Build caddrmapnvl using the information in the vhci cache
7546  * and add it to the mainnvl.
7547  * Returns 0 on success, errno on failure.
7548  */
7549 static int
7550 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7551 {
7552 	mdi_vhcache_client_t *cct;
7553 	nvlist_t *nvl;
7554 	int err;
7555 
7556 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7557 
7558 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7559 		return (err);
7560 
7561 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7562 	    cct = cct->cct_next) {
7563 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7564 			goto out;
7565 	}
7566 
7567 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7568 out:
7569 	nvlist_free(nvl);
7570 	return (err);
7571 }
7572 
7573 /*
7574  * Build nvlist using the information in the vhci cache.
7575  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7576  * Returns nvl on success, NULL on failure.
7577  */
7578 static nvlist_t *
7579 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7580 {
7581 	mdi_vhcache_phci_t *cphci;
7582 	uint_t phci_count;
7583 	char **phcis;
7584 	nvlist_t *nvl;
7585 	int err, i;
7586 
7587 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7588 		nvl = NULL;
7589 		goto out;
7590 	}
7591 
7592 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7593 	    MDI_VHCI_CACHE_VERSION)) != 0)
7594 		goto out;
7595 
7596 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7597 	if (vhcache->vhcache_phci_head == NULL) {
7598 		rw_exit(&vhcache->vhcache_lock);
7599 		return (nvl);
7600 	}
7601 
7602 	phci_count = 0;
7603 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7604 	    cphci = cphci->cphci_next)
7605 		cphci->cphci_id = phci_count++;
7606 
7607 	/* build phci pathname list */
7608 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7609 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7610 	    cphci = cphci->cphci_next, i++)
7611 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7612 
7613 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7614 	    phci_count);
7615 	free_string_array(phcis, phci_count);
7616 
7617 	if (err == 0 &&
7618 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7619 		rw_exit(&vhcache->vhcache_lock);
7620 		return (nvl);
7621 	}
7622 
7623 	rw_exit(&vhcache->vhcache_lock);
7624 out:
7625 	if (nvl)
7626 		nvlist_free(nvl);
7627 	return (NULL);
7628 }
7629 
7630 /*
7631  * Lookup vhcache phci structure for the specified phci path.
7632  */
7633 static mdi_vhcache_phci_t *
7634 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7635 {
7636 	mdi_vhcache_phci_t *cphci;
7637 
7638 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7639 
7640 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7641 	    cphci = cphci->cphci_next) {
7642 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7643 			return (cphci);
7644 	}
7645 
7646 	return (NULL);
7647 }
7648 
7649 /*
7650  * Lookup vhcache phci structure for the specified phci.
7651  */
7652 static mdi_vhcache_phci_t *
7653 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7654 {
7655 	mdi_vhcache_phci_t *cphci;
7656 
7657 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7658 
7659 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7660 	    cphci = cphci->cphci_next) {
7661 		if (cphci->cphci_phci == ph)
7662 			return (cphci);
7663 	}
7664 
7665 	return (NULL);
7666 }
7667 
7668 /*
7669  * Add the specified phci to the vhci cache if not already present.
7670  */
7671 static void
7672 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7673 {
7674 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7675 	mdi_vhcache_phci_t *cphci;
7676 	char *pathname;
7677 	int cache_updated;
7678 
7679 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7680 
7681 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7682 	(void) ddi_pathname(ph->ph_dip, pathname);
7683 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7684 	    != NULL) {
7685 		cphci->cphci_phci = ph;
7686 		cache_updated = 0;
7687 	} else {
7688 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7689 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7690 		cphci->cphci_phci = ph;
7691 		enqueue_vhcache_phci(vhcache, cphci);
7692 		cache_updated = 1;
7693 	}
7694 
7695 	rw_exit(&vhcache->vhcache_lock);
7696 
7697 	/*
7698 	 * Since a new phci has been added, reset
7699 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7700 	 * during next vhcache_discover_paths().
7701 	 */
7702 	mutex_enter(&vhc->vhc_lock);
7703 	vhc->vhc_path_discovery_cutoff_time = 0;
7704 	mutex_exit(&vhc->vhc_lock);
7705 
7706 	kmem_free(pathname, MAXPATHLEN);
7707 	if (cache_updated)
7708 		vhcache_dirty(vhc);
7709 }
7710 
7711 /*
7712  * Remove the reference to the specified phci from the vhci cache.
7713  */
7714 static void
7715 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7716 {
7717 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7718 	mdi_vhcache_phci_t *cphci;
7719 
7720 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7721 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7722 		/* do not remove the actual mdi_vhcache_phci structure */
7723 		cphci->cphci_phci = NULL;
7724 	}
7725 	rw_exit(&vhcache->vhcache_lock);
7726 }
7727 
7728 static void
7729 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7730     mdi_vhcache_lookup_token_t *src)
7731 {
7732 	if (src == NULL) {
7733 		dst->lt_cct = NULL;
7734 		dst->lt_cct_lookup_time = 0;
7735 	} else {
7736 		dst->lt_cct = src->lt_cct;
7737 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7738 	}
7739 }
7740 
7741 /*
7742  * Look up vhcache client for the specified client.
7743  */
7744 static mdi_vhcache_client_t *
7745 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7746     mdi_vhcache_lookup_token_t *token)
7747 {
7748 	mod_hash_val_t hv;
7749 	char *name_addr;
7750 	int len;
7751 
7752 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7753 
7754 	/*
7755 	 * If no vhcache clean occurred since the last lookup, we can
7756 	 * simply return the cct from the last lookup operation.
7757 	 * It works because ccts are never freed except during the vhcache
7758 	 * cleanup operation.
7759 	 */
7760 	if (token != NULL &&
7761 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7762 		return (token->lt_cct);
7763 
7764 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7765 	if (mod_hash_find(vhcache->vhcache_client_hash,
7766 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7767 		if (token) {
7768 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7769 			token->lt_cct_lookup_time = lbolt64;
7770 		}
7771 	} else {
7772 		if (token) {
7773 			token->lt_cct = NULL;
7774 			token->lt_cct_lookup_time = 0;
7775 		}
7776 		hv = NULL;
7777 	}
7778 	kmem_free(name_addr, len);
7779 	return ((mdi_vhcache_client_t *)hv);
7780 }
7781 
7782 /*
7783  * Add the specified path to the vhci cache if not already present.
7784  * Also add the vhcache client for the client corresponding to this path
7785  * if it doesn't already exist.
7786  */
7787 static void
7788 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7789 {
7790 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7791 	mdi_vhcache_client_t *cct;
7792 	mdi_vhcache_pathinfo_t *cpi;
7793 	mdi_phci_t *ph = pip->pi_phci;
7794 	mdi_client_t *ct = pip->pi_client;
7795 	int cache_updated = 0;
7796 
7797 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7798 
7799 	/* if vhcache client for this pip doesn't already exist, add it */
7800 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7801 	    NULL)) == NULL) {
7802 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7803 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7804 		    ct->ct_guid, NULL);
7805 		enqueue_vhcache_client(vhcache, cct);
7806 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7807 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7808 		cache_updated = 1;
7809 	}
7810 
7811 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7812 		if (cpi->cpi_cphci->cphci_phci == ph &&
7813 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7814 			cpi->cpi_pip = pip;
7815 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7816 				cpi->cpi_flags &=
7817 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7818 				sort_vhcache_paths(cct);
7819 				cache_updated = 1;
7820 			}
7821 			break;
7822 		}
7823 	}
7824 
7825 	if (cpi == NULL) {
7826 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7827 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7828 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7829 		ASSERT(cpi->cpi_cphci != NULL);
7830 		cpi->cpi_pip = pip;
7831 		enqueue_vhcache_pathinfo(cct, cpi);
7832 		cache_updated = 1;
7833 	}
7834 
7835 	rw_exit(&vhcache->vhcache_lock);
7836 
7837 	if (cache_updated)
7838 		vhcache_dirty(vhc);
7839 }
7840 
7841 /*
7842  * Remove the reference to the specified path from the vhci cache.
7843  */
7844 static void
7845 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7846 {
7847 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7848 	mdi_client_t *ct = pip->pi_client;
7849 	mdi_vhcache_client_t *cct;
7850 	mdi_vhcache_pathinfo_t *cpi;
7851 
7852 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7853 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7854 	    NULL)) != NULL) {
7855 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7856 		    cpi = cpi->cpi_next) {
7857 			if (cpi->cpi_pip == pip) {
7858 				cpi->cpi_pip = NULL;
7859 				break;
7860 			}
7861 		}
7862 	}
7863 	rw_exit(&vhcache->vhcache_lock);
7864 }
7865 
7866 /*
7867  * Flush the vhci cache to disk.
7868  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7869  */
7870 static int
7871 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7872 {
7873 	nvlist_t *nvl;
7874 	int err;
7875 	int rv;
7876 
7877 	/*
7878 	 * It is possible that the system may shutdown before
7879 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7880 	 * flushing the cache in this case do not check for
7881 	 * i_ddi_io_initialized when force flag is set.
7882 	 */
7883 	if (force_flag == 0 && !i_ddi_io_initialized())
7884 		return (MDI_FAILURE);
7885 
7886 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7887 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7888 		nvlist_free(nvl);
7889 	} else
7890 		err = EFAULT;
7891 
7892 	rv = MDI_SUCCESS;
7893 	mutex_enter(&vhc->vhc_lock);
7894 	if (err != 0) {
7895 		if (err == EROFS) {
7896 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7897 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7898 			    MDI_VHC_VHCACHE_DIRTY);
7899 		} else {
7900 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7901 				cmn_err(CE_CONT, "%s: update failed\n",
7902 				    vhc->vhc_vhcache_filename);
7903 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7904 			}
7905 			rv = MDI_FAILURE;
7906 		}
7907 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7908 		cmn_err(CE_CONT,
7909 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7910 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7911 	}
7912 	mutex_exit(&vhc->vhc_lock);
7913 
7914 	return (rv);
7915 }
7916 
7917 /*
7918  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7919  * Exits itself if left idle for the idle timeout period.
7920  */
7921 static void
7922 vhcache_flush_thread(void *arg)
7923 {
7924 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7925 	clock_t idle_time, quit_at_ticks;
7926 	callb_cpr_t cprinfo;
7927 
7928 	/* number of seconds to sleep idle before exiting */
7929 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7930 
7931 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7932 	    "mdi_vhcache_flush");
7933 	mutex_enter(&vhc->vhc_lock);
7934 	for (; ; ) {
7935 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7936 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7937 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7938 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7939 				(void) cv_timedwait(&vhc->vhc_cv,
7940 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7941 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7942 			} else {
7943 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7944 				mutex_exit(&vhc->vhc_lock);
7945 
7946 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7947 					vhcache_dirty(vhc);
7948 
7949 				mutex_enter(&vhc->vhc_lock);
7950 			}
7951 		}
7952 
7953 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7954 
7955 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7956 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7957 		    ddi_get_lbolt() < quit_at_ticks) {
7958 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7959 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7960 			    quit_at_ticks);
7961 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7962 		}
7963 
7964 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7965 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7966 			goto out;
7967 	}
7968 
7969 out:
7970 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7971 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7972 	CALLB_CPR_EXIT(&cprinfo);
7973 }
7974 
7975 /*
7976  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7977  */
7978 static void
7979 vhcache_dirty(mdi_vhci_config_t *vhc)
7980 {
7981 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7982 	int create_thread;
7983 
7984 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7985 	/* do not flush cache until the cache is fully built */
7986 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7987 		rw_exit(&vhcache->vhcache_lock);
7988 		return;
7989 	}
7990 	rw_exit(&vhcache->vhcache_lock);
7991 
7992 	mutex_enter(&vhc->vhc_lock);
7993 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7994 		mutex_exit(&vhc->vhc_lock);
7995 		return;
7996 	}
7997 
7998 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7999 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8000 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8001 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8002 		cv_broadcast(&vhc->vhc_cv);
8003 		create_thread = 0;
8004 	} else {
8005 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8006 		create_thread = 1;
8007 	}
8008 	mutex_exit(&vhc->vhc_lock);
8009 
8010 	if (create_thread)
8011 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8012 		    0, &p0, TS_RUN, minclsyspri);
8013 }
8014 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	/* path of the phci to configure; freed by bus_config_phci() */
	char				*phbc_phci_path;

	/* vhci bus config operation this request belongs to */
	struct mdi_vhci_bus_config_s	*phbc_vhbusconfig;

	/* next request on the list built by bus_config_all_phcis() */
	struct mdi_phci_bus_config_s	*phbc_next;
} mdi_phci_bus_config_t;
8024 
8025 /* vhci bus config structure - one for each vhci bus config operation */
8026 typedef struct mdi_vhci_bus_config_s {
8027 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
8028 	major_t vhbc_op_major;		/* bus config op major */
8029 	uint_t vhbc_op_flags;		/* bus config op flags */
8030 	kmutex_t vhbc_lock;
8031 	kcondvar_t vhbc_cv;
8032 	int vhbc_thr_count;
8033 } mdi_vhci_bus_config_t;
8034 
8035 /*
8036  * bus config the specified phci
8037  */
8038 static void
8039 bus_config_phci(void *arg)
8040 {
8041 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8042 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8043 	dev_info_t *ph_dip;
8044 
8045 	/*
8046 	 * first configure all path components upto phci and then configure
8047 	 * the phci children.
8048 	 */
8049 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8050 	    != NULL) {
8051 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8052 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8053 			(void) ndi_devi_config_driver(ph_dip,
8054 			    vhbc->vhbc_op_flags,
8055 			    vhbc->vhbc_op_major);
8056 		} else
8057 			(void) ndi_devi_config(ph_dip,
8058 			    vhbc->vhbc_op_flags);
8059 
8060 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8061 		ndi_rele_devi(ph_dip);
8062 	}
8063 
8064 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8065 	kmem_free(phbc, sizeof (*phbc));
8066 
8067 	mutex_enter(&vhbc->vhbc_lock);
8068 	vhbc->vhbc_thr_count--;
8069 	if (vhbc->vhbc_thr_count == 0)
8070 		cv_broadcast(&vhbc->vhbc_cv);
8071 	mutex_exit(&vhbc->vhbc_lock);
8072 }
8073 
8074 /*
8075  * Bus config all phcis associated with the vhci in parallel.
8076  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8077  */
8078 static void
8079 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8080     ddi_bus_config_op_t op, major_t maj)
8081 {
8082 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8083 	mdi_vhci_bus_config_t *vhbc;
8084 	mdi_vhcache_phci_t *cphci;
8085 
8086 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8087 	if (vhcache->vhcache_phci_head == NULL) {
8088 		rw_exit(&vhcache->vhcache_lock);
8089 		return;
8090 	}
8091 
8092 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8093 
8094 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8095 	    cphci = cphci->cphci_next) {
8096 		/* skip phcis that haven't attached before root is available */
8097 		if (!modrootloaded && (cphci->cphci_phci == NULL))
8098 			continue;
8099 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8100 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8101 		    KM_SLEEP);
8102 		phbc->phbc_vhbusconfig = vhbc;
8103 		phbc->phbc_next = phbc_head;
8104 		phbc_head = phbc;
8105 		vhbc->vhbc_thr_count++;
8106 	}
8107 	rw_exit(&vhcache->vhcache_lock);
8108 
8109 	vhbc->vhbc_op = op;
8110 	vhbc->vhbc_op_major = maj;
8111 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
8112 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8113 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8114 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8115 
8116 	/* now create threads to initiate bus config on all phcis in parallel */
8117 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8118 		phbc_next = phbc->phbc_next;
8119 		if (mdi_mtc_off)
8120 			bus_config_phci((void *)phbc);
8121 		else
8122 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
8123 			    0, &p0, TS_RUN, minclsyspri);
8124 	}
8125 
8126 	mutex_enter(&vhbc->vhbc_lock);
8127 	/* wait until all threads exit */
8128 	while (vhbc->vhbc_thr_count > 0)
8129 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8130 	mutex_exit(&vhbc->vhbc_lock);
8131 
8132 	mutex_destroy(&vhbc->vhbc_lock);
8133 	cv_destroy(&vhbc->vhbc_cv);
8134 	kmem_free(vhbc, sizeof (*vhbc));
8135 }
8136 
8137 /*
8138  * Single threaded version of bus_config_all_phcis()
8139  */
8140 static void
8141 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8142     ddi_bus_config_op_t op, major_t maj)
8143 {
8144 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8145 
8146 	single_threaded_vhconfig_enter(vhc);
8147 	bus_config_all_phcis(vhcache, flags, op, maj);
8148 	single_threaded_vhconfig_exit(vhc);
8149 }
8150 
8151 /*
8152  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8153  * The path includes the child component in addition to the phci path.
8154  */
8155 static int
8156 bus_config_one_phci_child(char *path)
8157 {
8158 	dev_info_t *ph_dip, *child;
8159 	char *devnm;
8160 	int rv = MDI_FAILURE;
8161 
8162 	/* extract the child component of the phci */
8163 	devnm = strrchr(path, '/');
8164 	*devnm++ = '\0';
8165 
8166 	/*
8167 	 * first configure all path components upto phci and then
8168 	 * configure the phci child.
8169 	 */
8170 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8171 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8172 		    NDI_SUCCESS) {
8173 			/*
8174 			 * release the hold that ndi_devi_config_one() placed
8175 			 */
8176 			ndi_rele_devi(child);
8177 			rv = MDI_SUCCESS;
8178 		}
8179 
8180 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8181 		ndi_rele_devi(ph_dip);
8182 	}
8183 
8184 	devnm--;
8185 	*devnm = '/';
8186 	return (rv);
8187 }
8188 
8189 /*
8190  * Build a list of phci client paths for the specified vhci client.
8191  * The list includes only those phci client paths which aren't configured yet.
8192  */
8193 static mdi_phys_path_t *
8194 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8195 {
8196 	mdi_vhcache_pathinfo_t *cpi;
8197 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8198 	int config_path, len;
8199 
8200 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8201 		/*
8202 		 * include only those paths that aren't configured.
8203 		 */
8204 		config_path = 0;
8205 		if (cpi->cpi_pip == NULL)
8206 			config_path = 1;
8207 		else {
8208 			MDI_PI_LOCK(cpi->cpi_pip);
8209 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
8210 				config_path = 1;
8211 			MDI_PI_UNLOCK(cpi->cpi_pip);
8212 		}
8213 
8214 		if (config_path) {
8215 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8216 			len = strlen(cpi->cpi_cphci->cphci_path) +
8217 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8218 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
8219 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
8220 			    cpi->cpi_cphci->cphci_path, ct_name,
8221 			    cpi->cpi_addr);
8222 			pp->phys_path_next = NULL;
8223 
8224 			if (pp_head == NULL)
8225 				pp_head = pp;
8226 			else
8227 				pp_tail->phys_path_next = pp;
8228 			pp_tail = pp;
8229 		}
8230 	}
8231 
8232 	return (pp_head);
8233 }
8234 
8235 /*
8236  * Free the memory allocated for phci client path list.
8237  */
8238 static void
8239 free_phclient_path_list(mdi_phys_path_t *pp_head)
8240 {
8241 	mdi_phys_path_t *pp, *pp_next;
8242 
8243 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8244 		pp_next = pp->phys_path_next;
8245 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8246 		kmem_free(pp, sizeof (*pp));
8247 	}
8248 }
8249 
8250 /*
8251  * Allocated async client structure and initialize with the specified values.
8252  */
8253 static mdi_async_client_config_t *
8254 alloc_async_client_config(char *ct_name, char *ct_addr,
8255     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8256 {
8257 	mdi_async_client_config_t *acc;
8258 
8259 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8260 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8261 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8262 	acc->acc_phclient_path_list_head = pp_head;
8263 	init_vhcache_lookup_token(&acc->acc_token, tok);
8264 	acc->acc_next = NULL;
8265 	return (acc);
8266 }
8267 
8268 /*
8269  * Free the memory allocated for the async client structure and their members.
8270  */
8271 static void
8272 free_async_client_config(mdi_async_client_config_t *acc)
8273 {
8274 	if (acc->acc_phclient_path_list_head)
8275 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8276 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8277 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8278 	kmem_free(acc, sizeof (*acc));
8279 }
8280 
8281 /*
8282  * Sort vhcache pathinfos (cpis) of the specified client.
8283  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8284  * flag set come at the beginning of the list. All cpis which have this
8285  * flag set come at the end of the list.
8286  */
8287 static void
8288 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8289 {
8290 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8291 
8292 	cpi_head = cct->cct_cpi_head;
8293 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8294 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8295 		cpi_next = cpi->cpi_next;
8296 		enqueue_vhcache_pathinfo(cct, cpi);
8297 	}
8298 }
8299 
8300 /*
8301  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8302  * every vhcache pathinfo of the specified client. If not adjust the flag
8303  * setting appropriately.
8304  *
8305  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8306  * on-disk vhci cache. So every time this flag is updated the cache must be
8307  * flushed.
8308  */
8309 static void
8310 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8311     mdi_vhcache_lookup_token_t *tok)
8312 {
8313 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8314 	mdi_vhcache_client_t *cct;
8315 	mdi_vhcache_pathinfo_t *cpi;
8316 
8317 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8318 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8319 	    == NULL) {
8320 		rw_exit(&vhcache->vhcache_lock);
8321 		return;
8322 	}
8323 
8324 	/*
8325 	 * to avoid unnecessary on-disk cache updates, first check if an
8326 	 * update is really needed. If no update is needed simply return.
8327 	 */
8328 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8329 		if ((cpi->cpi_pip != NULL &&
8330 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8331 		    (cpi->cpi_pip == NULL &&
8332 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8333 			break;
8334 		}
8335 	}
8336 	if (cpi == NULL) {
8337 		rw_exit(&vhcache->vhcache_lock);
8338 		return;
8339 	}
8340 
8341 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8342 		rw_exit(&vhcache->vhcache_lock);
8343 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8344 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8345 		    tok)) == NULL) {
8346 			rw_exit(&vhcache->vhcache_lock);
8347 			return;
8348 		}
8349 	}
8350 
8351 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8352 		if (cpi->cpi_pip != NULL)
8353 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8354 		else
8355 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8356 	}
8357 	sort_vhcache_paths(cct);
8358 
8359 	rw_exit(&vhcache->vhcache_lock);
8360 	vhcache_dirty(vhc);
8361 }
8362 
8363 /*
8364  * Configure all specified paths of the client.
8365  */
8366 static void
8367 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8368     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8369 {
8370 	mdi_phys_path_t *pp;
8371 
8372 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8373 		(void) bus_config_one_phci_child(pp->phys_path);
8374 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8375 }
8376 
8377 /*
8378  * Dequeue elements from vhci async client config list and bus configure
8379  * their corresponding phci clients.
8380  */
8381 static void
8382 config_client_paths_thread(void *arg)
8383 {
8384 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8385 	mdi_async_client_config_t *acc;
8386 	clock_t quit_at_ticks;
8387 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8388 	callb_cpr_t cprinfo;
8389 
8390 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8391 	    "mdi_config_client_paths");
8392 
8393 	for (; ; ) {
8394 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8395 
8396 		mutex_enter(&vhc->vhc_lock);
8397 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8398 		    vhc->vhc_acc_list_head == NULL &&
8399 		    ddi_get_lbolt() < quit_at_ticks) {
8400 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8401 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8402 			    quit_at_ticks);
8403 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8404 		}
8405 
8406 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8407 		    vhc->vhc_acc_list_head == NULL)
8408 			goto out;
8409 
8410 		acc = vhc->vhc_acc_list_head;
8411 		vhc->vhc_acc_list_head = acc->acc_next;
8412 		if (vhc->vhc_acc_list_head == NULL)
8413 			vhc->vhc_acc_list_tail = NULL;
8414 		vhc->vhc_acc_count--;
8415 		mutex_exit(&vhc->vhc_lock);
8416 
8417 		config_client_paths_sync(vhc, acc->acc_ct_name,
8418 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8419 		    &acc->acc_token);
8420 
8421 		free_async_client_config(acc);
8422 	}
8423 
8424 out:
8425 	vhc->vhc_acc_thrcount--;
8426 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8427 	CALLB_CPR_EXIT(&cprinfo);
8428 }
8429 
8430 /*
8431  * Arrange for all the phci client paths (pp_head) for the specified client
8432  * to be bus configured asynchronously by a thread.
8433  */
8434 static void
8435 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8436     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8437 {
8438 	mdi_async_client_config_t *acc, *newacc;
8439 	int create_thread;
8440 
8441 	if (pp_head == NULL)
8442 		return;
8443 
8444 	if (mdi_mtc_off) {
8445 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8446 		free_phclient_path_list(pp_head);
8447 		return;
8448 	}
8449 
8450 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8451 	ASSERT(newacc);
8452 
8453 	mutex_enter(&vhc->vhc_lock);
8454 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8455 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8456 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8457 			free_async_client_config(newacc);
8458 			mutex_exit(&vhc->vhc_lock);
8459 			return;
8460 		}
8461 	}
8462 
8463 	if (vhc->vhc_acc_list_head == NULL)
8464 		vhc->vhc_acc_list_head = newacc;
8465 	else
8466 		vhc->vhc_acc_list_tail->acc_next = newacc;
8467 	vhc->vhc_acc_list_tail = newacc;
8468 	vhc->vhc_acc_count++;
8469 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8470 		cv_broadcast(&vhc->vhc_cv);
8471 		create_thread = 0;
8472 	} else {
8473 		vhc->vhc_acc_thrcount++;
8474 		create_thread = 1;
8475 	}
8476 	mutex_exit(&vhc->vhc_lock);
8477 
8478 	if (create_thread)
8479 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8480 		    0, &p0, TS_RUN, minclsyspri);
8481 }
8482 
8483 /*
8484  * Return number of online paths for the specified client.
8485  */
8486 static int
8487 nonline_paths(mdi_vhcache_client_t *cct)
8488 {
8489 	mdi_vhcache_pathinfo_t *cpi;
8490 	int online_count = 0;
8491 
8492 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8493 		if (cpi->cpi_pip != NULL) {
8494 			MDI_PI_LOCK(cpi->cpi_pip);
8495 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8496 				online_count++;
8497 			MDI_PI_UNLOCK(cpi->cpi_pip);
8498 		}
8499 	}
8500 
8501 	return (online_count);
8502 }
8503 
8504 /*
8505  * Bus configure all paths for the specified vhci client.
8506  * If at least one path for the client is already online, the remaining paths
8507  * will be configured asynchronously. Otherwise, it synchronously configures
8508  * the paths until at least one path is online and then rest of the paths
8509  * will be configured asynchronously.
8510  */
8511 static void
8512 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8513 {
8514 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8515 	mdi_phys_path_t *pp_head, *pp;
8516 	mdi_vhcache_client_t *cct;
8517 	mdi_vhcache_lookup_token_t tok;
8518 
8519 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8520 
8521 	init_vhcache_lookup_token(&tok, NULL);
8522 
8523 	if (ct_name == NULL || ct_addr == NULL ||
8524 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8525 	    == NULL ||
8526 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8527 		rw_exit(&vhcache->vhcache_lock);
8528 		return;
8529 	}
8530 
8531 	/* if at least one path is online, configure the rest asynchronously */
8532 	if (nonline_paths(cct) > 0) {
8533 		rw_exit(&vhcache->vhcache_lock);
8534 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8535 		return;
8536 	}
8537 
8538 	rw_exit(&vhcache->vhcache_lock);
8539 
8540 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8541 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8542 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8543 
8544 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8545 			    ct_addr, &tok)) == NULL) {
8546 				rw_exit(&vhcache->vhcache_lock);
8547 				goto out;
8548 			}
8549 
8550 			if (nonline_paths(cct) > 0 &&
8551 			    pp->phys_path_next != NULL) {
8552 				rw_exit(&vhcache->vhcache_lock);
8553 				config_client_paths_async(vhc, ct_name, ct_addr,
8554 				    pp->phys_path_next, &tok);
8555 				pp->phys_path_next = NULL;
8556 				goto out;
8557 			}
8558 
8559 			rw_exit(&vhcache->vhcache_lock);
8560 		}
8561 	}
8562 
8563 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8564 out:
8565 	free_phclient_path_list(pp_head);
8566 }
8567 
8568 static void
8569 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8570 {
8571 	mutex_enter(&vhc->vhc_lock);
8572 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8573 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8574 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8575 	mutex_exit(&vhc->vhc_lock);
8576 }
8577 
8578 static void
8579 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8580 {
8581 	mutex_enter(&vhc->vhc_lock);
8582 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8583 	cv_broadcast(&vhc->vhc_cv);
8584 	mutex_exit(&vhc->vhc_lock);
8585 }
8586 
/* One entry of a built-in phci driver table. */
typedef struct mdi_phci_driver_info {
	/* name of the phci driver */
	char	*phdriver_name;

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;
8593 
8594 /*
8595  * vhci class and root support capability of a phci driver can be
8596  * specified using ddi-vhci-class and ddi-no-root-support properties in the
8597  * phci driver.conf file. The built-in tables below contain this information
8598  * for those phci drivers whose driver.conf files don't yet contain this info.
8599  *
8600  * All phci drivers expect iscsi have root device support.
8601  */
8602 static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
8603 	{ "fp", 1 },
8604 	{ "iscsi", 0 },
8605 	{ "ibsrp", 1 }
8606 	};
8607 
8608 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
8609 
8610 static void *
8611 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
8612 {
8613 	void *new_ptr;
8614 
8615 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
8616 	if (old_ptr) {
8617 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
8618 		kmem_free(old_ptr, old_size);
8619 	}
8620 	return (new_ptr);
8621 }
8622 
8623 static void
8624 add_to_phci_list(char ***driver_list, int **root_support_list,
8625     int *cur_elements, int *max_elements, char *driver_name, int root_support)
8626 {
8627 	ASSERT(*cur_elements <= *max_elements);
8628 	if (*cur_elements == *max_elements) {
8629 		*max_elements += 10;
8630 		*driver_list = mdi_realloc(*driver_list,
8631 		    sizeof (char *) * (*cur_elements),
8632 		    sizeof (char *) * (*max_elements));
8633 		*root_support_list = mdi_realloc(*root_support_list,
8634 		    sizeof (int) * (*cur_elements),
8635 		    sizeof (int) * (*max_elements));
8636 	}
8637 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
8638 	(*root_support_list)[*cur_elements] = root_support;
8639 	(*cur_elements)++;
8640 }
8641 
8642 static void
8643 get_phci_driver_list(char *vhci_class, char ***driver_list,
8644     int **root_support_list, int *cur_elements, int *max_elements)
8645 {
8646 	mdi_phci_driver_info_t	*st_driver_list, *p;
8647 	int		st_ndrivers, root_support, i, j, driver_conf_count;
8648 	major_t		m;
8649 	struct devnames	*dnp;
8650 	ddi_prop_t	*propp;
8651 
8652 	*driver_list = NULL;
8653 	*root_support_list = NULL;
8654 	*cur_elements = 0;
8655 	*max_elements = 0;
8656 
8657 	/* add the phci drivers derived from the phci driver.conf files */
8658 	for (m = 0; m < devcnt; m++) {
8659 		dnp = &devnamesp[m];
8660 
8661 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
8662 			LOCK_DEV_OPS(&dnp->dn_lock);
8663 			if (dnp->dn_global_prop_ptr != NULL &&
8664 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
8665 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
8666 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
8667 			    strcmp(propp->prop_val, vhci_class) == 0) {
8668 
8669 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
8670 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
8671 				    &dnp->dn_global_prop_ptr->prop_list)
8672 				    == NULL) ? 1 : 0;
8673 
8674 				add_to_phci_list(driver_list, root_support_list,
8675 				    cur_elements, max_elements, dnp->dn_name,
8676 				    root_support);
8677 
8678 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8679 			} else
8680 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8681 		}
8682 	}
8683 
8684 	driver_conf_count = *cur_elements;
8685 
8686 	/* add the phci drivers specified in the built-in tables */
8687 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
8688 		st_driver_list = scsi_phci_driver_list;
8689 		st_ndrivers = sizeof (scsi_phci_driver_list) /
8690 		    sizeof (mdi_phci_driver_info_t);
8691 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
8692 		st_driver_list = ib_phci_driver_list;
8693 		st_ndrivers = sizeof (ib_phci_driver_list) /
8694 		    sizeof (mdi_phci_driver_info_t);
8695 	} else {
8696 		st_driver_list = NULL;
8697 		st_ndrivers = 0;
8698 	}
8699 
8700 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
8701 		/* add this phci driver if not already added before */
8702 		for (j = 0; j < driver_conf_count; j++) {
8703 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
8704 				break;
8705 		}
8706 		if (j == driver_conf_count) {
8707 			add_to_phci_list(driver_list, root_support_list,
8708 			    cur_elements, max_elements, p->phdriver_name,
8709 			    p->phdriver_root_support);
8710 		}
8711 	}
8712 }
8713 
8714 /*
8715  * Attach the phci driver instances associated with the specified vhci class.
8716  * If root is mounted attach all phci driver instances.
8717  * If root is not mounted, attach the instances of only those phci
8718  * drivers that have the root support.
8719  */
8720 static void
8721 attach_phci_drivers(char *vhci_class)
8722 {
8723 	char	**driver_list, **p;
8724 	int	*root_support_list;
8725 	int	cur_elements, max_elements, i;
8726 	major_t	m;
8727 
8728 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
8729 	    &cur_elements, &max_elements);
8730 
8731 	for (i = 0; i < cur_elements; i++) {
8732 		if (modrootloaded || root_support_list[i]) {
8733 			m = ddi_name_to_major(driver_list[i]);
8734 			if (m != DDI_MAJOR_T_NONE &&
8735 			    ddi_hold_installed_driver(m))
8736 				ddi_rele_driver(m);
8737 		}
8738 	}
8739 
8740 	if (driver_list) {
8741 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
8742 			kmem_free(*p, strlen(*p) + 1);
8743 		kmem_free(driver_list, sizeof (char *) * max_elements);
8744 		kmem_free(root_support_list, sizeof (int) * max_elements);
8745 	}
8746 }
8747 
8748 /*
8749  * Build vhci cache:
8750  *
8751  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8752  * the phci driver instances. During this process the cache gets built.
8753  *
8754  * Cache is built fully if the root is mounted.
8755  * If the root is not mounted, phci drivers that do not have root support
8756  * are not attached. As a result the cache is built partially. The entries
8757  * in the cache reflect only those phci drivers that have root support.
8758  */
8759 static int
8760 build_vhci_cache(mdi_vhci_t *vh)
8761 {
8762 	mdi_vhci_config_t *vhc = vh->vh_config;
8763 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8764 
8765 	single_threaded_vhconfig_enter(vhc);
8766 
8767 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8768 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8769 		rw_exit(&vhcache->vhcache_lock);
8770 		single_threaded_vhconfig_exit(vhc);
8771 		return (0);
8772 	}
8773 	rw_exit(&vhcache->vhcache_lock);
8774 
8775 	attach_phci_drivers(vh->vh_class);
8776 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8777 	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
8778 
8779 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8780 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8781 	rw_exit(&vhcache->vhcache_lock);
8782 
8783 	single_threaded_vhconfig_exit(vhc);
8784 	vhcache_dirty(vhc);
8785 	return (1);
8786 }
8787 
8788 /*
8789  * Determine if discovery of paths is needed.
8790  */
8791 static int
8792 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8793 {
8794 	int rv = 1;
8795 
8796 	mutex_enter(&vhc->vhc_lock);
8797 	if (i_ddi_io_initialized() == 0) {
8798 		if (vhc->vhc_path_discovery_boot > 0) {
8799 			vhc->vhc_path_discovery_boot--;
8800 			goto out;
8801 		}
8802 	} else {
8803 		if (vhc->vhc_path_discovery_postboot > 0) {
8804 			vhc->vhc_path_discovery_postboot--;
8805 			goto out;
8806 		}
8807 	}
8808 
8809 	/*
8810 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8811 	 * This is to avoid a series of full path discoveries when opening
8812 	 * stale /dev/[r]dsk links.
8813 	 */
8814 	if (mdi_path_discovery_interval != -1 &&
8815 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8816 		goto out;
8817 
8818 	rv = 0;
8819 out:
8820 	mutex_exit(&vhc->vhc_lock);
8821 	return (rv);
8822 }
8823 
8824 /*
8825  * Discover all paths:
8826  *
8827  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8828  * driver instances. During this process all paths will be discovered.
8829  */
8830 static int
8831 vhcache_discover_paths(mdi_vhci_t *vh)
8832 {
8833 	mdi_vhci_config_t *vhc = vh->vh_config;
8834 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8835 	int rv = 0;
8836 
8837 	single_threaded_vhconfig_enter(vhc);
8838 
8839 	if (vhcache_do_discovery(vhc)) {
8840 		attach_phci_drivers(vh->vh_class);
8841 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8842 		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
8843 
8844 		mutex_enter(&vhc->vhc_lock);
8845 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8846 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8847 		mutex_exit(&vhc->vhc_lock);
8848 		rv = 1;
8849 	}
8850 
8851 	single_threaded_vhconfig_exit(vhc);
8852 	return (rv);
8853 }
8854 
8855 /*
8856  * Generic vhci bus config implementation:
8857  *
8858  * Parameters
8859  *	vdip	vhci dip
8860  *	flags	bus config flags
8861  *	op	bus config operation
8862  *	The remaining parameters are bus config operation specific
8863  *
8864  * for BUS_CONFIG_ONE
8865  *	arg	pointer to name@addr
8866  *	child	upon successful return from this function, *child will be
8867  *		set to the configured and held devinfo child node of vdip.
8868  *	ct_addr	pointer to client address (i.e. GUID)
8869  *
8870  * for BUS_CONFIG_DRIVER
8871  *	arg	major number of the driver
8872  *	child and ct_addr parameters are ignored
8873  *
8874  * for BUS_CONFIG_ALL
8875  *	arg, child, and ct_addr parameters are ignored
8876  *
8877  * Note that for the rest of the bus config operations, this function simply
8878  * calls the framework provided default bus config routine.
8879  */
8880 int
8881 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8882     void *arg, dev_info_t **child, char *ct_addr)
8883 {
8884 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8885 	mdi_vhci_config_t *vhc = vh->vh_config;
8886 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8887 	int rv = 0;
8888 	int params_valid = 0;
8889 	char *cp;
8890 
8891 	/*
8892 	 * To bus config vhcis we relay operation, possibly using another
8893 	 * thread, to phcis. The phci driver then interacts with MDI to cause
8894 	 * vhci child nodes to be enumerated under the vhci node.  Adding a
8895 	 * vhci child requires an ndi_devi_enter of the vhci. Since another
8896 	 * thread may be adding the child, to avoid deadlock we can't wait
8897 	 * for the relayed operations to complete if we have already entered
8898 	 * the vhci node.
8899 	 */
8900 	if (DEVI_BUSY_OWNED(vdip)) {
8901 		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: "
8902 		    "vhci dip is busy owned %p\n", (void *)vdip));
8903 		goto default_bus_config;
8904 	}
8905 
8906 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8907 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8908 		rw_exit(&vhcache->vhcache_lock);
8909 		rv = build_vhci_cache(vh);
8910 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8911 	}
8912 
8913 	switch (op) {
8914 	case BUS_CONFIG_ONE:
8915 		if (arg != NULL && ct_addr != NULL) {
8916 			/* extract node name */
8917 			cp = (char *)arg;
8918 			while (*cp != '\0' && *cp != '@')
8919 				cp++;
8920 			if (*cp == '@') {
8921 				params_valid = 1;
8922 				*cp = '\0';
8923 				config_client_paths(vhc, (char *)arg, ct_addr);
8924 				/* config_client_paths() releases cache_lock */
8925 				*cp = '@';
8926 				break;
8927 			}
8928 		}
8929 
8930 		rw_exit(&vhcache->vhcache_lock);
8931 		break;
8932 
8933 	case BUS_CONFIG_DRIVER:
8934 		rw_exit(&vhcache->vhcache_lock);
8935 		if (rv == 0)
8936 			st_bus_config_all_phcis(vhc, flags, op,
8937 			    (major_t)(uintptr_t)arg);
8938 		break;
8939 
8940 	case BUS_CONFIG_ALL:
8941 		rw_exit(&vhcache->vhcache_lock);
8942 		if (rv == 0)
8943 			st_bus_config_all_phcis(vhc, flags, op, -1);
8944 		break;
8945 
8946 	default:
8947 		rw_exit(&vhcache->vhcache_lock);
8948 		break;
8949 	}
8950 
8951 
8952 default_bus_config:
8953 	/*
8954 	 * All requested child nodes are enumerated under the vhci.
8955 	 * Now configure them.
8956 	 */
8957 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8958 	    NDI_SUCCESS) {
8959 		return (MDI_SUCCESS);
8960 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8961 		/* discover all paths and try configuring again */
8962 		if (vhcache_discover_paths(vh) &&
8963 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8964 		    NDI_SUCCESS)
8965 			return (MDI_SUCCESS);
8966 	}
8967 
8968 	return (MDI_FAILURE);
8969 }
8970 
8971 /*
8972  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8973  */
8974 static nvlist_t *
8975 read_on_disk_vhci_cache(char *vhci_class)
8976 {
8977 	nvlist_t *nvl;
8978 	int err;
8979 	char *filename;
8980 
8981 	filename = vhclass2vhcache_filename(vhci_class);
8982 
8983 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8984 		kmem_free(filename, strlen(filename) + 1);
8985 		return (nvl);
8986 	} else if (err == EIO)
8987 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8988 	else if (err == EINVAL)
8989 		cmn_err(CE_WARN,
8990 		    "%s: data file corrupted, will recreate\n", filename);
8991 
8992 	kmem_free(filename, strlen(filename) + 1);
8993 	return (NULL);
8994 }
8995 
8996 /*
8997  * Read on-disk vhci cache into nvlists for all vhci classes.
8998  * Called during booting by i_ddi_read_devices_files().
8999  */
9000 void
9001 mdi_read_devices_files(void)
9002 {
9003 	int i;
9004 
9005 	for (i = 0; i < N_VHCI_CLASSES; i++)
9006 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9007 }
9008 
9009 /*
9010  * Remove all stale entries from vhci cache.
9011  */
9012 static void
9013 clean_vhcache(mdi_vhci_config_t *vhc)
9014 {
9015 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9016 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
9017 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
9018 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
9019 
9020 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9021 
9022 	cct_head = vhcache->vhcache_client_head;
9023 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9024 	for (cct = cct_head; cct != NULL; cct = cct_next) {
9025 		cct_next = cct->cct_next;
9026 
9027 		cpi_head = cct->cct_cpi_head;
9028 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
9029 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
9030 			cpi_next = cpi->cpi_next;
9031 			if (cpi->cpi_pip != NULL) {
9032 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
9033 				enqueue_tail_vhcache_pathinfo(cct, cpi);
9034 			} else
9035 				free_vhcache_pathinfo(cpi);
9036 		}
9037 
9038 		if (cct->cct_cpi_head != NULL)
9039 			enqueue_vhcache_client(vhcache, cct);
9040 		else {
9041 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
9042 			    (mod_hash_key_t)cct->cct_name_addr);
9043 			free_vhcache_client(cct);
9044 		}
9045 	}
9046 
9047 	cphci_head = vhcache->vhcache_phci_head;
9048 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9049 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
9050 		cphci_next = cphci->cphci_next;
9051 		if (cphci->cphci_phci != NULL)
9052 			enqueue_vhcache_phci(vhcache, cphci);
9053 		else
9054 			free_vhcache_phci(cphci);
9055 	}
9056 
9057 	vhcache->vhcache_clean_time = lbolt64;
9058 	rw_exit(&vhcache->vhcache_lock);
9059 	vhcache_dirty(vhc);
9060 }
9061 
9062 /*
9063  * Remove all stale entries from vhci cache.
9064  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9065  */
9066 void
9067 mdi_clean_vhcache(void)
9068 {
9069 	mdi_vhci_t *vh;
9070 
9071 	mutex_enter(&mdi_mutex);
9072 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9073 		vh->vh_refcnt++;
9074 		mutex_exit(&mdi_mutex);
9075 		clean_vhcache(vh->vh_config);
9076 		mutex_enter(&mdi_mutex);
9077 		vh->vh_refcnt--;
9078 	}
9079 	mutex_exit(&mdi_mutex);
9080 }
9081 
9082 /*
9083  * mdi_vhci_walk_clients():
9084  *		Walker routine to traverse client dev_info nodes
9085  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9086  * below the client, including nexus devices, which we dont want.
9087  * So we just traverse the immediate siblings, starting from 1st client.
9088  */
9089 void
9090 mdi_vhci_walk_clients(dev_info_t *vdip,
9091     int (*f)(dev_info_t *, void *), void *arg)
9092 {
9093 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9094 	dev_info_t	*cdip;
9095 	mdi_client_t	*ct;
9096 
9097 	MDI_VHCI_CLIENT_LOCK(vh);
9098 	cdip = ddi_get_child(vdip);
9099 	while (cdip) {
9100 		ct = i_devi_get_client(cdip);
9101 		MDI_CLIENT_LOCK(ct);
9102 
9103 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9104 			cdip = ddi_get_next_sibling(cdip);
9105 		else
9106 			cdip = NULL;
9107 
9108 		MDI_CLIENT_UNLOCK(ct);
9109 	}
9110 	MDI_VHCI_CLIENT_UNLOCK(vh);
9111 }
9112 
9113 /*
9114  * mdi_vhci_walk_phcis():
9115  *		Walker routine to traverse phci dev_info nodes
9116  */
9117 void
9118 mdi_vhci_walk_phcis(dev_info_t *vdip,
9119     int (*f)(dev_info_t *, void *), void *arg)
9120 {
9121 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9122 	mdi_phci_t	*ph, *next;
9123 
9124 	MDI_VHCI_PHCI_LOCK(vh);
9125 	ph = vh->vh_phci_head;
9126 	while (ph) {
9127 		MDI_PHCI_LOCK(ph);
9128 
9129 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9130 			next = ph->ph_next;
9131 		else
9132 			next = NULL;
9133 
9134 		MDI_PHCI_UNLOCK(ph);
9135 		ph = next;
9136 	}
9137 	MDI_VHCI_PHCI_UNLOCK(vh);
9138 }
9139 
9140 
9141 /*
9142  * mdi_walk_vhcis():
9143  *		Walker routine to traverse vhci dev_info nodes
9144  */
9145 void
9146 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9147 {
9148 	mdi_vhci_t	*vh = NULL;
9149 
9150 	mutex_enter(&mdi_mutex);
9151 	/*
9152 	 * Scan for already registered vhci
9153 	 */
9154 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9155 		vh->vh_refcnt++;
9156 		mutex_exit(&mdi_mutex);
9157 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9158 			mutex_enter(&mdi_mutex);
9159 			vh->vh_refcnt--;
9160 			break;
9161 		} else {
9162 			mutex_enter(&mdi_mutex);
9163 			vh->vh_refcnt--;
9164 		}
9165 	}
9166 
9167 	mutex_exit(&mdi_mutex);
9168 }
9169 
9170 /*
9171  * i_mdi_log_sysevent():
9172  *		Logs events for pickup by syseventd
9173  */
9174 static void
9175 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9176 {
9177 	char		*path_name;
9178 	nvlist_t	*attr_list;
9179 
9180 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9181 	    KM_SLEEP) != DDI_SUCCESS) {
9182 		goto alloc_failed;
9183 	}
9184 
9185 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9186 	(void) ddi_pathname(dip, path_name);
9187 
9188 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9189 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
9190 		goto error;
9191 	}
9192 
9193 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9194 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9195 		goto error;
9196 	}
9197 
9198 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9199 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9200 		goto error;
9201 	}
9202 
9203 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
9204 	    path_name) != DDI_SUCCESS) {
9205 		goto error;
9206 	}
9207 
9208 	if (nvlist_add_string(attr_list, DDI_CLASS,
9209 	    ph_vh_class) != DDI_SUCCESS) {
9210 		goto error;
9211 	}
9212 
9213 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9214 	    attr_list, NULL, DDI_SLEEP);
9215 
9216 error:
9217 	kmem_free(path_name, MAXPATHLEN);
9218 	nvlist_free(attr_list);
9219 	return;
9220 
9221 alloc_failed:
9222 	MDI_DEBUG(1, (CE_WARN, dip,
9223 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
9224 }
9225 
9226 char **
9227 mdi_get_phci_driver_list(char *vhci_class, int	*ndrivers)
9228 {
9229 	char	**driver_list, **ret_driver_list = NULL;
9230 	int	*root_support_list;
9231 	int	cur_elements, max_elements;
9232 
9233 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9234 	    &cur_elements, &max_elements);
9235 
9236 
9237 	if (driver_list) {
9238 		kmem_free(root_support_list, sizeof (int) * max_elements);
9239 		ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9240 		    * max_elements, sizeof (char *) * cur_elements);
9241 	}
9242 	*ndrivers = cur_elements;
9243 
9244 	return (ret_driver_list);
9245 
9246 }
9247 
/*
 * Free a driver name list of ndrivers entries: every name string first,
 * then the pointer array itself.  A NULL list is ignored.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
	int	idx;

	if (driver_list == NULL)
		return;

	for (idx = 0; idx < ndrivers; idx++)
		kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);
	kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9260