xref: /illumos-gate/usr/src/uts/common/os/sunmdi.c (revision 1b8adde7ba7d5e04395c141c5400dc2cffd7d809)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
28  * detailed discussion of the overall mpxio architecture.
29  *
30  * Default locking order:
31  *
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
39  */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 #include <sys/modhash.h>
68 #include <sys/disp.h>
69 #include <sys/autoconf.h>
70 #include <sys/sysmacros.h>
71 
#ifdef	DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
/*
 * MDI_DEBUG(level, (args)):
 *		Call i_mdi_log with the parenthesized argument list 'stmnt'
 *		when mdi_debug is at least 'level'.  The expansion is wrapped
 *		in do { } while (0) so that it forms exactly one statement and
 *		cannot capture the 'else' of an enclosing if/else.
 *		In non-DEBUG builds the macro expands to nothing.
 */
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
	_NOTE(CONSTCOND) } while (0)
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
82 
extern pri_t	minclsyspri;
extern int	modrootloaded;

/*
 * Global mutex:
 * Protects vHCI list and structure members.
 * Initialized once by i_mdi_init().
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 * Singly linked through vh_next; entries are appended at the tail by
 * mdi_vhci_register() and unlinked by mdi_vhci_unregister(), both while
 * holding mdi_mutex.
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 * Number of struct client_hash entries allocated for each vHCI's
 * vh_client_table in mdi_vhci_register().
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 * These are the arguments i_mdi_init() hands to taskq_create(); the
 * MINALLOC/MAXALLOC values scale with mdi_taskq_n_threads.
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* clock ticks corresponding to one second (1000000 microseconds) */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

/* NOTE(review): presumably the bucket count of the bus config cache hash */
static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 *
 * The mutex and both hashes are created by i_mdi_init(), each hash with
 * mdi_pathmap_hash_size as its size argument.
 */
static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
static int		mdi_pathmap_hash_size = 256;
static kmutex_t		mdi_pathmap_mutex;
static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */

/*
 * MDI component property name/value string definitions
 */
const char 		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/*
 * cmn_err() format used by mdi_vhci_register() when a vHCI with registered
 * ops already exists for the class; the single %s is the class name.
 */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";
200 
201 /*
202  * Static function prototypes
203  */
204 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
205 static int		i_mdi_client_offline(dev_info_t *, uint_t);
206 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
207 static void		i_mdi_phci_post_detach(dev_info_t *,
208 			    ddi_detach_cmd_t, int);
209 static int		i_mdi_client_pre_detach(dev_info_t *,
210 			    ddi_detach_cmd_t);
211 static void		i_mdi_client_post_detach(dev_info_t *,
212 			    ddi_detach_cmd_t, int);
213 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
214 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
215 static int 		i_mdi_lba_lb(mdi_client_t *ct,
216 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
217 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
218 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
219 static void		i_mdi_pm_reset_client(mdi_client_t *);
220 static int		i_mdi_power_all_phci(mdi_client_t *);
221 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
222 
223 
224 /*
225  * Internal mdi_pathinfo node functions
226  */
227 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
228 
229 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
230 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
231 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
232 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
233 static void		i_mdi_phci_unlock(mdi_phci_t *);
234 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
235 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
236 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
237 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
238 			    mdi_client_t *);
239 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
240 static void		i_mdi_client_remove_path(mdi_client_t *,
241 			    mdi_pathinfo_t *);
242 
243 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
244 			    mdi_pathinfo_state_t, int);
245 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
246 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
247 			    char **, int);
248 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
249 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
250 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
251 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
252 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
253 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
254 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
255 static void		i_mdi_client_update_state(mdi_client_t *);
256 static int		i_mdi_client_compute_state(mdi_client_t *,
257 			    mdi_phci_t *);
258 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
259 static void		i_mdi_client_unlock(mdi_client_t *);
260 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
261 static mdi_client_t	*i_devi_get_client(dev_info_t *);
262 /*
263  * NOTE: this will be removed once the NWS files are changed to use the new
264  * mdi_{enable,disable}_path interfaces
265  */
266 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
267 				int, int);
268 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
269 				mdi_vhci_t *vh, int flags, int op);
270 /*
271  * Failover related function prototypes
272  */
273 static int		i_mdi_failover(void *);
274 
275 /*
276  * misc internal functions
277  */
278 static int		i_mdi_get_hash_key(char *);
279 static int		i_map_nvlist_error_to_mdi(int);
280 static void		i_mdi_report_path_state(mdi_client_t *,
281 			    mdi_pathinfo_t *);
282 
283 static void		setup_vhci_cache(mdi_vhci_t *);
284 static int		destroy_vhci_cache(mdi_vhci_t *);
285 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
286 static boolean_t	stop_vhcache_flush_thread(void *, int);
287 static void		free_string_array(char **, int);
288 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
289 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
290 static void		free_vhcache_client(mdi_vhcache_client_t *);
291 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
292 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
293 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
294 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
295 static void		vhcache_pi_add(mdi_vhci_config_t *,
296 			    struct mdi_pathinfo *);
297 static void		vhcache_pi_remove(mdi_vhci_config_t *,
298 			    struct mdi_pathinfo *);
299 static void		free_phclient_path_list(mdi_phys_path_t *);
300 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
301 static int		flush_vhcache(mdi_vhci_config_t *, int);
302 static void		vhcache_dirty(mdi_vhci_config_t *);
303 static void		free_async_client_config(mdi_async_client_config_t *);
304 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
305 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
306 static nvlist_t		*read_on_disk_vhci_cache(char *);
307 extern int		fread_nvlist(char *, nvlist_t **);
308 extern int		fwrite_nvlist(char *, nvlist_t *);
309 
310 /* called once when first vhci registers with mdi */
311 static void
312 i_mdi_init()
313 {
314 	static int initialized = 0;
315 
316 	if (initialized)
317 		return;
318 	initialized = 1;
319 
320 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
321 
322 	/* Create our taskq resources */
323 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
324 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
325 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
326 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
327 
328 	/* Allocate ['path_instance' <-> "path"] maps */
329 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
330 	mdi_pathmap_bypath = mod_hash_create_strhash(
331 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
332 	    mod_hash_null_valdtor);
333 	mdi_pathmap_byinstance = mod_hash_create_idhash(
334 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
335 	    mod_hash_null_valdtor);
336 }
337 
338 /*
339  * mdi_get_component_type():
340  *		Return mpxio component type
341  * Return Values:
342  *		MDI_COMPONENT_NONE
343  *		MDI_COMPONENT_VHCI
344  *		MDI_COMPONENT_PHCI
345  *		MDI_COMPONENT_CLIENT
346  * XXX This doesn't work under multi-level MPxIO and should be
347  *	removed when clients migrate mdi_component_is_*() interfaces.
348  */
349 int
350 mdi_get_component_type(dev_info_t *dip)
351 {
352 	return (DEVI(dip)->devi_mdi_component);
353 }
354 
355 /*
356  * mdi_vhci_register():
357  *		Register a vHCI module with the mpxio framework
358  *		mdi_vhci_register() is called by vHCI drivers to register the
359  *		'class_driver' vHCI driver and its MDI entrypoints with the
360  *		mpxio framework.  The vHCI driver must call this interface as
361  *		part of its attach(9e) handler.
362  *		Competing threads may try to attach mdi_vhci_register() as
363  *		the vHCI drivers are loaded and attached as a result of pHCI
364  *		driver instance registration (mdi_phci_register()) with the
365  *		framework.
366  * Return Values:
367  *		MDI_SUCCESS
368  *		MDI_FAILURE
369  */
370 /*ARGSUSED*/
371 int
372 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
373     int flags)
374 {
375 	mdi_vhci_t		*vh = NULL;
376 
377 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
378 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
379 
380 	i_mdi_init();
381 
382 	mutex_enter(&mdi_mutex);
383 	/*
384 	 * Scan for already registered vhci
385 	 */
386 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
387 		if (strcmp(vh->vh_class, class) == 0) {
388 			/*
389 			 * vHCI has already been created.  Check for valid
390 			 * vHCI ops registration.  We only support one vHCI
391 			 * module per class
392 			 */
393 			if (vh->vh_ops != NULL) {
394 				mutex_exit(&mdi_mutex);
395 				cmn_err(CE_NOTE, vhci_greeting, class);
396 				return (MDI_FAILURE);
397 			}
398 			break;
399 		}
400 	}
401 
402 	/*
403 	 * if not yet created, create the vHCI component
404 	 */
405 	if (vh == NULL) {
406 		struct client_hash	*hash = NULL;
407 		char			*load_balance;
408 
409 		/*
410 		 * Allocate and initialize the mdi extensions
411 		 */
412 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
413 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
414 		    KM_SLEEP);
415 		vh->vh_client_table = hash;
416 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
417 		(void) strcpy(vh->vh_class, class);
418 		vh->vh_lb = LOAD_BALANCE_RR;
419 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
420 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
421 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
422 				vh->vh_lb = LOAD_BALANCE_NONE;
423 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
424 				    == 0) {
425 				vh->vh_lb = LOAD_BALANCE_LBA;
426 			}
427 			ddi_prop_free(load_balance);
428 		}
429 
430 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
431 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
432 
433 		/*
434 		 * Store the vHCI ops vectors
435 		 */
436 		vh->vh_dip = vdip;
437 		vh->vh_ops = vops;
438 
439 		setup_vhci_cache(vh);
440 
441 		if (mdi_vhci_head == NULL) {
442 			mdi_vhci_head = vh;
443 		}
444 		if (mdi_vhci_tail) {
445 			mdi_vhci_tail->vh_next = vh;
446 		}
447 		mdi_vhci_tail = vh;
448 		mdi_vhci_count++;
449 	}
450 
451 	/*
452 	 * Claim the devfs node as a vhci component
453 	 */
454 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
455 
456 	/*
457 	 * Initialize our back reference from dev_info node
458 	 */
459 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
460 	mutex_exit(&mdi_mutex);
461 	return (MDI_SUCCESS);
462 }
463 
464 /*
465  * mdi_vhci_unregister():
466  *		Unregister a vHCI module from mpxio framework
467  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
468  * 		of a vhci to unregister it from the framework.
469  * Return Values:
470  *		MDI_SUCCESS
471  *		MDI_FAILURE
472  */
473 /*ARGSUSED*/
474 int
475 mdi_vhci_unregister(dev_info_t *vdip, int flags)
476 {
477 	mdi_vhci_t	*found, *vh, *prev = NULL;
478 
479 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
480 
481 	/*
482 	 * Check for invalid VHCI
483 	 */
484 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
485 		return (MDI_FAILURE);
486 
487 	/*
488 	 * Scan the list of registered vHCIs for a match
489 	 */
490 	mutex_enter(&mdi_mutex);
491 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
492 		if (found == vh)
493 			break;
494 		prev = found;
495 	}
496 
497 	if (found == NULL) {
498 		mutex_exit(&mdi_mutex);
499 		return (MDI_FAILURE);
500 	}
501 
502 	/*
503 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
504 	 * should have been unregistered, before a vHCI can be
505 	 * unregistered.
506 	 */
507 	MDI_VHCI_PHCI_LOCK(vh);
508 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
509 		MDI_VHCI_PHCI_UNLOCK(vh);
510 		mutex_exit(&mdi_mutex);
511 		return (MDI_FAILURE);
512 	}
513 	MDI_VHCI_PHCI_UNLOCK(vh);
514 
515 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
516 		mutex_exit(&mdi_mutex);
517 		return (MDI_FAILURE);
518 	}
519 
520 	/*
521 	 * Remove the vHCI from the global list
522 	 */
523 	if (vh == mdi_vhci_head) {
524 		mdi_vhci_head = vh->vh_next;
525 	} else {
526 		prev->vh_next = vh->vh_next;
527 	}
528 	if (vh == mdi_vhci_tail) {
529 		mdi_vhci_tail = prev;
530 	}
531 	mdi_vhci_count--;
532 	mutex_exit(&mdi_mutex);
533 
534 	vh->vh_ops = NULL;
535 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
536 	DEVI(vdip)->devi_mdi_xhci = NULL;
537 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
538 	kmem_free(vh->vh_client_table,
539 	    mdi_client_table_size * sizeof (struct client_hash));
540 	mutex_destroy(&vh->vh_phci_mutex);
541 	mutex_destroy(&vh->vh_client_mutex);
542 
543 	kmem_free(vh, sizeof (mdi_vhci_t));
544 	return (MDI_SUCCESS);
545 }
546 
547 /*
548  * i_mdi_vhci_class2vhci():
549  *		Look for a matching vHCI module given a vHCI class name
550  * Return Values:
551  *		Handle to a vHCI component
552  *		NULL
553  */
554 static mdi_vhci_t *
555 i_mdi_vhci_class2vhci(char *class)
556 {
557 	mdi_vhci_t	*vh = NULL;
558 
559 	ASSERT(!MUTEX_HELD(&mdi_mutex));
560 
561 	mutex_enter(&mdi_mutex);
562 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
563 		if (strcmp(vh->vh_class, class) == 0) {
564 			break;
565 		}
566 	}
567 	mutex_exit(&mdi_mutex);
568 	return (vh);
569 }
570 
571 /*
572  * i_devi_get_vhci():
573  *		Utility function to get the handle to a vHCI component
574  * Return Values:
575  *		Handle to a vHCI component
576  *		NULL
577  */
578 mdi_vhci_t *
579 i_devi_get_vhci(dev_info_t *vdip)
580 {
581 	mdi_vhci_t	*vh = NULL;
582 	if (MDI_VHCI(vdip)) {
583 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
584 	}
585 	return (vh);
586 }
587 
588 /*
589  * mdi_phci_register():
590  *		Register a pHCI module with mpxio framework
591  *		mdi_phci_register() is called by pHCI drivers to register with
592  *		the mpxio framework and a specific 'class_driver' vHCI.  The
593  *		pHCI driver must call this interface as part of its attach(9e)
594  *		handler.
595  * Return Values:
596  *		MDI_SUCCESS
597  *		MDI_FAILURE
598  */
599 /*ARGSUSED*/
600 int
601 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
602 {
603 	mdi_phci_t		*ph;
604 	mdi_vhci_t		*vh;
605 	char			*data;
606 	char			*pathname;
607 
608 	/*
609 	 * Some subsystems, like fcp, perform pHCI registration from a
610 	 * different thread than the one doing the pHCI attach(9E) - the
611 	 * driver attach code is waiting for this other thread to complete.
612 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
613 	 * (indicating that some thread has done an ndi_devi_enter of parent)
614 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
615 	 */
616 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
617 
618 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
619 	(void) ddi_pathname(pdip, pathname);
620 
621 	/*
622 	 * Check for mpxio-disable property. Enable mpxio if the property is
623 	 * missing or not set to "yes".
624 	 * If the property is set to "yes" then emit a brief message.
625 	 */
626 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
627 	    &data) == DDI_SUCCESS)) {
628 		if (strcmp(data, "yes") == 0) {
629 			MDI_DEBUG(1, (CE_CONT, pdip,
630 			    "?%s (%s%d) multipath capabilities "
631 			    "disabled via %s.conf.\n", pathname,
632 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
633 			    ddi_driver_name(pdip)));
634 			ddi_prop_free(data);
635 			kmem_free(pathname, MAXPATHLEN);
636 			return (MDI_FAILURE);
637 		}
638 		ddi_prop_free(data);
639 	}
640 
641 	kmem_free(pathname, MAXPATHLEN);
642 
643 	/*
644 	 * Search for a matching vHCI
645 	 */
646 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
647 	if (vh == NULL) {
648 		return (MDI_FAILURE);
649 	}
650 
651 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
652 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
653 	ph->ph_dip = pdip;
654 	ph->ph_vhci = vh;
655 	ph->ph_next = NULL;
656 	ph->ph_unstable = 0;
657 	ph->ph_vprivate = 0;
658 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
659 
660 	MDI_PHCI_LOCK(ph);
661 	MDI_PHCI_SET_POWER_UP(ph);
662 	MDI_PHCI_UNLOCK(ph);
663 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
664 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
665 
666 	vhcache_phci_add(vh->vh_config, ph);
667 
668 	MDI_VHCI_PHCI_LOCK(vh);
669 	if (vh->vh_phci_head == NULL) {
670 		vh->vh_phci_head = ph;
671 	}
672 	if (vh->vh_phci_tail) {
673 		vh->vh_phci_tail->ph_next = ph;
674 	}
675 	vh->vh_phci_tail = ph;
676 	vh->vh_phci_count++;
677 	MDI_VHCI_PHCI_UNLOCK(vh);
678 
679 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
680 	return (MDI_SUCCESS);
681 }
682 
683 /*
684  * mdi_phci_unregister():
685  *		Unregister a pHCI module from mpxio framework
686  *		mdi_phci_unregister() is called by the pHCI drivers from their
687  *		detach(9E) handler to unregister their instances from the
688  *		framework.
689  * Return Values:
690  *		MDI_SUCCESS
691  *		MDI_FAILURE
692  */
693 /*ARGSUSED*/
694 int
695 mdi_phci_unregister(dev_info_t *pdip, int flags)
696 {
697 	mdi_vhci_t		*vh;
698 	mdi_phci_t		*ph;
699 	mdi_phci_t		*tmp;
700 	mdi_phci_t		*prev = NULL;
701 
702 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
703 
704 	ph = i_devi_get_phci(pdip);
705 	if (ph == NULL) {
706 		MDI_DEBUG(1, (CE_WARN, pdip,
707 		    "!pHCI unregister: Not a valid pHCI"));
708 		return (MDI_FAILURE);
709 	}
710 
711 	vh = ph->ph_vhci;
712 	ASSERT(vh != NULL);
713 	if (vh == NULL) {
714 		MDI_DEBUG(1, (CE_WARN, pdip,
715 		    "!pHCI unregister: Not a valid vHCI"));
716 		return (MDI_FAILURE);
717 	}
718 
719 	MDI_VHCI_PHCI_LOCK(vh);
720 	tmp = vh->vh_phci_head;
721 	while (tmp) {
722 		if (tmp == ph) {
723 			break;
724 		}
725 		prev = tmp;
726 		tmp = tmp->ph_next;
727 	}
728 
729 	if (ph == vh->vh_phci_head) {
730 		vh->vh_phci_head = ph->ph_next;
731 	} else {
732 		prev->ph_next = ph->ph_next;
733 	}
734 
735 	if (ph == vh->vh_phci_tail) {
736 		vh->vh_phci_tail = prev;
737 	}
738 
739 	vh->vh_phci_count--;
740 	MDI_VHCI_PHCI_UNLOCK(vh);
741 
742 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
743 	    ESC_DDI_INITIATOR_UNREGISTER);
744 	vhcache_phci_remove(vh->vh_config, ph);
745 	cv_destroy(&ph->ph_unstable_cv);
746 	mutex_destroy(&ph->ph_mutex);
747 	kmem_free(ph, sizeof (mdi_phci_t));
748 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
749 	DEVI(pdip)->devi_mdi_xhci = NULL;
750 	return (MDI_SUCCESS);
751 }
752 
753 /*
754  * i_devi_get_phci():
755  * 		Utility function to return the phci extensions.
756  */
757 static mdi_phci_t *
758 i_devi_get_phci(dev_info_t *pdip)
759 {
760 	mdi_phci_t	*ph = NULL;
761 	if (MDI_PHCI(pdip)) {
762 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
763 	}
764 	return (ph);
765 }
766 
767 /*
768  * Single thread mdi entry into devinfo node for modifying its children.
769  * If necessary we perform an ndi_devi_enter of the vHCI before doing
770  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
771  * for the vHCI and one for the pHCI.
772  */
773 void
774 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
775 {
776 	dev_info_t	*vdip;
777 	int		vcircular, pcircular;
778 
779 	/* Verify calling context */
780 	ASSERT(MDI_PHCI(phci_dip));
781 	vdip = mdi_devi_get_vdip(phci_dip);
782 	ASSERT(vdip);			/* A pHCI always has a vHCI */
783 
784 	/*
785 	 * If pHCI is detaching then the framework has already entered the
786 	 * vHCI on a threads that went down the code path leading to
787 	 * detach_node().  This framework enter of the vHCI during pHCI
788 	 * detach is done to avoid deadlock with vHCI power management
789 	 * operations which enter the vHCI and the enter down the path
790 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
791 	 * enter of the vHCI on frameworks vHCI enter that has already
792 	 * occurred - this is OK because we know that the framework thread
793 	 * doing detach is waiting for our completion.
794 	 *
795 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
796 	 * race with detach - but we can't do that because the framework has
797 	 * already entered the parent, so we have some complexity instead.
798 	 */
799 	for (;;) {
800 		if (ndi_devi_tryenter(vdip, &vcircular)) {
801 			ASSERT(vcircular != -1);
802 			if (DEVI_IS_DETACHING(phci_dip)) {
803 				ndi_devi_exit(vdip, vcircular);
804 				vcircular = -1;
805 			}
806 			break;
807 		} else if (DEVI_IS_DETACHING(phci_dip)) {
808 			vcircular = -1;
809 			break;
810 		} else {
811 			delay(1);
812 		}
813 	}
814 
815 	ndi_devi_enter(phci_dip, &pcircular);
816 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
817 }
818 
819 /*
820  * Release mdi_devi_enter or successful mdi_devi_tryenter.
821  */
822 void
823 mdi_devi_exit(dev_info_t *phci_dip, int circular)
824 {
825 	dev_info_t	*vdip;
826 	int		vcircular, pcircular;
827 
828 	/* Verify calling context */
829 	ASSERT(MDI_PHCI(phci_dip));
830 	vdip = mdi_devi_get_vdip(phci_dip);
831 	ASSERT(vdip);			/* A pHCI always has a vHCI */
832 
833 	/* extract two circular recursion values from single int */
834 	pcircular = (short)(circular & 0xFFFF);
835 	vcircular = (short)((circular >> 16) & 0xFFFF);
836 
837 	ndi_devi_exit(phci_dip, pcircular);
838 	if (vcircular != -1)
839 		ndi_devi_exit(vdip, vcircular);
840 }
841 
842 /*
843  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
844  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
845  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
846  * with vHCI power management code during path online/offline.  Each
847  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
848  * occur within the scope of an active mdi_devi_enter that establishes the
849  * circular value.
850  */
851 void
852 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
853 {
854 	int		pcircular;
855 
856 	/* Verify calling context */
857 	ASSERT(MDI_PHCI(phci_dip));
858 
859 	pcircular = (short)(circular & 0xFFFF);
860 	ndi_devi_exit(phci_dip, pcircular);
861 }
862 
863 void
864 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
865 {
866 	int		pcircular;
867 
868 	/* Verify calling context */
869 	ASSERT(MDI_PHCI(phci_dip));
870 
871 	ndi_devi_enter(phci_dip, &pcircular);
872 
873 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
874 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
875 }
876 
877 /*
878  * mdi_devi_get_vdip():
879  *		given a pHCI dip return vHCI dip
880  */
881 dev_info_t *
882 mdi_devi_get_vdip(dev_info_t *pdip)
883 {
884 	mdi_phci_t	*ph;
885 
886 	ph = i_devi_get_phci(pdip);
887 	if (ph && ph->ph_vhci)
888 		return (ph->ph_vhci->vh_dip);
889 	return (NULL);
890 }
891 
892 /*
893  * mdi_devi_pdip_entered():
894  *		Return 1 if we are vHCI and have done an ndi_devi_enter
895  *		of a pHCI
896  */
897 int
898 mdi_devi_pdip_entered(dev_info_t *vdip)
899 {
900 	mdi_vhci_t	*vh;
901 	mdi_phci_t	*ph;
902 
903 	vh = i_devi_get_vhci(vdip);
904 	if (vh == NULL)
905 		return (0);
906 
907 	MDI_VHCI_PHCI_LOCK(vh);
908 	ph = vh->vh_phci_head;
909 	while (ph) {
910 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
911 			MDI_VHCI_PHCI_UNLOCK(vh);
912 			return (1);
913 		}
914 		ph = ph->ph_next;
915 	}
916 	MDI_VHCI_PHCI_UNLOCK(vh);
917 	return (0);
918 }
919 
920 /*
921  * mdi_phci_path2devinfo():
922  * 		Utility function to search for a valid phci device given
923  *		the devfs pathname.
924  */
925 dev_info_t *
926 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
927 {
928 	char		*temp_pathname;
929 	mdi_vhci_t	*vh;
930 	mdi_phci_t	*ph;
931 	dev_info_t 	*pdip = NULL;
932 
933 	vh = i_devi_get_vhci(vdip);
934 	ASSERT(vh != NULL);
935 
936 	if (vh == NULL) {
937 		/*
938 		 * Invalid vHCI component, return failure
939 		 */
940 		return (NULL);
941 	}
942 
943 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
944 	MDI_VHCI_PHCI_LOCK(vh);
945 	ph = vh->vh_phci_head;
946 	while (ph != NULL) {
947 		pdip = ph->ph_dip;
948 		ASSERT(pdip != NULL);
949 		*temp_pathname = '\0';
950 		(void) ddi_pathname(pdip, temp_pathname);
951 		if (strcmp(temp_pathname, pathname) == 0) {
952 			break;
953 		}
954 		ph = ph->ph_next;
955 	}
956 	if (ph == NULL) {
957 		pdip = NULL;
958 	}
959 	MDI_VHCI_PHCI_UNLOCK(vh);
960 	kmem_free(temp_pathname, MAXPATHLEN);
961 	return (pdip);
962 }
963 
964 /*
965  * mdi_phci_get_path_count():
966  * 		get number of path information nodes associated with a given
967  *		pHCI device.
968  */
969 int
970 mdi_phci_get_path_count(dev_info_t *pdip)
971 {
972 	mdi_phci_t	*ph;
973 	int		count = 0;
974 
975 	ph = i_devi_get_phci(pdip);
976 	if (ph != NULL) {
977 		count = ph->ph_path_count;
978 	}
979 	return (count);
980 }
981 
982 /*
983  * i_mdi_phci_lock():
984  *		Lock a pHCI device
985  * Return Values:
986  *		None
987  * Note:
988  *		The default locking order is:
989  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
990  *		But there are number of situations where locks need to be
991  *		grabbed in reverse order.  This routine implements try and lock
992  *		mechanism depending on the requested parameter option.
993  */
994 static void
995 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
996 {
997 	if (pip) {
998 		/* Reverse locking is requested. */
999 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
1000 			/*
1001 			 * tryenter failed. Try to grab again
1002 			 * after a small delay
1003 			 */
1004 			MDI_PI_HOLD(pip);
1005 			MDI_PI_UNLOCK(pip);
1006 			delay(1);
1007 			MDI_PI_LOCK(pip);
1008 			MDI_PI_RELE(pip);
1009 		}
1010 	} else {
1011 		MDI_PHCI_LOCK(ph);
1012 	}
1013 }
1014 
1015 /*
1016  * i_mdi_phci_unlock():
1017  *		Unlock the pHCI component
1018  */
1019 static void
1020 i_mdi_phci_unlock(mdi_phci_t *ph)
1021 {
1022 	MDI_PHCI_UNLOCK(ph);
1023 }
1024 
1025 /*
1026  * i_mdi_devinfo_create():
1027  *		create client device's devinfo node
1028  * Return Values:
1029  *		dev_info
1030  *		NULL
1031  * Notes:
1032  */
1033 static dev_info_t *
1034 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1035 	char **compatible, int ncompatible)
1036 {
1037 	dev_info_t *cdip = NULL;
1038 
1039 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1040 
1041 	/* Verify for duplicate entry */
1042 	cdip = i_mdi_devinfo_find(vh, name, guid);
1043 	ASSERT(cdip == NULL);
1044 	if (cdip) {
1045 		cmn_err(CE_WARN,
1046 		    "i_mdi_devinfo_create: client dip %p already exists",
1047 			(void *)cdip);
1048 	}
1049 
1050 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1051 	if (cdip == NULL)
1052 		goto fail;
1053 
1054 	/*
1055 	 * Create component type and Global unique identifier
1056 	 * properties
1057 	 */
1058 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1059 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1060 		goto fail;
1061 	}
1062 
1063 	/* Decorate the node with compatible property */
1064 	if (compatible &&
1065 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1066 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1067 		goto fail;
1068 	}
1069 
1070 	return (cdip);
1071 
1072 fail:
1073 	if (cdip) {
1074 		(void) ndi_prop_remove_all(cdip);
1075 		(void) ndi_devi_free(cdip);
1076 	}
1077 	return (NULL);
1078 }
1079 
1080 /*
1081  * i_mdi_devinfo_find():
1082  *		Find a matching devinfo node for given client node name
1083  *		and its guid.
1084  * Return Values:
1085  *		Handle to a dev_info node or NULL
1086  */
1087 static dev_info_t *
1088 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1089 {
1090 	char			*data;
1091 	dev_info_t 		*cdip = NULL;
1092 	dev_info_t 		*ndip = NULL;
1093 	int			circular;
1094 
1095 	ndi_devi_enter(vh->vh_dip, &circular);
1096 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1097 	while ((cdip = ndip) != NULL) {
1098 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1099 
1100 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1101 			continue;
1102 		}
1103 
1104 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1105 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1106 		    &data) != DDI_PROP_SUCCESS) {
1107 			continue;
1108 		}
1109 
1110 		if (strcmp(data, guid) != 0) {
1111 			ddi_prop_free(data);
1112 			continue;
1113 		}
1114 		ddi_prop_free(data);
1115 		break;
1116 	}
1117 	ndi_devi_exit(vh->vh_dip, circular);
1118 	return (cdip);
1119 }
1120 
1121 /*
1122  * i_mdi_devinfo_remove():
1123  *		Remove a client device node
1124  */
1125 static int
1126 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1127 {
1128 	int	rv = MDI_SUCCESS;
1129 
1130 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1131 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1132 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
1133 		if (rv != NDI_SUCCESS) {
1134 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
1135 			    " failed. cdip = %p\n", (void *)cdip));
1136 		}
1137 		/*
1138 		 * Convert to MDI error code
1139 		 */
1140 		switch (rv) {
1141 		case NDI_SUCCESS:
1142 			rv = MDI_SUCCESS;
1143 			break;
1144 		case NDI_BUSY:
1145 			rv = MDI_BUSY;
1146 			break;
1147 		default:
1148 			rv = MDI_FAILURE;
1149 			break;
1150 		}
1151 	}
1152 	return (rv);
1153 }
1154 
1155 /*
1156  * i_devi_get_client()
1157  *		Utility function to get mpxio component extensions
1158  */
1159 static mdi_client_t *
1160 i_devi_get_client(dev_info_t *cdip)
1161 {
1162 	mdi_client_t	*ct = NULL;
1163 
1164 	if (MDI_CLIENT(cdip)) {
1165 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1166 	}
1167 	return (ct);
1168 }
1169 
1170 /*
1171  * i_mdi_is_child_present():
1172  *		Search for the presence of client device dev_info node
1173  */
1174 static int
1175 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1176 {
1177 	int		rv = MDI_FAILURE;
1178 	struct dev_info	*dip;
1179 	int		circular;
1180 
1181 	ndi_devi_enter(vdip, &circular);
1182 	dip = DEVI(vdip)->devi_child;
1183 	while (dip) {
1184 		if (dip == DEVI(cdip)) {
1185 			rv = MDI_SUCCESS;
1186 			break;
1187 		}
1188 		dip = dip->devi_sibling;
1189 	}
1190 	ndi_devi_exit(vdip, circular);
1191 	return (rv);
1192 }
1193 
1194 
1195 /*
1196  * i_mdi_client_lock():
1197  *		Grab client component lock
1198  * Return Values:
1199  *		None
1200  * Note:
1201  *		The default locking order is:
1202  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1203  *		But there are number of situations where locks need to be
1204  *		grabbed in reverse order.  This routine implements try and lock
1205  *		mechanism depending on the requested parameter option.
1206  */
1207 static void
1208 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1209 {
1210 	if (pip) {
1211 		/*
1212 		 * Reverse locking is requested.
1213 		 */
1214 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1215 			/*
1216 			 * tryenter failed. Try to grab again
1217 			 * after a small delay
1218 			 */
1219 			MDI_PI_HOLD(pip);
1220 			MDI_PI_UNLOCK(pip);
1221 			delay(1);
1222 			MDI_PI_LOCK(pip);
1223 			MDI_PI_RELE(pip);
1224 		}
1225 	} else {
1226 		MDI_CLIENT_LOCK(ct);
1227 	}
1228 }
1229 
1230 /*
1231  * i_mdi_client_unlock():
1232  *		Unlock a client component
1233  */
1234 static void
1235 i_mdi_client_unlock(mdi_client_t *ct)
1236 {
1237 	MDI_CLIENT_UNLOCK(ct);
1238 }
1239 
1240 /*
1241  * i_mdi_client_alloc():
1242  * 		Allocate and initialize a client structure.  Caller should
1243  *		hold the vhci client lock.
1244  * Return Values:
1245  *		Handle to a client component
1246  */
1247 /*ARGSUSED*/
1248 static mdi_client_t *
1249 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1250 {
1251 	mdi_client_t	*ct;
1252 
1253 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1254 
1255 	/*
1256 	 * Allocate and initialize a component structure.
1257 	 */
1258 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1259 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1260 	ct->ct_hnext = NULL;
1261 	ct->ct_hprev = NULL;
1262 	ct->ct_dip = NULL;
1263 	ct->ct_vhci = vh;
1264 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1265 	(void) strcpy(ct->ct_drvname, name);
1266 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1267 	(void) strcpy(ct->ct_guid, lguid);
1268 	ct->ct_cprivate = NULL;
1269 	ct->ct_vprivate = NULL;
1270 	ct->ct_flags = 0;
1271 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1272 	MDI_CLIENT_LOCK(ct);
1273 	MDI_CLIENT_SET_OFFLINE(ct);
1274 	MDI_CLIENT_SET_DETACH(ct);
1275 	MDI_CLIENT_SET_POWER_UP(ct);
1276 	MDI_CLIENT_UNLOCK(ct);
1277 	ct->ct_failover_flags = 0;
1278 	ct->ct_failover_status = 0;
1279 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1280 	ct->ct_unstable = 0;
1281 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1282 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1283 	ct->ct_lb = vh->vh_lb;
1284 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1285 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1286 	ct->ct_path_count = 0;
1287 	ct->ct_path_head = NULL;
1288 	ct->ct_path_tail = NULL;
1289 	ct->ct_path_last = NULL;
1290 
1291 	/*
1292 	 * Add this client component to our client hash queue
1293 	 */
1294 	i_mdi_client_enlist_table(vh, ct);
1295 	return (ct);
1296 }
1297 
1298 /*
1299  * i_mdi_client_enlist_table():
1300  *		Attach the client device to the client hash table. Caller
1301  *		should hold the vhci client lock.
1302  */
1303 static void
1304 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1305 {
1306 	int 			index;
1307 	struct client_hash	*head;
1308 
1309 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1310 
1311 	index = i_mdi_get_hash_key(ct->ct_guid);
1312 	head = &vh->vh_client_table[index];
1313 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1314 	head->ct_hash_head = ct;
1315 	head->ct_hash_count++;
1316 	vh->vh_client_count++;
1317 }
1318 
1319 /*
1320  * i_mdi_client_delist_table():
1321  *		Attach the client device to the client hash table.
1322  *		Caller should hold the vhci client lock.
1323  */
1324 static void
1325 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1326 {
1327 	int			index;
1328 	char			*guid;
1329 	struct client_hash 	*head;
1330 	mdi_client_t		*next;
1331 	mdi_client_t		*last;
1332 
1333 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1334 
1335 	guid = ct->ct_guid;
1336 	index = i_mdi_get_hash_key(guid);
1337 	head = &vh->vh_client_table[index];
1338 
1339 	last = NULL;
1340 	next = (mdi_client_t *)head->ct_hash_head;
1341 	while (next != NULL) {
1342 		if (next == ct) {
1343 			break;
1344 		}
1345 		last = next;
1346 		next = next->ct_hnext;
1347 	}
1348 
1349 	if (next) {
1350 		head->ct_hash_count--;
1351 		if (last == NULL) {
1352 			head->ct_hash_head = ct->ct_hnext;
1353 		} else {
1354 			last->ct_hnext = ct->ct_hnext;
1355 		}
1356 		ct->ct_hnext = NULL;
1357 		vh->vh_client_count--;
1358 	}
1359 }
1360 
1361 
1362 /*
1363  * i_mdi_client_free():
1364  *		Free a client component
1365  */
1366 static int
1367 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1368 {
1369 	int		rv = MDI_SUCCESS;
1370 	int		flags = ct->ct_flags;
1371 	dev_info_t	*cdip;
1372 	dev_info_t	*vdip;
1373 
1374 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1375 
1376 	vdip = vh->vh_dip;
1377 	cdip = ct->ct_dip;
1378 
1379 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1380 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1381 	DEVI(cdip)->devi_mdi_client = NULL;
1382 
1383 	/*
1384 	 * Clear out back ref. to dev_info_t node
1385 	 */
1386 	ct->ct_dip = NULL;
1387 
1388 	/*
1389 	 * Remove this client from our hash queue
1390 	 */
1391 	i_mdi_client_delist_table(vh, ct);
1392 
1393 	/*
1394 	 * Uninitialize and free the component
1395 	 */
1396 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1397 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1398 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1399 	cv_destroy(&ct->ct_failover_cv);
1400 	cv_destroy(&ct->ct_unstable_cv);
1401 	cv_destroy(&ct->ct_powerchange_cv);
1402 	mutex_destroy(&ct->ct_mutex);
1403 	kmem_free(ct, sizeof (*ct));
1404 
1405 	if (cdip != NULL) {
1406 		MDI_VHCI_CLIENT_UNLOCK(vh);
1407 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1408 		MDI_VHCI_CLIENT_LOCK(vh);
1409 	}
1410 	return (rv);
1411 }
1412 
1413 /*
1414  * i_mdi_client_find():
1415  * 		Find the client structure corresponding to a given guid
1416  *		Caller should hold the vhci client lock.
1417  */
1418 static mdi_client_t *
1419 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1420 {
1421 	int			index;
1422 	struct client_hash	*head;
1423 	mdi_client_t		*ct;
1424 
1425 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1426 
1427 	index = i_mdi_get_hash_key(guid);
1428 	head = &vh->vh_client_table[index];
1429 
1430 	ct = head->ct_hash_head;
1431 	while (ct != NULL) {
1432 		if (strcmp(ct->ct_guid, guid) == 0 &&
1433 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1434 			break;
1435 		}
1436 		ct = ct->ct_hnext;
1437 	}
1438 	return (ct);
1439 }
1440 
1441 /*
1442  * i_mdi_client_update_state():
1443  *		Compute and update client device state
1444  * Notes:
1445  *		A client device can be in any of three possible states:
1446  *
1447  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1448  *		one online/standby paths. Can tolerate failures.
1449  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1450  *		no alternate paths available as standby. A failure on the online
1451  *		would result in loss of access to device data.
1452  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1453  *		no paths available to access the device.
1454  */
1455 static void
1456 i_mdi_client_update_state(mdi_client_t *ct)
1457 {
1458 	int state;
1459 
1460 	ASSERT(MDI_CLIENT_LOCKED(ct));
1461 	state = i_mdi_client_compute_state(ct, NULL);
1462 	MDI_CLIENT_SET_STATE(ct, state);
1463 }
1464 
1465 /*
1466  * i_mdi_client_compute_state():
1467  *		Compute client device state
1468  *
1469  *		mdi_phci_t *	Pointer to pHCI structure which should
1470  *				while computing the new value.  Used by
1471  *				i_mdi_phci_offline() to find the new
1472  *				client state after DR of a pHCI.
1473  */
1474 static int
1475 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1476 {
1477 	int		state;
1478 	int		online_count = 0;
1479 	int		standby_count = 0;
1480 	mdi_pathinfo_t	*pip, *next;
1481 
1482 	ASSERT(MDI_CLIENT_LOCKED(ct));
1483 	pip = ct->ct_path_head;
1484 	while (pip != NULL) {
1485 		MDI_PI_LOCK(pip);
1486 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1487 		if (MDI_PI(pip)->pi_phci == ph) {
1488 			MDI_PI_UNLOCK(pip);
1489 			pip = next;
1490 			continue;
1491 		}
1492 
1493 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1494 				== MDI_PATHINFO_STATE_ONLINE)
1495 			online_count++;
1496 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1497 				== MDI_PATHINFO_STATE_STANDBY)
1498 			standby_count++;
1499 		MDI_PI_UNLOCK(pip);
1500 		pip = next;
1501 	}
1502 
1503 	if (online_count == 0) {
1504 		if (standby_count == 0) {
1505 			state = MDI_CLIENT_STATE_FAILED;
1506 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1507 			    " ct = %p\n", (void *)ct));
1508 		} else if (standby_count == 1) {
1509 			state = MDI_CLIENT_STATE_DEGRADED;
1510 		} else {
1511 			state = MDI_CLIENT_STATE_OPTIMAL;
1512 		}
1513 	} else if (online_count == 1) {
1514 		if (standby_count == 0) {
1515 			state = MDI_CLIENT_STATE_DEGRADED;
1516 		} else {
1517 			state = MDI_CLIENT_STATE_OPTIMAL;
1518 		}
1519 	} else {
1520 		state = MDI_CLIENT_STATE_OPTIMAL;
1521 	}
1522 	return (state);
1523 }
1524 
1525 /*
1526  * i_mdi_client2devinfo():
1527  *		Utility function
1528  */
1529 dev_info_t *
1530 i_mdi_client2devinfo(mdi_client_t *ct)
1531 {
1532 	return (ct->ct_dip);
1533 }
1534 
1535 /*
1536  * mdi_client_path2_devinfo():
1537  * 		Given the parent devinfo and child devfs pathname, search for
1538  *		a valid devfs node handle.
1539  */
1540 dev_info_t *
1541 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1542 {
1543 	dev_info_t 	*cdip = NULL;
1544 	dev_info_t 	*ndip = NULL;
1545 	char		*temp_pathname;
1546 	int		circular;
1547 
1548 	/*
1549 	 * Allocate temp buffer
1550 	 */
1551 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1552 
1553 	/*
1554 	 * Lock parent against changes
1555 	 */
1556 	ndi_devi_enter(vdip, &circular);
1557 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1558 	while ((cdip = ndip) != NULL) {
1559 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1560 
1561 		*temp_pathname = '\0';
1562 		(void) ddi_pathname(cdip, temp_pathname);
1563 		if (strcmp(temp_pathname, pathname) == 0) {
1564 			break;
1565 		}
1566 	}
1567 	/*
1568 	 * Release devinfo lock
1569 	 */
1570 	ndi_devi_exit(vdip, circular);
1571 
1572 	/*
1573 	 * Free the temp buffer
1574 	 */
1575 	kmem_free(temp_pathname, MAXPATHLEN);
1576 	return (cdip);
1577 }
1578 
1579 /*
1580  * mdi_client_get_path_count():
1581  * 		Utility function to get number of path information nodes
1582  *		associated with a given client device.
1583  */
1584 int
1585 mdi_client_get_path_count(dev_info_t *cdip)
1586 {
1587 	mdi_client_t	*ct;
1588 	int		count = 0;
1589 
1590 	ct = i_devi_get_client(cdip);
1591 	if (ct != NULL) {
1592 		count = ct->ct_path_count;
1593 	}
1594 	return (count);
1595 }
1596 
1597 
1598 /*
1599  * i_mdi_get_hash_key():
1600  * 		Create a hash using strings as keys
1601  *
1602  */
1603 static int
1604 i_mdi_get_hash_key(char *str)
1605 {
1606 	uint32_t	g, hash = 0;
1607 	char		*p;
1608 
1609 	for (p = str; *p != '\0'; p++) {
1610 		g = *p;
1611 		hash += g;
1612 	}
1613 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1614 }
1615 
1616 /*
1617  * mdi_get_lb_policy():
1618  * 		Get current load balancing policy for a given client device
1619  */
1620 client_lb_t
1621 mdi_get_lb_policy(dev_info_t *cdip)
1622 {
1623 	client_lb_t	lb = LOAD_BALANCE_NONE;
1624 	mdi_client_t	*ct;
1625 
1626 	ct = i_devi_get_client(cdip);
1627 	if (ct != NULL) {
1628 		lb = ct->ct_lb;
1629 	}
1630 	return (lb);
1631 }
1632 
1633 /*
1634  * mdi_set_lb_region_size():
1635  * 		Set current region size for the load-balance
1636  */
1637 int
1638 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1639 {
1640 	mdi_client_t	*ct;
1641 	int		rv = MDI_FAILURE;
1642 
1643 	ct = i_devi_get_client(cdip);
1644 	if (ct != NULL && ct->ct_lb_args != NULL) {
1645 		ct->ct_lb_args->region_size = region_size;
1646 		rv = MDI_SUCCESS;
1647 	}
1648 	return (rv);
1649 }
1650 
1651 /*
1652  * mdi_Set_lb_policy():
1653  * 		Set current load balancing policy for a given client device
1654  */
1655 int
1656 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1657 {
1658 	mdi_client_t	*ct;
1659 	int		rv = MDI_FAILURE;
1660 
1661 	ct = i_devi_get_client(cdip);
1662 	if (ct != NULL) {
1663 		ct->ct_lb = lb;
1664 		rv = MDI_SUCCESS;
1665 	}
1666 	return (rv);
1667 }
1668 
1669 /*
1670  * mdi_failover():
1671  *		failover function called by the vHCI drivers to initiate
1672  *		a failover operation.  This is typically due to non-availability
1673  *		of online paths to route I/O requests.  Failover can be
1674  *		triggered through user application also.
1675  *
1676  *		The vHCI driver calls mdi_failover() to initiate a failover
1677  *		operation. mdi_failover() calls back into the vHCI driver's
1678  *		vo_failover() entry point to perform the actual failover
1679  *		operation.  The reason for requiring the vHCI driver to
1680  *		initiate failover by calling mdi_failover(), instead of directly
1681  *		executing vo_failover() itself, is to ensure that the mdi
1682  *		framework can keep track of the client state properly.
1683  *		Additionally, mdi_failover() provides as a convenience the
1684  *		option of performing the failover operation synchronously or
1685  *		asynchronously
1686  *
1687  *		Upon successful completion of the failover operation, the
1688  *		paths that were previously ONLINE will be in the STANDBY state,
1689  *		and the newly activated paths will be in the ONLINE state.
1690  *
1691  *		The flags modifier determines whether the activation is done
1692  *		synchronously: MDI_FAILOVER_SYNC
1693  * Return Values:
1694  *		MDI_SUCCESS
1695  *		MDI_FAILURE
1696  *		MDI_BUSY
1697  */
1698 /*ARGSUSED*/
1699 int
1700 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1701 {
1702 	int			rv;
1703 	mdi_client_t		*ct;
1704 
1705 	ct = i_devi_get_client(cdip);
1706 	ASSERT(ct != NULL);
1707 	if (ct == NULL) {
1708 		/* cdip is not a valid client device. Nothing more to do. */
1709 		return (MDI_FAILURE);
1710 	}
1711 
1712 	MDI_CLIENT_LOCK(ct);
1713 
1714 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1715 		/* A path to the client is being freed */
1716 		MDI_CLIENT_UNLOCK(ct);
1717 		return (MDI_BUSY);
1718 	}
1719 
1720 
1721 	if (MDI_CLIENT_IS_FAILED(ct)) {
1722 		/*
1723 		 * Client is in failed state. Nothing more to do.
1724 		 */
1725 		MDI_CLIENT_UNLOCK(ct);
1726 		return (MDI_FAILURE);
1727 	}
1728 
1729 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1730 		/*
1731 		 * Failover is already in progress; return BUSY
1732 		 */
1733 		MDI_CLIENT_UNLOCK(ct);
1734 		return (MDI_BUSY);
1735 	}
1736 	/*
1737 	 * Make sure that mdi_pathinfo node state changes are processed.
1738 	 * We do not allow failovers to progress while client path state
1739 	 * changes are in progress
1740 	 */
1741 	if (ct->ct_unstable) {
1742 		if (flags == MDI_FAILOVER_ASYNC) {
1743 			MDI_CLIENT_UNLOCK(ct);
1744 			return (MDI_BUSY);
1745 		} else {
1746 			while (ct->ct_unstable)
1747 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1748 		}
1749 	}
1750 
1751 	/*
1752 	 * Client device is in stable state. Before proceeding, perform sanity
1753 	 * checks again.
1754 	 */
1755 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1756 	    (!i_ddi_devi_attached(ct->ct_dip))) {
1757 		/*
1758 		 * Client is in failed state. Nothing more to do.
1759 		 */
1760 		MDI_CLIENT_UNLOCK(ct);
1761 		return (MDI_FAILURE);
1762 	}
1763 
1764 	/*
1765 	 * Set the client state as failover in progress.
1766 	 */
1767 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1768 	ct->ct_failover_flags = flags;
1769 	MDI_CLIENT_UNLOCK(ct);
1770 
1771 	if (flags == MDI_FAILOVER_ASYNC) {
1772 		/*
1773 		 * Submit the initiate failover request via CPR safe
1774 		 * taskq threads.
1775 		 */
1776 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1777 		    ct, KM_SLEEP);
1778 		return (MDI_ACCEPT);
1779 	} else {
1780 		/*
1781 		 * Synchronous failover mode.  Typically invoked from the user
1782 		 * land.
1783 		 */
1784 		rv = i_mdi_failover(ct);
1785 	}
1786 	return (rv);
1787 }
1788 
1789 /*
1790  * i_mdi_failover():
1791  *		internal failover function. Invokes vHCI drivers failover
1792  *		callback function and process the failover status
1793  * Return Values:
1794  *		None
1795  *
1796  * Note: A client device in failover state can not be detached or freed.
1797  */
1798 static int
1799 i_mdi_failover(void *arg)
1800 {
1801 	int		rv = MDI_SUCCESS;
1802 	mdi_client_t	*ct = (mdi_client_t *)arg;
1803 	mdi_vhci_t	*vh = ct->ct_vhci;
1804 
1805 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1806 
1807 	if (vh->vh_ops->vo_failover != NULL) {
1808 		/*
1809 		 * Call vHCI drivers callback routine
1810 		 */
1811 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1812 		    ct->ct_failover_flags);
1813 	}
1814 
1815 	MDI_CLIENT_LOCK(ct);
1816 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1817 
1818 	/*
1819 	 * Save the failover return status
1820 	 */
1821 	ct->ct_failover_status = rv;
1822 
1823 	/*
1824 	 * As a result of failover, client status would have been changed.
1825 	 * Update the client state and wake up anyone waiting on this client
1826 	 * device.
1827 	 */
1828 	i_mdi_client_update_state(ct);
1829 
1830 	cv_broadcast(&ct->ct_failover_cv);
1831 	MDI_CLIENT_UNLOCK(ct);
1832 	return (rv);
1833 }
1834 
1835 /*
1836  * Load balancing is logical block.
1837  * IOs within the range described by region_size
1838  * would go on the same path. This would improve the
1839  * performance by cache-hit on some of the RAID devices.
1840  * Search only for online paths(At some point we
1841  * may want to balance across target ports).
1842  * If no paths are found then default to round-robin.
1843  */
1844 static int
1845 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1846 {
1847 	int		path_index = -1;
1848 	int		online_path_count = 0;
1849 	int		online_nonpref_path_count = 0;
1850 	int 		region_size = ct->ct_lb_args->region_size;
1851 	mdi_pathinfo_t	*pip;
1852 	mdi_pathinfo_t	*next;
1853 	int		preferred, path_cnt;
1854 
1855 	pip = ct->ct_path_head;
1856 	while (pip) {
1857 		MDI_PI_LOCK(pip);
1858 		if (MDI_PI(pip)->pi_state ==
1859 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1860 			online_path_count++;
1861 		} else if (MDI_PI(pip)->pi_state ==
1862 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1863 			online_nonpref_path_count++;
1864 		}
1865 		next = (mdi_pathinfo_t *)
1866 		    MDI_PI(pip)->pi_client_link;
1867 		MDI_PI_UNLOCK(pip);
1868 		pip = next;
1869 	}
1870 	/* if found any online/preferred then use this type */
1871 	if (online_path_count > 0) {
1872 		path_cnt = online_path_count;
1873 		preferred = 1;
1874 	} else if (online_nonpref_path_count > 0) {
1875 		path_cnt = online_nonpref_path_count;
1876 		preferred = 0;
1877 	} else {
1878 		path_cnt = 0;
1879 	}
1880 	if (path_cnt) {
1881 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1882 		pip = ct->ct_path_head;
1883 		while (pip && path_index != -1) {
1884 			MDI_PI_LOCK(pip);
1885 			if (path_index == 0 &&
1886 			    (MDI_PI(pip)->pi_state ==
1887 			    MDI_PATHINFO_STATE_ONLINE) &&
1888 				MDI_PI(pip)->pi_preferred == preferred) {
1889 				MDI_PI_HOLD(pip);
1890 				MDI_PI_UNLOCK(pip);
1891 				*ret_pip = pip;
1892 				return (MDI_SUCCESS);
1893 			}
1894 			path_index --;
1895 			next = (mdi_pathinfo_t *)
1896 			    MDI_PI(pip)->pi_client_link;
1897 			MDI_PI_UNLOCK(pip);
1898 			pip = next;
1899 		}
1900 		if (pip == NULL) {
1901 			MDI_DEBUG(4, (CE_NOTE, NULL,
1902 			    "!lba %llx, no pip !!\n",
1903 				bp->b_lblkno));
1904 		} else {
1905 			MDI_DEBUG(4, (CE_NOTE, NULL,
1906 			    "!lba %llx, no pip for path_index, "
1907 			    "pip %p\n", bp->b_lblkno, (void *)pip));
1908 		}
1909 	}
1910 	return (MDI_FAILURE);
1911 }
1912 
1913 /*
1914  * mdi_select_path():
1915  *		select a path to access a client device.
1916  *
1917  *		mdi_select_path() function is called by the vHCI drivers to
1918  *		select a path to route the I/O request to.  The caller passes
1919  *		the block I/O data transfer structure ("buf") as one of the
1920  *		parameters.  The mpxio framework uses the buf structure
1921  *		contents to maintain per path statistics (total I/O size /
1922  *		count pending).  If more than one online paths are available to
1923  *		select, the framework automatically selects a suitable path
1924  *		for routing I/O request. If a failover operation is active for
1925  *		this client device the call shall be failed with MDI_BUSY error
1926  *		code.
1927  *
1928  *		By default this function returns a suitable path in online
1929  *		state based on the current load balancing policy.  Currently
1930  *		we support LOAD_BALANCE_NONE (Previously selected online path
1931  *		will continue to be used till the path is usable) and
1932  *		LOAD_BALANCE_RR (Online paths will be selected in a round
1933  *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
1934  *		based on the logical block).  The load balancing policy is set
1935  *		through the vHCI driver's configuration file (driver.conf).
1936  *
1937  *		vHCI drivers may override this default behavior by specifying
1938  *		appropriate flags.  The meaning of the third argument depends
1939  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
1940  *		then the argument is the "path instance" of the path to select.
1941  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
1942  *		"start_pip". A non NULL "start_pip" is the starting point to
1943  *		walk and find the next appropriate path.  The following values
1944  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
1945  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
1946  *		STANDBY path).
1947  *
1948  *		The non-standard behavior is used by the scsi_vhci driver,
1949  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1950  *		attach of client devices (to avoid an unnecessary failover
1951  *		when the STANDBY path comes up first), during failover
1952  *		(to activate a STANDBY path as ONLINE).
1953  *
1954  *		The selected path is returned in a mdi_hold_path() state
1955  *		(pi_ref_cnt). Caller should release the hold by calling
1956  *		mdi_rele_path().
1957  *
1958  * Return Values:
1959  *		MDI_SUCCESS	- Completed successfully
1960  *		MDI_BUSY 	- Client device is busy failing over
1961  *		MDI_NOPATH	- Client device is online, but no valid path are
1962  *				  available to access this client device
1963  *		MDI_FAILURE	- Invalid client device or state
1964  *		MDI_DEVI_ONLINING
1965  *				- Client device (struct dev_info state) is in
1966  *				  onlining state.
1967  */
1968 
1969 /*ARGSUSED*/
1970 int
1971 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1972     void *arg, mdi_pathinfo_t **ret_pip)
1973 {
1974 	mdi_client_t	*ct;
1975 	mdi_pathinfo_t	*pip;
1976 	mdi_pathinfo_t	*next;
1977 	mdi_pathinfo_t	*head;
1978 	mdi_pathinfo_t	*start;
1979 	client_lb_t	lbp;	/* load balancing policy */
1980 	int		sb = 1;	/* standard behavior */
1981 	int		preferred = 1;	/* preferred path */
1982 	int		cond, cont = 1;
1983 	int		retry = 0;
1984 	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
1985 	int		path_instance;	/* request specific path instance */
1986 
1987 	/* determine type of arg based on flags */
1988 	if (flags & MDI_SELECT_PATH_INSTANCE) {
1989 		flags &= ~MDI_SELECT_PATH_INSTANCE;
1990 		path_instance = (int)(intptr_t)arg;
1991 		start_pip = NULL;
1992 	} else {
1993 		path_instance = 0;
1994 		start_pip = (mdi_pathinfo_t *)arg;
1995 	}
1996 
1997 	if (flags != 0) {
1998 		/*
1999 		 * disable default behavior
2000 		 */
2001 		sb = 0;
2002 	}
2003 
2004 	*ret_pip = NULL;
2005 	ct = i_devi_get_client(cdip);
2006 	if (ct == NULL) {
2007 		/* mdi extensions are NULL, Nothing more to do */
2008 		return (MDI_FAILURE);
2009 	}
2010 
2011 	MDI_CLIENT_LOCK(ct);
2012 
2013 	if (sb) {
2014 		if (MDI_CLIENT_IS_FAILED(ct)) {
2015 			/*
2016 			 * Client is not ready to accept any I/O requests.
2017 			 * Fail this request.
2018 			 */
2019 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
2020 			    "client state offline ct = %p\n", (void *)ct));
2021 			MDI_CLIENT_UNLOCK(ct);
2022 			return (MDI_FAILURE);
2023 		}
2024 
2025 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2026 			/*
2027 			 * Check for Failover is in progress. If so tell the
2028 			 * caller that this device is busy.
2029 			 */
2030 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
2031 			    "client failover in progress ct = %p\n",
2032 			    (void *)ct));
2033 			MDI_CLIENT_UNLOCK(ct);
2034 			return (MDI_BUSY);
2035 		}
2036 
2037 		/*
2038 		 * Check to see whether the client device is attached.
2039 		 * If not so, let the vHCI driver manually select a path
2040 		 * (standby) and let the probe/attach process to continue.
2041 		 */
2042 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2043 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
2044 			    "ct = %p\n", (void *)ct));
2045 			MDI_CLIENT_UNLOCK(ct);
2046 			return (MDI_DEVI_ONLINING);
2047 		}
2048 	}
2049 
2050 	/*
2051 	 * Cache in the client list head.  If head of the list is NULL
2052 	 * return MDI_NOPATH
2053 	 */
2054 	head = ct->ct_path_head;
2055 	if (head == NULL) {
2056 		MDI_CLIENT_UNLOCK(ct);
2057 		return (MDI_NOPATH);
2058 	}
2059 
2060 	/* Caller is specifying a specific pathinfo path by path_instance */
2061 	if (path_instance) {
2062 		/* search for pathinfo with correct path_instance */
2063 		for (pip = head;
2064 		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
2065 		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2066 			;
2067 
2068 		/* If path can't be selected then MDI_FAILURE is returned. */
2069 		if (pip == NULL) {
2070 			MDI_CLIENT_UNLOCK(ct);
2071 			return (MDI_FAILURE);
2072 		}
2073 
2074 		/* verify state of path */
2075 		MDI_PI_LOCK(pip);
2076 		if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) {
2077 			MDI_PI_UNLOCK(pip);
2078 			MDI_CLIENT_UNLOCK(ct);
2079 			return (MDI_FAILURE);
2080 		}
2081 
2082 		/*
2083 		 * Return the path in hold state. Caller should release the
2084 		 * lock by calling mdi_rele_path()
2085 		 */
2086 		MDI_PI_HOLD(pip);
2087 		MDI_PI_UNLOCK(pip);
2088 		ct->ct_path_last = pip;
2089 		*ret_pip = pip;
2090 		MDI_CLIENT_UNLOCK(ct);
2091 		return (MDI_SUCCESS);
2092 	}
2093 
2094 	/*
2095 	 * for non default behavior, bypass current
2096 	 * load balancing policy and always use LOAD_BALANCE_RR
2097 	 * except that the start point will be adjusted based
2098 	 * on the provided start_pip
2099 	 */
2100 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2101 
2102 	switch (lbp) {
2103 	case LOAD_BALANCE_NONE:
2104 		/*
2105 		 * Load balancing is None  or Alternate path mode
2106 		 * Start looking for a online mdi_pathinfo node starting from
2107 		 * last known selected path
2108 		 */
2109 		preferred = 1;
2110 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
2111 		if (pip == NULL) {
2112 			pip = head;
2113 		}
2114 		start = pip;
2115 		do {
2116 			MDI_PI_LOCK(pip);
2117 			/*
2118 			 * No need to explicitly check if the path is disabled.
2119 			 * Since we are checking for state == ONLINE and the
2120 			 * same veriable is used for DISABLE/ENABLE information.
2121 			 */
2122 			if ((MDI_PI(pip)->pi_state  ==
2123 				MDI_PATHINFO_STATE_ONLINE) &&
2124 				preferred == MDI_PI(pip)->pi_preferred) {
2125 				/*
2126 				 * Return the path in hold state. Caller should
2127 				 * release the lock by calling mdi_rele_path()
2128 				 */
2129 				MDI_PI_HOLD(pip);
2130 				MDI_PI_UNLOCK(pip);
2131 				ct->ct_path_last = pip;
2132 				*ret_pip = pip;
2133 				MDI_CLIENT_UNLOCK(ct);
2134 				return (MDI_SUCCESS);
2135 			}
2136 
2137 			/*
2138 			 * Path is busy.
2139 			 */
2140 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2141 			    MDI_PI_IS_TRANSIENT(pip))
2142 				retry = 1;
2143 			/*
2144 			 * Keep looking for a next available online path
2145 			 */
2146 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2147 			if (next == NULL) {
2148 				next = head;
2149 			}
2150 			MDI_PI_UNLOCK(pip);
2151 			pip = next;
2152 			if (start == pip && preferred) {
2153 				preferred = 0;
2154 			} else if (start == pip && !preferred) {
2155 				cont = 0;
2156 			}
2157 		} while (cont);
2158 		break;
2159 
2160 	case LOAD_BALANCE_LBA:
2161 		/*
2162 		 * Make sure we are looking
2163 		 * for an online path. Otherwise, if it is for a STANDBY
2164 		 * path request, it will go through and fetch an ONLINE
2165 		 * path which is not desirable.
2166 		 */
2167 		if ((ct->ct_lb_args != NULL) &&
2168 			    (ct->ct_lb_args->region_size) && bp &&
2169 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2170 			if (i_mdi_lba_lb(ct, ret_pip, bp)
2171 				    == MDI_SUCCESS) {
2172 				MDI_CLIENT_UNLOCK(ct);
2173 				return (MDI_SUCCESS);
2174 			}
2175 		}
2176 		/*  FALLTHROUGH */
2177 	case LOAD_BALANCE_RR:
2178 		/*
2179 		 * Load balancing is Round Robin. Start looking for a online
2180 		 * mdi_pathinfo node starting from last known selected path
2181 		 * as the start point.  If override flags are specified,
2182 		 * process accordingly.
2183 		 * If the search is already in effect(start_pip not null),
2184 		 * then lets just use the same path preference to continue the
2185 		 * traversal.
2186 		 */
2187 
2188 		if (start_pip != NULL) {
2189 			preferred = MDI_PI(start_pip)->pi_preferred;
2190 		} else {
2191 			preferred = 1;
2192 		}
2193 
2194 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2195 		if (start == NULL) {
2196 			pip = head;
2197 		} else {
2198 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2199 			if (pip == NULL) {
2200 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2201 					/*
2202 					 * Return since we hit the end of list
2203 					 */
2204 					MDI_CLIENT_UNLOCK(ct);
2205 					return (MDI_NOPATH);
2206 				}
2207 
2208 				if (!sb) {
2209 					if (preferred == 0) {
2210 						/*
2211 						 * Looks like we have completed
2212 						 * the traversal as preferred
2213 						 * value is 0. Time to bail out.
2214 						 */
2215 						*ret_pip = NULL;
2216 						MDI_CLIENT_UNLOCK(ct);
2217 						return (MDI_NOPATH);
2218 					} else {
2219 						/*
2220 						 * Looks like we reached the
2221 						 * end of the list. Lets enable
2222 						 * traversal of non preferred
2223 						 * paths.
2224 						 */
2225 						preferred = 0;
2226 					}
2227 				}
2228 				pip = head;
2229 			}
2230 		}
2231 		start = pip;
2232 		do {
2233 			MDI_PI_LOCK(pip);
2234 			if (sb) {
2235 				cond = ((MDI_PI(pip)->pi_state ==
2236 				    MDI_PATHINFO_STATE_ONLINE &&
2237 					MDI_PI(pip)->pi_preferred ==
2238 						preferred) ? 1 : 0);
2239 			} else {
2240 				if (flags == MDI_SELECT_ONLINE_PATH) {
2241 					cond = ((MDI_PI(pip)->pi_state ==
2242 					    MDI_PATHINFO_STATE_ONLINE &&
2243 						MDI_PI(pip)->pi_preferred ==
2244 						preferred) ? 1 : 0);
2245 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2246 					cond = ((MDI_PI(pip)->pi_state ==
2247 					    MDI_PATHINFO_STATE_STANDBY &&
2248 						MDI_PI(pip)->pi_preferred ==
2249 						preferred) ? 1 : 0);
2250 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2251 				    MDI_SELECT_STANDBY_PATH)) {
2252 					cond = (((MDI_PI(pip)->pi_state ==
2253 					    MDI_PATHINFO_STATE_ONLINE ||
2254 					    (MDI_PI(pip)->pi_state ==
2255 					    MDI_PATHINFO_STATE_STANDBY)) &&
2256 						MDI_PI(pip)->pi_preferred ==
2257 						preferred) ? 1 : 0);
2258 				} else if (flags ==
2259 					(MDI_SELECT_STANDBY_PATH |
2260 					MDI_SELECT_ONLINE_PATH |
2261 					MDI_SELECT_USER_DISABLE_PATH)) {
2262 					cond = (((MDI_PI(pip)->pi_state ==
2263 					    MDI_PATHINFO_STATE_ONLINE ||
2264 					    (MDI_PI(pip)->pi_state ==
2265 					    MDI_PATHINFO_STATE_STANDBY) ||
2266 						(MDI_PI(pip)->pi_state ==
2267 					    (MDI_PATHINFO_STATE_ONLINE|
2268 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2269 						(MDI_PI(pip)->pi_state ==
2270 					    (MDI_PATHINFO_STATE_STANDBY |
2271 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2272 						MDI_PI(pip)->pi_preferred ==
2273 						preferred) ? 1 : 0);
2274 				} else if (flags ==
2275 				    (MDI_SELECT_STANDBY_PATH |
2276 				    MDI_SELECT_ONLINE_PATH |
2277 				    MDI_SELECT_NO_PREFERRED)) {
2278 					cond = (((MDI_PI(pip)->pi_state ==
2279 					    MDI_PATHINFO_STATE_ONLINE) ||
2280 					    (MDI_PI(pip)->pi_state ==
2281 					    MDI_PATHINFO_STATE_STANDBY))
2282 					    ? 1 : 0);
2283 				} else {
2284 					cond = 0;
2285 				}
2286 			}
2287 			/*
2288 			 * No need to explicitly check if the path is disabled.
2289 			 * Since we are checking for state == ONLINE and the
2290 			 * same veriable is used for DISABLE/ENABLE information.
2291 			 */
2292 			if (cond) {
2293 				/*
2294 				 * Return the path in hold state. Caller should
2295 				 * release the lock by calling mdi_rele_path()
2296 				 */
2297 				MDI_PI_HOLD(pip);
2298 				MDI_PI_UNLOCK(pip);
2299 				if (sb)
2300 					ct->ct_path_last = pip;
2301 				*ret_pip = pip;
2302 				MDI_CLIENT_UNLOCK(ct);
2303 				return (MDI_SUCCESS);
2304 			}
2305 			/*
2306 			 * Path is busy.
2307 			 */
2308 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2309 			    MDI_PI_IS_TRANSIENT(pip))
2310 				retry = 1;
2311 
2312 			/*
2313 			 * Keep looking for a next available online path
2314 			 */
2315 do_again:
2316 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2317 			if (next == NULL) {
2318 				if ( flags & MDI_SELECT_NO_PREFERRED) {
2319 					/*
2320 					 * Bail out since we hit the end of list
2321 					 */
2322 					MDI_PI_UNLOCK(pip);
2323 					break;
2324 				}
2325 
2326 				if (!sb) {
2327 					if (preferred == 1) {
2328 						/*
2329 						 * Looks like we reached the
2330 						 * end of the list. Lets enable
2331 						 * traversal of non preferred
2332 						 * paths.
2333 						 */
2334 						preferred = 0;
2335 						next = head;
2336 					} else {
2337 						/*
2338 						 * We have done both the passes
2339 						 * Preferred as well as for
2340 						 * Non-preferred. Bail out now.
2341 						 */
2342 						cont = 0;
2343 					}
2344 				} else {
2345 					/*
2346 					 * Standard behavior case.
2347 					 */
2348 					next = head;
2349 				}
2350 			}
2351 			MDI_PI_UNLOCK(pip);
2352 			if (cont == 0) {
2353 				break;
2354 			}
2355 			pip = next;
2356 
2357 			if (!sb) {
2358 				/*
2359 				 * We need to handle the selection of
2360 				 * non-preferred path in the following
2361 				 * case:
2362 				 *
2363 				 * +------+   +------+   +------+   +-----+
2364 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2365 				 * +------+   +------+   +------+   +-----+
2366 				 *
2367 				 * If we start the search with B, we need to
2368 				 * skip beyond B to pick C which is non -
2369 				 * preferred in the second pass. The following
2370 				 * test, if true, will allow us to skip over
2371 				 * the 'start'(B in the example) to select
2372 				 * other non preferred elements.
2373 				 */
2374 				if ((start_pip != NULL) && (start_pip == pip) &&
2375 				    (MDI_PI(start_pip)->pi_preferred
2376 				    != preferred)) {
2377 					/*
2378 					 * try again after going past the start
2379 					 * pip
2380 					 */
2381 					MDI_PI_LOCK(pip);
2382 					goto do_again;
2383 				}
2384 			} else {
2385 				/*
2386 				 * Standard behavior case
2387 				 */
2388 				if (start == pip && preferred) {
2389 					/* look for nonpreferred paths */
2390 					preferred = 0;
2391 				} else if (start == pip && !preferred) {
2392 					/*
2393 					 * Exit condition
2394 					 */
2395 					cont = 0;
2396 				}
2397 			}
2398 		} while (cont);
2399 		break;
2400 	}
2401 
2402 	MDI_CLIENT_UNLOCK(ct);
2403 	if (retry == 1) {
2404 		return (MDI_BUSY);
2405 	} else {
2406 		return (MDI_NOPATH);
2407 	}
2408 }
2409 
2410 /*
2411  * For a client, return the next available path to any phci
2412  *
2413  * Note:
2414  *		Caller should hold the branch's devinfo node to get a consistent
2415  *		snap shot of the mdi_pathinfo nodes.
2416  *
2417  *		Please note that even the list is stable the mdi_pathinfo
2418  *		node state and properties are volatile.  The caller should lock
2419  *		and unlock the nodes by calling mdi_pi_lock() and
2420  *		mdi_pi_unlock() functions to get a stable properties.
2421  *
2422  *		If there is a need to use the nodes beyond the hold of the
2423  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2424  *		need to be held against unexpected removal by calling
2425  *		mdi_hold_path() and should be released by calling
2426  *		mdi_rele_path() on completion.
2427  */
2428 mdi_pathinfo_t *
2429 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2430 {
2431 	mdi_client_t *ct;
2432 
2433 	if (!MDI_CLIENT(ct_dip))
2434 		return (NULL);
2435 
2436 	/*
2437 	 * Walk through client link
2438 	 */
2439 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2440 	ASSERT(ct != NULL);
2441 
2442 	if (pip == NULL)
2443 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2444 
2445 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2446 }
2447 
2448 /*
2449  * For a phci, return the next available path to any client
2450  * Note: ditto mdi_get_next_phci_path()
2451  */
2452 mdi_pathinfo_t *
2453 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2454 {
2455 	mdi_phci_t *ph;
2456 
2457 	if (!MDI_PHCI(ph_dip))
2458 		return (NULL);
2459 
2460 	/*
2461 	 * Walk through pHCI link
2462 	 */
2463 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2464 	ASSERT(ph != NULL);
2465 
2466 	if (pip == NULL)
2467 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2468 
2469 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2470 }
2471 
2472 /*
2473  * mdi_hold_path():
2474  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2475  * Return Values:
2476  *		None
2477  */
2478 void
2479 mdi_hold_path(mdi_pathinfo_t *pip)
2480 {
2481 	if (pip) {
2482 		MDI_PI_LOCK(pip);
2483 		MDI_PI_HOLD(pip);
2484 		MDI_PI_UNLOCK(pip);
2485 	}
2486 }
2487 
2488 
2489 /*
2490  * mdi_rele_path():
2491  *		Release the mdi_pathinfo node which was selected
2492  *		through mdi_select_path() mechanism or manually held by
2493  *		calling mdi_hold_path().
2494  * Return Values:
2495  *		None
2496  */
2497 void
2498 mdi_rele_path(mdi_pathinfo_t *pip)
2499 {
2500 	if (pip) {
2501 		MDI_PI_LOCK(pip);
2502 		MDI_PI_RELE(pip);
2503 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2504 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2505 		}
2506 		MDI_PI_UNLOCK(pip);
2507 	}
2508 }
2509 
2510 /*
2511  * mdi_pi_lock():
2512  * 		Lock the mdi_pathinfo node.
2513  * Note:
2514  *		The caller should release the lock by calling mdi_pi_unlock()
2515  */
2516 void
2517 mdi_pi_lock(mdi_pathinfo_t *pip)
2518 {
2519 	ASSERT(pip != NULL);
2520 	if (pip) {
2521 		MDI_PI_LOCK(pip);
2522 	}
2523 }
2524 
2525 
2526 /*
2527  * mdi_pi_unlock():
2528  * 		Unlock the mdi_pathinfo node.
2529  * Note:
2530  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2531  */
2532 void
2533 mdi_pi_unlock(mdi_pathinfo_t *pip)
2534 {
2535 	ASSERT(pip != NULL);
2536 	if (pip) {
2537 		MDI_PI_UNLOCK(pip);
2538 	}
2539 }
2540 
2541 /*
2542  * mdi_pi_find():
2543  *		Search the list of mdi_pathinfo nodes attached to the
2544  *		pHCI/Client device node whose path address matches "paddr".
2545  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2546  *		found.
2547  * Return Values:
2548  *		mdi_pathinfo node handle
2549  *		NULL
2550  * Notes:
2551  *		Caller need not hold any locks to call this function.
2552  */
2553 mdi_pathinfo_t *
2554 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2555 {
2556 	mdi_phci_t		*ph;
2557 	mdi_vhci_t		*vh;
2558 	mdi_client_t		*ct;
2559 	mdi_pathinfo_t		*pip = NULL;
2560 
2561 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
2562 	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
2563 	if ((pdip == NULL) || (paddr == NULL)) {
2564 		return (NULL);
2565 	}
2566 	ph = i_devi_get_phci(pdip);
2567 	if (ph == NULL) {
2568 		/*
2569 		 * Invalid pHCI device, Nothing more to do.
2570 		 */
2571 		MDI_DEBUG(2, (CE_WARN, pdip,
2572 		    "!mdi_pi_find: invalid phci"));
2573 		return (NULL);
2574 	}
2575 
2576 	vh = ph->ph_vhci;
2577 	if (vh == NULL) {
2578 		/*
2579 		 * Invalid vHCI device, Nothing more to do.
2580 		 */
2581 		MDI_DEBUG(2, (CE_WARN, pdip,
2582 		    "!mdi_pi_find: invalid vhci"));
2583 		return (NULL);
2584 	}
2585 
2586 	/*
2587 	 * Look for pathinfo node identified by paddr.
2588 	 */
2589 	if (caddr == NULL) {
2590 		/*
2591 		 * Find a mdi_pathinfo node under pHCI list for a matching
2592 		 * unit address.
2593 		 */
2594 		MDI_PHCI_LOCK(ph);
2595 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2596 			MDI_DEBUG(2, (CE_WARN, pdip,
2597 			    "!mdi_pi_find: offline phci %p", (void *)ph));
2598 			MDI_PHCI_UNLOCK(ph);
2599 			return (NULL);
2600 		}
2601 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2602 
2603 		while (pip != NULL) {
2604 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2605 				break;
2606 			}
2607 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2608 		}
2609 		MDI_PHCI_UNLOCK(ph);
2610 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
2611 		    (void *)pip));
2612 		return (pip);
2613 	}
2614 
2615 	/*
2616 	 * XXX - Is the rest of the code in this function really necessary?
2617 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2618 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2619 	 * whether the search is based on the pathinfo nodes attached to
2620 	 * the pHCI or the client node, the result will be the same.
2621 	 */
2622 
2623 	/*
2624 	 * Find the client device corresponding to 'caddr'
2625 	 */
2626 	MDI_VHCI_CLIENT_LOCK(vh);
2627 
2628 	/*
2629 	 * XXX - Passing NULL to the following function works as long as the
2630 	 * the client addresses (caddr) are unique per vhci basis.
2631 	 */
2632 	ct = i_mdi_client_find(vh, NULL, caddr);
2633 	if (ct == NULL) {
2634 		/*
2635 		 * Client not found, Obviously mdi_pathinfo node has not been
2636 		 * created yet.
2637 		 */
2638 		MDI_VHCI_CLIENT_UNLOCK(vh);
2639 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
2640 		    "found for caddr %s", caddr ? caddr : "NULL"));
2641 		return (NULL);
2642 	}
2643 
2644 	/*
2645 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2646 	 * pHCI and paddr
2647 	 */
2648 	MDI_CLIENT_LOCK(ct);
2649 
2650 	/*
2651 	 * Release the global mutex as it is no more needed. Note: We always
2652 	 * respect the locking order while acquiring.
2653 	 */
2654 	MDI_VHCI_CLIENT_UNLOCK(vh);
2655 
2656 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2657 	while (pip != NULL) {
2658 		/*
2659 		 * Compare the unit address
2660 		 */
2661 		if ((MDI_PI(pip)->pi_phci == ph) &&
2662 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2663 			break;
2664 		}
2665 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2666 	}
2667 	MDI_CLIENT_UNLOCK(ct);
2668 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
2669 	return (pip);
2670 }
2671 
2672 /*
2673  * mdi_pi_alloc():
2674  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2675  *		The mdi_pathinfo node returned by this function identifies a
2676  *		unique device path is capable of having properties attached
2677  *		and passed to mdi_pi_online() to fully attach and online the
2678  *		path and client device node.
2679  *		The mdi_pathinfo node returned by this function must be
2680  *		destroyed using mdi_pi_free() if the path is no longer
2681  *		operational or if the caller fails to attach a client device
2682  *		node when calling mdi_pi_online(). The framework will not free
2683  *		the resources allocated.
2684  *		This function can be called from both interrupt and kernel
2685  *		contexts.  DDI_NOSLEEP flag should be used while calling
2686  *		from interrupt contexts.
2687  * Return Values:
2688  *		MDI_SUCCESS
2689  *		MDI_FAILURE
2690  *		MDI_NOMEM
2691  */
2692 /*ARGSUSED*/
2693 int
2694 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2695     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2696 {
2697 	mdi_vhci_t	*vh;
2698 	mdi_phci_t	*ph;
2699 	mdi_client_t	*ct;
2700 	mdi_pathinfo_t	*pip = NULL;
2701 	dev_info_t	*cdip;
2702 	int		rv = MDI_NOMEM;
2703 	int		path_allocated = 0;
2704 
2705 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
2706 	    cname ? cname : "NULL", caddr ? caddr : "NULL",
2707 	    paddr ? paddr : "NULL"));
2708 
2709 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2710 	    ret_pip == NULL) {
2711 		/* Nothing more to do */
2712 		return (MDI_FAILURE);
2713 	}
2714 
2715 	*ret_pip = NULL;
2716 
2717 	/* No allocations on detaching pHCI */
2718 	if (DEVI_IS_DETACHING(pdip)) {
2719 		/* Invalid pHCI device, return failure */
2720 		MDI_DEBUG(1, (CE_WARN, pdip,
2721 		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
2722 		return (MDI_FAILURE);
2723 	}
2724 
2725 	ph = i_devi_get_phci(pdip);
2726 	ASSERT(ph != NULL);
2727 	if (ph == NULL) {
2728 		/* Invalid pHCI device, return failure */
2729 		MDI_DEBUG(1, (CE_WARN, pdip,
2730 		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
2731 		return (MDI_FAILURE);
2732 	}
2733 
2734 	MDI_PHCI_LOCK(ph);
2735 	vh = ph->ph_vhci;
2736 	if (vh == NULL) {
2737 		/* Invalid vHCI device, return failure */
2738 		MDI_DEBUG(1, (CE_WARN, pdip,
2739 		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
2740 		MDI_PHCI_UNLOCK(ph);
2741 		return (MDI_FAILURE);
2742 	}
2743 
2744 	if (MDI_PHCI_IS_READY(ph) == 0) {
2745 		/*
2746 		 * Do not allow new node creation when pHCI is in
2747 		 * offline/suspended states
2748 		 */
2749 		MDI_DEBUG(1, (CE_WARN, pdip,
2750 		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
2751 		MDI_PHCI_UNLOCK(ph);
2752 		return (MDI_BUSY);
2753 	}
2754 	MDI_PHCI_UNSTABLE(ph);
2755 	MDI_PHCI_UNLOCK(ph);
2756 
2757 	/* look for a matching client, create one if not found */
2758 	MDI_VHCI_CLIENT_LOCK(vh);
2759 	ct = i_mdi_client_find(vh, cname, caddr);
2760 	if (ct == NULL) {
2761 		ct = i_mdi_client_alloc(vh, cname, caddr);
2762 		ASSERT(ct != NULL);
2763 	}
2764 
2765 	if (ct->ct_dip == NULL) {
2766 		/*
2767 		 * Allocate a devinfo node
2768 		 */
2769 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2770 		    compatible, ncompatible);
2771 		if (ct->ct_dip == NULL) {
2772 			(void) i_mdi_client_free(vh, ct);
2773 			goto fail;
2774 		}
2775 	}
2776 	cdip = ct->ct_dip;
2777 
2778 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2779 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2780 
2781 	MDI_CLIENT_LOCK(ct);
2782 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2783 	while (pip != NULL) {
2784 		/*
2785 		 * Compare the unit address
2786 		 */
2787 		if ((MDI_PI(pip)->pi_phci == ph) &&
2788 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2789 			break;
2790 		}
2791 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2792 	}
2793 	MDI_CLIENT_UNLOCK(ct);
2794 
2795 	if (pip == NULL) {
2796 		/*
2797 		 * This is a new path for this client device.  Allocate and
2798 		 * initialize a new pathinfo node
2799 		 */
2800 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2801 		ASSERT(pip != NULL);
2802 		path_allocated = 1;
2803 	}
2804 	rv = MDI_SUCCESS;
2805 
2806 fail:
2807 	/*
2808 	 * Release the global mutex.
2809 	 */
2810 	MDI_VHCI_CLIENT_UNLOCK(vh);
2811 
2812 	/*
2813 	 * Mark the pHCI as stable
2814 	 */
2815 	MDI_PHCI_LOCK(ph);
2816 	MDI_PHCI_STABLE(ph);
2817 	MDI_PHCI_UNLOCK(ph);
2818 	*ret_pip = pip;
2819 
2820 	MDI_DEBUG(2, (CE_NOTE, pdip,
2821 	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));
2822 
2823 	if (path_allocated)
2824 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2825 
2826 	return (rv);
2827 }
2828 
2829 /*ARGSUSED*/
2830 int
2831 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2832     int flags, mdi_pathinfo_t **ret_pip)
2833 {
2834 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2835 	    flags, ret_pip));
2836 }
2837 
2838 /*
2839  * i_mdi_pi_alloc():
2840  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2841  * Return Values:
2842  *		mdi_pathinfo
2843  */
2844 /*ARGSUSED*/
2845 static mdi_pathinfo_t *
2846 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2847 {
2848 	mdi_pathinfo_t	*pip;
2849 	int		ct_circular;
2850 	int		ph_circular;
2851 	static char	path[MAXPATHLEN];
2852 	char		*path_persistent;
2853 	int		path_instance;
2854 	mod_hash_val_t	hv;
2855 
2856 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2857 
2858 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2859 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2860 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2861 	    MDI_PATHINFO_STATE_TRANSIENT;
2862 
2863 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2864 		MDI_PI_SET_USER_DISABLE(pip);
2865 
2866 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2867 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2868 
2869 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2870 		MDI_PI_SET_DRV_DISABLE(pip);
2871 
2872 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2873 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2874 	MDI_PI(pip)->pi_client = ct;
2875 	MDI_PI(pip)->pi_phci = ph;
2876 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2877 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2878 
2879         /*
2880 	 * We form the "path" to the pathinfo node, and see if we have
2881 	 * already allocated a 'path_instance' for that "path".  If so,
2882 	 * we use the already allocated 'path_instance'.  If not, we
2883 	 * allocate a new 'path_instance' and associate it with a copy of
2884 	 * the "path" string (which is never freed). The association
2885 	 * between a 'path_instance' this "path" string persists until
2886 	 * reboot.
2887 	 */
2888         mutex_enter(&mdi_pathmap_mutex);
2889 	(void) ddi_pathname(ph->ph_dip, path);
2890 	(void) sprintf(path + strlen(path), "/%s@%s",
2891 	    ddi_node_name(ct->ct_dip), MDI_PI(pip)->pi_addr);
2892         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2893                 path_instance = (uint_t)(intptr_t)hv;
2894         } else {
2895 		/* allocate a new 'path_instance' and persistent "path" */
2896 		path_instance = mdi_pathmap_instance++;
2897 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2898                 (void) mod_hash_insert(mdi_pathmap_bypath,
2899                     (mod_hash_key_t)path_persistent,
2900                     (mod_hash_val_t)(intptr_t)path_instance);
2901 		(void) mod_hash_insert(mdi_pathmap_byinstance,
2902 		    (mod_hash_key_t)(intptr_t)path_instance,
2903 		    (mod_hash_val_t)path_persistent);
2904         }
2905         mutex_exit(&mdi_pathmap_mutex);
2906 	MDI_PI(pip)->pi_path_instance = path_instance;
2907 
2908 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2909 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2910 	MDI_PI(pip)->pi_pprivate = NULL;
2911 	MDI_PI(pip)->pi_cprivate = NULL;
2912 	MDI_PI(pip)->pi_vprivate = NULL;
2913 	MDI_PI(pip)->pi_client_link = NULL;
2914 	MDI_PI(pip)->pi_phci_link = NULL;
2915 	MDI_PI(pip)->pi_ref_cnt = 0;
2916 	MDI_PI(pip)->pi_kstats = NULL;
2917 	MDI_PI(pip)->pi_preferred = 1;
2918 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2919 
2920 	/*
2921 	 * Lock both dev_info nodes against changes in parallel.
2922 	 *
2923 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
2924 	 * This atypical operation is done to synchronize pathinfo nodes
2925 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
2926 	 * the pathinfo nodes are children of the Client.
2927 	 */
2928 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2929 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2930 
2931 	i_mdi_phci_add_path(ph, pip);
2932 	i_mdi_client_add_path(ct, pip);
2933 
2934 	ndi_devi_exit(ph->ph_dip, ph_circular);
2935 	ndi_devi_exit(ct->ct_dip, ct_circular);
2936 
2937 	return (pip);
2938 }
2939 
2940 /*
2941  * mdi_pi_pathname_by_instance():
2942  *	Lookup of "path" by 'path_instance'. Return "path".
2943  *	NOTE: returned "path" remains valid forever (until reboot).
2944  */
2945 char *
2946 mdi_pi_pathname_by_instance(int path_instance)
2947 {
2948 	char		*path;
2949 	mod_hash_val_t	hv;
2950 
2951 	/* mdi_pathmap lookup of "path" by 'path_instance' */
2952 	mutex_enter(&mdi_pathmap_mutex);
2953 	if (mod_hash_find(mdi_pathmap_byinstance,
2954 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
2955 		path = (char *)hv;
2956 	else
2957 		path = NULL;
2958 	mutex_exit(&mdi_pathmap_mutex);
2959 	return (path);
2960 }
2961 
2962 /*
2963  * i_mdi_phci_add_path():
2964  * 		Add a mdi_pathinfo node to pHCI list.
2965  * Notes:
2966  *		Caller should per-pHCI mutex
2967  */
2968 static void
2969 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2970 {
2971 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2972 
2973 	MDI_PHCI_LOCK(ph);
2974 	if (ph->ph_path_head == NULL) {
2975 		ph->ph_path_head = pip;
2976 	} else {
2977 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2978 	}
2979 	ph->ph_path_tail = pip;
2980 	ph->ph_path_count++;
2981 	MDI_PHCI_UNLOCK(ph);
2982 }
2983 
2984 /*
2985  * i_mdi_client_add_path():
2986  *		Add mdi_pathinfo node to client list
2987  */
2988 static void
2989 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2990 {
2991 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2992 
2993 	MDI_CLIENT_LOCK(ct);
2994 	if (ct->ct_path_head == NULL) {
2995 		ct->ct_path_head = pip;
2996 	} else {
2997 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2998 	}
2999 	ct->ct_path_tail = pip;
3000 	ct->ct_path_count++;
3001 	MDI_CLIENT_UNLOCK(ct);
3002 }
3003 
3004 /*
3005  * mdi_pi_free():
3006  *		Free the mdi_pathinfo node and also client device node if this
3007  *		is the last path to the device
3008  * Return Values:
3009  *		MDI_SUCCESS
3010  *		MDI_FAILURE
3011  *		MDI_BUSY
3012  */
3013 /*ARGSUSED*/
3014 int
3015 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3016 {
3017 	int		rv = MDI_FAILURE;
3018 	mdi_vhci_t	*vh;
3019 	mdi_phci_t	*ph;
3020 	mdi_client_t	*ct;
3021 	int		(*f)();
3022 	int		client_held = 0;
3023 
3024 	MDI_PI_LOCK(pip);
3025 	ph = MDI_PI(pip)->pi_phci;
3026 	ASSERT(ph != NULL);
3027 	if (ph == NULL) {
3028 		/*
3029 		 * Invalid pHCI device, return failure
3030 		 */
3031 		MDI_DEBUG(1, (CE_WARN, NULL,
3032 		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
3033 		MDI_PI_UNLOCK(pip);
3034 		return (MDI_FAILURE);
3035 	}
3036 
3037 	vh = ph->ph_vhci;
3038 	ASSERT(vh != NULL);
3039 	if (vh == NULL) {
3040 		/* Invalid pHCI device, return failure */
3041 		MDI_DEBUG(1, (CE_WARN, NULL,
3042 		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
3043 		MDI_PI_UNLOCK(pip);
3044 		return (MDI_FAILURE);
3045 	}
3046 
3047 	ct = MDI_PI(pip)->pi_client;
3048 	ASSERT(ct != NULL);
3049 	if (ct == NULL) {
3050 		/*
3051 		 * Invalid Client device, return failure
3052 		 */
3053 		MDI_DEBUG(1, (CE_WARN, NULL,
3054 		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
3055 		MDI_PI_UNLOCK(pip);
3056 		return (MDI_FAILURE);
3057 	}
3058 
3059 	/*
3060 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
3061 	 * if the node state is either offline or init and the reference count
3062 	 * is zero.
3063 	 */
3064 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3065 	    MDI_PI_IS_INITING(pip))) {
3066 		/*
3067 		 * Node is busy
3068 		 */
3069 		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3070 		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
3071 		MDI_PI_UNLOCK(pip);
3072 		return (MDI_BUSY);
3073 	}
3074 
3075 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3076 		/*
3077 		 * Give a chance for pending I/Os to complete.
3078 		 */
3079 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
3080 		    "%d cmds still pending on path: %p\n",
3081 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3082 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3083 		    &MDI_PI(pip)->pi_mutex,
3084 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3085 			/*
3086 			 * The timeout time reached without ref_cnt being zero
3087 			 * being signaled.
3088 			 */
3089 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
3090 			    "!mdi_pi_free: "
3091 			    "Timeout reached on path %p without the cond\n",
3092 			    (void *)pip));
3093 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
3094 			    "!mdi_pi_free: "
3095 			    "%d cmds still pending on path: %p\n",
3096 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3097 			MDI_PI_UNLOCK(pip);
3098 			return (MDI_BUSY);
3099 		}
3100 	}
3101 	if (MDI_PI(pip)->pi_pm_held) {
3102 		client_held = 1;
3103 	}
3104 	MDI_PI_UNLOCK(pip);
3105 
3106 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3107 
3108 	MDI_CLIENT_LOCK(ct);
3109 
3110 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3111 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3112 
3113 	/*
3114 	 * Wait till failover is complete before removing this node.
3115 	 */
3116 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3117 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3118 
3119 	MDI_CLIENT_UNLOCK(ct);
3120 	MDI_VHCI_CLIENT_LOCK(vh);
3121 	MDI_CLIENT_LOCK(ct);
3122 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3123 
3124 	if (!MDI_PI_IS_INITING(pip)) {
3125 		f = vh->vh_ops->vo_pi_uninit;
3126 		if (f != NULL) {
3127 			rv = (*f)(vh->vh_dip, pip, 0);
3128 		}
3129 	}
3130 	/*
3131 	 * If vo_pi_uninit() completed successfully.
3132 	 */
3133 	if (rv == MDI_SUCCESS) {
3134 		if (client_held) {
3135 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
3136 			    "i_mdi_pm_rele_client\n"));
3137 			i_mdi_pm_rele_client(ct, 1);
3138 		}
3139 		i_mdi_pi_free(ph, pip, ct);
3140 		if (ct->ct_path_count == 0) {
3141 			/*
3142 			 * Client lost its last path.
3143 			 * Clean up the client device
3144 			 */
3145 			MDI_CLIENT_UNLOCK(ct);
3146 			(void) i_mdi_client_free(ct->ct_vhci, ct);
3147 			MDI_VHCI_CLIENT_UNLOCK(vh);
3148 			return (rv);
3149 		}
3150 	}
3151 	MDI_CLIENT_UNLOCK(ct);
3152 	MDI_VHCI_CLIENT_UNLOCK(vh);
3153 
3154 	if (rv == MDI_FAILURE)
3155 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3156 
3157 	return (rv);
3158 }
3159 
3160 /*
3161  * i_mdi_pi_free():
3162  *		Free the mdi_pathinfo node
3163  */
3164 static void
3165 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3166 {
3167 	int	ct_circular;
3168 	int	ph_circular;
3169 
3170 	ASSERT(MDI_CLIENT_LOCKED(ct));
3171 
3172 	/*
3173 	 * remove any per-path kstats
3174 	 */
3175 	i_mdi_pi_kstat_destroy(pip);
3176 
3177 	/* See comments in i_mdi_pi_alloc() */
3178 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3179 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3180 
3181 	i_mdi_client_remove_path(ct, pip);
3182 	i_mdi_phci_remove_path(ph, pip);
3183 
3184 	ndi_devi_exit(ph->ph_dip, ph_circular);
3185 	ndi_devi_exit(ct->ct_dip, ct_circular);
3186 
3187 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3188 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3189 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3190 	if (MDI_PI(pip)->pi_addr) {
3191 		kmem_free(MDI_PI(pip)->pi_addr,
3192 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3193 		MDI_PI(pip)->pi_addr = NULL;
3194 	}
3195 
3196 	if (MDI_PI(pip)->pi_prop) {
3197 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3198 		MDI_PI(pip)->pi_prop = NULL;
3199 	}
3200 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3201 }
3202 
3203 
3204 /*
3205  * i_mdi_phci_remove_path():
3206  * 		Remove a mdi_pathinfo node from pHCI list.
3207  * Notes:
3208  *		Caller should hold per-pHCI mutex
3209  */
3210 static void
3211 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3212 {
3213 	mdi_pathinfo_t	*prev = NULL;
3214 	mdi_pathinfo_t	*path = NULL;
3215 
3216 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3217 
3218 	MDI_PHCI_LOCK(ph);
3219 	path = ph->ph_path_head;
3220 	while (path != NULL) {
3221 		if (path == pip) {
3222 			break;
3223 		}
3224 		prev = path;
3225 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3226 	}
3227 
3228 	if (path) {
3229 		ph->ph_path_count--;
3230 		if (prev) {
3231 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3232 		} else {
3233 			ph->ph_path_head =
3234 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3235 		}
3236 		if (ph->ph_path_tail == path) {
3237 			ph->ph_path_tail = prev;
3238 		}
3239 	}
3240 
3241 	/*
3242 	 * Clear the pHCI link
3243 	 */
3244 	MDI_PI(pip)->pi_phci_link = NULL;
3245 	MDI_PI(pip)->pi_phci = NULL;
3246 	MDI_PHCI_UNLOCK(ph);
3247 }
3248 
3249 /*
3250  * i_mdi_client_remove_path():
3251  * 		Remove a mdi_pathinfo node from client path list.
3252  */
3253 static void
3254 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3255 {
3256 	mdi_pathinfo_t	*prev = NULL;
3257 	mdi_pathinfo_t	*path;
3258 
3259 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3260 
3261 	ASSERT(MDI_CLIENT_LOCKED(ct));
3262 	path = ct->ct_path_head;
3263 	while (path != NULL) {
3264 		if (path == pip) {
3265 			break;
3266 		}
3267 		prev = path;
3268 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3269 	}
3270 
3271 	if (path) {
3272 		ct->ct_path_count--;
3273 		if (prev) {
3274 			MDI_PI(prev)->pi_client_link =
3275 			    MDI_PI(path)->pi_client_link;
3276 		} else {
3277 			ct->ct_path_head =
3278 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3279 		}
3280 		if (ct->ct_path_tail == path) {
3281 			ct->ct_path_tail = prev;
3282 		}
3283 		if (ct->ct_path_last == path) {
3284 			ct->ct_path_last = ct->ct_path_head;
3285 		}
3286 	}
3287 	MDI_PI(pip)->pi_client_link = NULL;
3288 	MDI_PI(pip)->pi_client = NULL;
3289 }
3290 
3291 /*
3292  * i_mdi_pi_state_change():
3293  *		online a mdi_pathinfo node
3294  *
3295  * Return Values:
3296  *		MDI_SUCCESS
3297  *		MDI_FAILURE
3298  */
3299 /*ARGSUSED*/
3300 static int
3301 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3302 {
3303 	int		rv = MDI_SUCCESS;
3304 	mdi_vhci_t	*vh;
3305 	mdi_phci_t	*ph;
3306 	mdi_client_t	*ct;
3307 	int		(*f)();
3308 	dev_info_t	*cdip;
3309 
3310 	MDI_PI_LOCK(pip);
3311 
3312 	ph = MDI_PI(pip)->pi_phci;
3313 	ASSERT(ph);
3314 	if (ph == NULL) {
3315 		/*
3316 		 * Invalid pHCI device, fail the request
3317 		 */
3318 		MDI_PI_UNLOCK(pip);
3319 		MDI_DEBUG(1, (CE_WARN, NULL,
3320 		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
3321 		return (MDI_FAILURE);
3322 	}
3323 
3324 	vh = ph->ph_vhci;
3325 	ASSERT(vh);
3326 	if (vh == NULL) {
3327 		/*
3328 		 * Invalid vHCI device, fail the request
3329 		 */
3330 		MDI_PI_UNLOCK(pip);
3331 		MDI_DEBUG(1, (CE_WARN, NULL,
3332 		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
3333 		return (MDI_FAILURE);
3334 	}
3335 
3336 	ct = MDI_PI(pip)->pi_client;
3337 	ASSERT(ct != NULL);
3338 	if (ct == NULL) {
3339 		/*
3340 		 * Invalid client device, fail the request
3341 		 */
3342 		MDI_PI_UNLOCK(pip);
3343 		MDI_DEBUG(1, (CE_WARN, NULL,
3344 		    "!mdi_pi_state_change: invalid client pip=%p",
3345 		    (void *)pip));
3346 		return (MDI_FAILURE);
3347 	}
3348 
3349 	/*
3350 	 * If this path has not been initialized yet, Callback vHCI driver's
3351 	 * pathinfo node initialize entry point
3352 	 */
3353 
3354 	if (MDI_PI_IS_INITING(pip)) {
3355 		MDI_PI_UNLOCK(pip);
3356 		f = vh->vh_ops->vo_pi_init;
3357 		if (f != NULL) {
3358 			rv = (*f)(vh->vh_dip, pip, 0);
3359 			if (rv != MDI_SUCCESS) {
3360 				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3361 				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
3362 				    (void *)vh, (void *)pip));
3363 				return (MDI_FAILURE);
3364 			}
3365 		}
3366 		MDI_PI_LOCK(pip);
3367 		MDI_PI_CLEAR_TRANSIENT(pip);
3368 	}
3369 
3370 	/*
3371 	 * Do not allow state transition when pHCI is in offline/suspended
3372 	 * states
3373 	 */
3374 	i_mdi_phci_lock(ph, pip);
3375 	if (MDI_PHCI_IS_READY(ph) == 0) {
3376 		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3377 		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
3378 		    (void *)ph));
3379 		MDI_PI_UNLOCK(pip);
3380 		i_mdi_phci_unlock(ph);
3381 		return (MDI_BUSY);
3382 	}
3383 	MDI_PHCI_UNSTABLE(ph);
3384 	i_mdi_phci_unlock(ph);
3385 
3386 	/*
3387 	 * Check if mdi_pathinfo state is in transient state.
3388 	 * If yes, offlining is in progress and wait till transient state is
3389 	 * cleared.
3390 	 */
3391 	if (MDI_PI_IS_TRANSIENT(pip)) {
3392 		while (MDI_PI_IS_TRANSIENT(pip)) {
3393 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3394 			    &MDI_PI(pip)->pi_mutex);
3395 		}
3396 	}
3397 
3398 	/*
3399 	 * Grab the client lock in reverse order sequence and release the
3400 	 * mdi_pathinfo mutex.
3401 	 */
3402 	i_mdi_client_lock(ct, pip);
3403 	MDI_PI_UNLOCK(pip);
3404 
3405 	/*
3406 	 * Wait till failover state is cleared
3407 	 */
3408 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3409 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3410 
3411 	/*
3412 	 * Mark the mdi_pathinfo node state as transient
3413 	 */
3414 	MDI_PI_LOCK(pip);
3415 	switch (state) {
3416 	case MDI_PATHINFO_STATE_ONLINE:
3417 		MDI_PI_SET_ONLINING(pip);
3418 		break;
3419 
3420 	case MDI_PATHINFO_STATE_STANDBY:
3421 		MDI_PI_SET_STANDBYING(pip);
3422 		break;
3423 
3424 	case MDI_PATHINFO_STATE_FAULT:
3425 		/*
3426 		 * Mark the pathinfo state as FAULTED
3427 		 */
3428 		MDI_PI_SET_FAULTING(pip);
3429 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3430 		break;
3431 
3432 	case MDI_PATHINFO_STATE_OFFLINE:
3433 		/*
3434 		 * ndi_devi_offline() cannot hold pip or ct locks.
3435 		 */
3436 		MDI_PI_UNLOCK(pip);
3437 		/*
3438 		 * Don't offline the client dev_info node unless we have
3439 		 * no available paths left at all.
3440 		 */
3441 		cdip = ct->ct_dip;
3442 		if ((flag & NDI_DEVI_REMOVE) &&
3443 		    (ct->ct_path_count == 1)) {
3444 			i_mdi_client_unlock(ct);
3445 			rv = ndi_devi_offline(cdip, 0);
3446 			if (rv != NDI_SUCCESS) {
3447 				/*
3448 				 * Convert to MDI error code
3449 				 */
3450 				switch (rv) {
3451 				case NDI_BUSY:
3452 					rv = MDI_BUSY;
3453 					break;
3454 				default:
3455 					rv = MDI_FAILURE;
3456 					break;
3457 				}
3458 				goto state_change_exit;
3459 			} else {
3460 				i_mdi_client_lock(ct, NULL);
3461 			}
3462 		}
3463 		/*
3464 		 * Mark the mdi_pathinfo node state as transient
3465 		 */
3466 		MDI_PI_LOCK(pip);
3467 		MDI_PI_SET_OFFLINING(pip);
3468 		break;
3469 	}
3470 	MDI_PI_UNLOCK(pip);
3471 	MDI_CLIENT_UNSTABLE(ct);
3472 	i_mdi_client_unlock(ct);
3473 
3474 	f = vh->vh_ops->vo_pi_state_change;
3475 	if (f != NULL)
3476 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3477 
3478 	MDI_CLIENT_LOCK(ct);
3479 	MDI_PI_LOCK(pip);
3480 	if (rv == MDI_NOT_SUPPORTED) {
3481 		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3482 	}
3483 	if (rv != MDI_SUCCESS) {
3484 		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
3485 		    "!vo_pi_state_change: failed rv = %x", rv));
3486 	}
3487 	if (MDI_PI_IS_TRANSIENT(pip)) {
3488 		if (rv == MDI_SUCCESS) {
3489 			MDI_PI_CLEAR_TRANSIENT(pip);
3490 		} else {
3491 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3492 		}
3493 	}
3494 
3495 	/*
3496 	 * Wake anyone waiting for this mdi_pathinfo node
3497 	 */
3498 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3499 	MDI_PI_UNLOCK(pip);
3500 
3501 	/*
3502 	 * Mark the client device as stable
3503 	 */
3504 	MDI_CLIENT_STABLE(ct);
3505 	if (rv == MDI_SUCCESS) {
3506 		if (ct->ct_unstable == 0) {
3507 			cdip = ct->ct_dip;
3508 
3509 			/*
3510 			 * Onlining the mdi_pathinfo node will impact the
3511 			 * client state Update the client and dev_info node
3512 			 * state accordingly
3513 			 */
3514 			rv = NDI_SUCCESS;
3515 			i_mdi_client_update_state(ct);
3516 			switch (MDI_CLIENT_STATE(ct)) {
3517 			case MDI_CLIENT_STATE_OPTIMAL:
3518 			case MDI_CLIENT_STATE_DEGRADED:
3519 				if (cdip && !i_ddi_devi_attached(cdip) &&
3520 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3521 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3522 
3523 					/*
3524 					 * Must do ndi_devi_online() through
3525 					 * hotplug thread for deferred
3526 					 * attach mechanism to work
3527 					 */
3528 					MDI_CLIENT_UNLOCK(ct);
3529 					rv = ndi_devi_online(cdip, 0);
3530 					MDI_CLIENT_LOCK(ct);
3531 					if ((rv != NDI_SUCCESS) &&
3532 					    (MDI_CLIENT_STATE(ct) ==
3533 					    MDI_CLIENT_STATE_DEGRADED)) {
3534 						/*
3535 						 * ndi_devi_online failed.
3536 						 * Reset client flags to
3537 						 * offline.
3538 						 */
3539 						MDI_DEBUG(1, (CE_WARN, cdip,
3540 						    "!ndi_devi_online: failed "
3541 						    " Error: %x", rv));
3542 						MDI_CLIENT_SET_OFFLINE(ct);
3543 					}
3544 					if (rv != NDI_SUCCESS) {
3545 						/* Reset the path state */
3546 						MDI_PI_LOCK(pip);
3547 						MDI_PI(pip)->pi_state =
3548 						    MDI_PI_OLD_STATE(pip);
3549 						MDI_PI_UNLOCK(pip);
3550 					}
3551 				}
3552 				break;
3553 
3554 			case MDI_CLIENT_STATE_FAILED:
3555 				/*
3556 				 * This is the last path case for
3557 				 * non-user initiated events.
3558 				 */
3559 				if (((flag & NDI_DEVI_REMOVE) == 0) &&
3560 				    cdip && (i_ddi_node_state(cdip) >=
3561 				    DS_INITIALIZED)) {
3562 					MDI_CLIENT_UNLOCK(ct);
3563 					rv = ndi_devi_offline(cdip, 0);
3564 					MDI_CLIENT_LOCK(ct);
3565 
3566 					if (rv != NDI_SUCCESS) {
3567 						/*
3568 						 * ndi_devi_offline failed.
3569 						 * Reset client flags to
3570 						 * online as the path could not
3571 						 * be offlined.
3572 						 */
3573 						MDI_DEBUG(1, (CE_WARN, cdip,
3574 						    "!ndi_devi_offline: failed "
3575 						    " Error: %x", rv));
3576 						MDI_CLIENT_SET_ONLINE(ct);
3577 					}
3578 				}
3579 				break;
3580 			}
3581 			/*
3582 			 * Convert to MDI error code
3583 			 */
3584 			switch (rv) {
3585 			case NDI_SUCCESS:
3586 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3587 				i_mdi_report_path_state(ct, pip);
3588 				rv = MDI_SUCCESS;
3589 				break;
3590 			case NDI_BUSY:
3591 				rv = MDI_BUSY;
3592 				break;
3593 			default:
3594 				rv = MDI_FAILURE;
3595 				break;
3596 			}
3597 		}
3598 	}
3599 	MDI_CLIENT_UNLOCK(ct);
3600 
3601 state_change_exit:
3602 	/*
3603 	 * Mark the pHCI as stable again.
3604 	 */
3605 	MDI_PHCI_LOCK(ph);
3606 	MDI_PHCI_STABLE(ph);
3607 	MDI_PHCI_UNLOCK(ph);
3608 	return (rv);
3609 }
3610 
3611 /*
3612  * mdi_pi_online():
3613  *		Place the path_info node in the online state.  The path is
3614  *		now available to be selected by mdi_select_path() for
3615  *		transporting I/O requests to client devices.
3616  * Return Values:
3617  *		MDI_SUCCESS
3618  *		MDI_FAILURE
3619  */
3620 int
3621 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3622 {
3623 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3624 	int		client_held = 0;
3625 	int		rv;
3626 	int		se_flag;
3627 	int		kmem_flag;
3628 
3629 	ASSERT(ct != NULL);
3630 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3631 	if (rv != MDI_SUCCESS)
3632 		return (rv);
3633 
3634 	MDI_PI_LOCK(pip);
3635 	if (MDI_PI(pip)->pi_pm_held == 0) {
3636 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3637 		    "i_mdi_pm_hold_pip %p\n", (void *)pip));
3638 		i_mdi_pm_hold_pip(pip);
3639 		client_held = 1;
3640 	}
3641 	MDI_PI_UNLOCK(pip);
3642 
3643 	if (client_held) {
3644 		MDI_CLIENT_LOCK(ct);
3645 		if (ct->ct_power_cnt == 0) {
3646 			rv = i_mdi_power_all_phci(ct);
3647 		}
3648 
3649 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3650 		    "i_mdi_pm_hold_client %p\n", (void *)ct));
3651 		i_mdi_pm_hold_client(ct, 1);
3652 		MDI_CLIENT_UNLOCK(ct);
3653 	}
3654 
3655 	/* determine interrupt context */
3656 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3657 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3658 
3659 	/* A new path is online.  Invalidate DINFOCACHE snap shot. */
3660 	i_ddi_di_cache_invalidate(kmem_flag);
3661 
3662 	return (rv);
3663 }
3664 
3665 /*
3666  * mdi_pi_standby():
3667  *		Place the mdi_pathinfo node in standby state
3668  *
3669  * Return Values:
3670  *		MDI_SUCCESS
3671  *		MDI_FAILURE
3672  */
3673 int
3674 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3675 {
3676 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3677 }
3678 
3679 /*
3680  * mdi_pi_fault():
3681  *		Place the mdi_pathinfo node in fault'ed state
3682  * Return Values:
3683  *		MDI_SUCCESS
3684  *		MDI_FAILURE
3685  */
3686 int
3687 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3688 {
3689 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3690 }
3691 
3692 /*
3693  * mdi_pi_offline():
3694  *		Offline a mdi_pathinfo node.
3695  * Return Values:
3696  *		MDI_SUCCESS
3697  *		MDI_FAILURE
3698  */
3699 int
3700 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3701 {
3702 	int	ret, client_held = 0;
3703 	mdi_client_t	*ct;
3704 	int		se_flag;
3705 	int		kmem_flag;
3706 
3707 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3708 
3709 	if (ret == MDI_SUCCESS) {
3710 		MDI_PI_LOCK(pip);
3711 		if (MDI_PI(pip)->pi_pm_held) {
3712 			client_held = 1;
3713 		}
3714 		MDI_PI_UNLOCK(pip);
3715 
3716 		if (client_held) {
3717 			ct = MDI_PI(pip)->pi_client;
3718 			MDI_CLIENT_LOCK(ct);
3719 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3720 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3721 			i_mdi_pm_rele_client(ct, 1);
3722 			MDI_CLIENT_UNLOCK(ct);
3723 		}
3724 
3725 		/* determine interrupt context */
3726 		se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3727 		kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3728 
3729 		/* pathinfo is offlined. update DINFOCACHE. */
3730 		i_ddi_di_cache_invalidate(kmem_flag);
3731 	}
3732 
3733 	return (ret);
3734 }
3735 
3736 /*
3737  * i_mdi_pi_offline():
3738  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3739  */
3740 static int
3741 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3742 {
3743 	dev_info_t	*vdip = NULL;
3744 	mdi_vhci_t	*vh = NULL;
3745 	mdi_client_t	*ct = NULL;
3746 	int		(*f)();
3747 	int		rv;
3748 
3749 	MDI_PI_LOCK(pip);
3750 	ct = MDI_PI(pip)->pi_client;
3751 	ASSERT(ct != NULL);
3752 
3753 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3754 		/*
3755 		 * Give a chance for pending I/Os to complete.
3756 		 */
3757 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3758 		    "%d cmds still pending on path: %p\n",
3759 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3760 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3761 		    &MDI_PI(pip)->pi_mutex,
3762 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3763 			/*
3764 			 * The timeout time reached without ref_cnt being zero
3765 			 * being signaled.
3766 			 */
3767 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3768 			    "Timeout reached on path %p without the cond\n",
3769 			    (void *)pip));
3770 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3771 			    "%d cmds still pending on path: %p\n",
3772 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3773 		}
3774 	}
3775 	vh = ct->ct_vhci;
3776 	vdip = vh->vh_dip;
3777 
3778 	/*
3779 	 * Notify vHCI that has registered this event
3780 	 */
3781 	ASSERT(vh->vh_ops);
3782 	f = vh->vh_ops->vo_pi_state_change;
3783 
3784 	if (f != NULL) {
3785 		MDI_PI_UNLOCK(pip);
3786 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3787 		    flags)) != MDI_SUCCESS) {
3788 			MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3789 			    "!vo_path_offline failed "
3790 			    "vdip %p, pip %p", (void *)vdip, (void *)pip));
3791 		}
3792 		MDI_PI_LOCK(pip);
3793 	}
3794 
3795 	/*
3796 	 * Set the mdi_pathinfo node state and clear the transient condition
3797 	 */
3798 	MDI_PI_SET_OFFLINE(pip);
3799 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3800 	MDI_PI_UNLOCK(pip);
3801 
3802 	MDI_CLIENT_LOCK(ct);
3803 	if (rv == MDI_SUCCESS) {
3804 		if (ct->ct_unstable == 0) {
3805 			dev_info_t	*cdip = ct->ct_dip;
3806 
3807 			/*
3808 			 * Onlining the mdi_pathinfo node will impact the
3809 			 * client state Update the client and dev_info node
3810 			 * state accordingly
3811 			 */
3812 			i_mdi_client_update_state(ct);
3813 			rv = NDI_SUCCESS;
3814 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3815 				if (cdip &&
3816 				    (i_ddi_node_state(cdip) >=
3817 				    DS_INITIALIZED)) {
3818 					MDI_CLIENT_UNLOCK(ct);
3819 					rv = ndi_devi_offline(cdip, 0);
3820 					MDI_CLIENT_LOCK(ct);
3821 					if (rv != NDI_SUCCESS) {
3822 						/*
3823 						 * ndi_devi_offline failed.
3824 						 * Reset client flags to
3825 						 * online.
3826 						 */
3827 						MDI_DEBUG(4, (CE_WARN, cdip,
3828 						    "!ndi_devi_offline: failed "
3829 						    " Error: %x", rv));
3830 						MDI_CLIENT_SET_ONLINE(ct);
3831 					}
3832 				}
3833 			}
3834 			/*
3835 			 * Convert to MDI error code
3836 			 */
3837 			switch (rv) {
3838 			case NDI_SUCCESS:
3839 				rv = MDI_SUCCESS;
3840 				break;
3841 			case NDI_BUSY:
3842 				rv = MDI_BUSY;
3843 				break;
3844 			default:
3845 				rv = MDI_FAILURE;
3846 				break;
3847 			}
3848 		}
3849 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3850 		i_mdi_report_path_state(ct, pip);
3851 	}
3852 
3853 	MDI_CLIENT_UNLOCK(ct);
3854 
3855 	/*
3856 	 * Change in the mdi_pathinfo node state will impact the client state
3857 	 */
3858 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3859 	    (void *)ct, (void *)pip));
3860 	return (rv);
3861 }
3862 
3863 
3864 /*
3865  * mdi_pi_get_addr():
3866  *		Get the unit address associated with a mdi_pathinfo node
3867  *
3868  * Return Values:
3869  *		char *
3870  */
3871 char *
3872 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3873 {
3874 	if (pip == NULL)
3875 		return (NULL);
3876 
3877 	return (MDI_PI(pip)->pi_addr);
3878 }
3879 
3880 /*
3881  * mdi_pi_get_path_instance():
3882  *		Get the 'path_instance' of a mdi_pathinfo node
3883  *
3884  * Return Values:
3885  *		path_instance
3886  */
3887 int
3888 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
3889 {
3890 	if (pip == NULL)
3891 		return (0);
3892 
3893 	return (MDI_PI(pip)->pi_path_instance);
3894 }
3895 
3896 /*
3897  * mdi_pi_pathname():
3898  *		Return pointer to path to pathinfo node.
3899  */
3900 char *
3901 mdi_pi_pathname(mdi_pathinfo_t *pip)
3902 {
3903 	if (pip == NULL)
3904 		return (NULL);
3905 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
3906 }
3907 
3908 char *
3909 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
3910 {
3911 	char *obp_path = NULL;
3912 	if ((pip == NULL) || (path == NULL))
3913 		return (NULL);
3914 
3915 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
3916 		(void) strcpy(path, obp_path);
3917 		(void) mdi_prop_free(obp_path);
3918 	} else {
3919 		path = NULL;
3920 	}
3921 	return (path);
3922 }
3923 
3924 int
3925 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
3926 {
3927 	dev_info_t *pdip;
3928 	char *obp_path = NULL;
3929 	int rc = MDI_FAILURE;
3930 
3931 	if (pip == NULL)
3932 		return (MDI_FAILURE);
3933 
3934 	pdip = mdi_pi_get_phci(pip);
3935 	if (pdip == NULL)
3936 		return (MDI_FAILURE);
3937 
3938 	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3939 
3940 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
3941 		(void) ddi_pathname(pdip, obp_path);
3942 	}
3943 
3944 	if (component) {
3945 		(void) strncat(obp_path, "/", MAXPATHLEN);
3946 		(void) strncat(obp_path, component, MAXPATHLEN);
3947 	}
3948 	rc = mdi_prop_update_string(pip, "obp-path", obp_path);
3949 
3950 	if (obp_path)
3951 		kmem_free(obp_path, MAXPATHLEN);
3952 	return (rc);
3953 }
3954 
3955 /*
3956  * mdi_pi_get_client():
3957  *		Get the client devinfo associated with a mdi_pathinfo node
3958  *
3959  * Return Values:
3960  *		Handle to client device dev_info node
3961  */
3962 dev_info_t *
3963 mdi_pi_get_client(mdi_pathinfo_t *pip)
3964 {
3965 	dev_info_t	*dip = NULL;
3966 	if (pip) {
3967 		dip = MDI_PI(pip)->pi_client->ct_dip;
3968 	}
3969 	return (dip);
3970 }
3971 
3972 /*
3973  * mdi_pi_get_phci():
3974  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3975  * Return Values:
3976  *		Handle to dev_info node
3977  */
3978 dev_info_t *
3979 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3980 {
3981 	dev_info_t	*dip = NULL;
3982 	if (pip) {
3983 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3984 	}
3985 	return (dip);
3986 }
3987 
3988 /*
3989  * mdi_pi_get_client_private():
3990  *		Get the client private information associated with the
3991  *		mdi_pathinfo node
3992  */
3993 void *
3994 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3995 {
3996 	void *cprivate = NULL;
3997 	if (pip) {
3998 		cprivate = MDI_PI(pip)->pi_cprivate;
3999 	}
4000 	return (cprivate);
4001 }
4002 
4003 /*
4004  * mdi_pi_set_client_private():
4005  *		Set the client private information in the mdi_pathinfo node
4006  */
4007 void
4008 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4009 {
4010 	if (pip) {
4011 		MDI_PI(pip)->pi_cprivate = priv;
4012 	}
4013 }
4014 
4015 /*
4016  * mdi_pi_get_phci_private():
4017  *		Get the pHCI private information associated with the
4018  *		mdi_pathinfo node
4019  */
4020 caddr_t
4021 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4022 {
4023 	caddr_t	pprivate = NULL;
4024 	if (pip) {
4025 		pprivate = MDI_PI(pip)->pi_pprivate;
4026 	}
4027 	return (pprivate);
4028 }
4029 
4030 /*
4031  * mdi_pi_set_phci_private():
4032  *		Set the pHCI private information in the mdi_pathinfo node
4033  */
4034 void
4035 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4036 {
4037 	if (pip) {
4038 		MDI_PI(pip)->pi_pprivate = priv;
4039 	}
4040 }
4041 
4042 /*
4043  * mdi_pi_get_state():
4044  *		Get the mdi_pathinfo node state. Transient states are internal
4045  *		and not provided to the users
4046  */
4047 mdi_pathinfo_state_t
4048 mdi_pi_get_state(mdi_pathinfo_t *pip)
4049 {
4050 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4051 
4052 	if (pip) {
4053 		if (MDI_PI_IS_TRANSIENT(pip)) {
4054 			/*
4055 			 * mdi_pathinfo is in state transition.  Return the
4056 			 * last good state.
4057 			 */
4058 			state = MDI_PI_OLD_STATE(pip);
4059 		} else {
4060 			state = MDI_PI_STATE(pip);
4061 		}
4062 	}
4063 	return (state);
4064 }
4065 
4066 /*
4067  * Note that the following function needs to be the new interface for
4068  * mdi_pi_get_state when mpxio gets integrated to ON.
4069  */
4070 int
4071 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4072 		uint32_t *ext_state)
4073 {
4074 	*state = MDI_PATHINFO_STATE_INIT;
4075 
4076 	if (pip) {
4077 		if (MDI_PI_IS_TRANSIENT(pip)) {
4078 			/*
4079 			 * mdi_pathinfo is in state transition.  Return the
4080 			 * last good state.
4081 			 */
4082 			*state = MDI_PI_OLD_STATE(pip);
4083 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
4084 		} else {
4085 			*state = MDI_PI_STATE(pip);
4086 			*ext_state = MDI_PI_EXT_STATE(pip);
4087 		}
4088 	}
4089 	return (MDI_SUCCESS);
4090 }
4091 
4092 /*
4093  * mdi_pi_get_preferred:
4094  *	Get the preferred path flag
4095  */
4096 int
4097 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4098 {
4099 	if (pip) {
4100 		return (MDI_PI(pip)->pi_preferred);
4101 	}
4102 	return (0);
4103 }
4104 
4105 /*
4106  * mdi_pi_set_preferred:
4107  *	Set the preferred path flag
4108  */
4109 void
4110 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4111 {
4112 	if (pip) {
4113 		MDI_PI(pip)->pi_preferred = preferred;
4114 	}
4115 }
4116 
4117 /*
4118  * mdi_pi_set_state():
4119  *		Set the mdi_pathinfo node state
4120  */
4121 void
4122 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4123 {
4124 	uint32_t	ext_state;
4125 
4126 	if (pip) {
4127 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4128 		MDI_PI(pip)->pi_state = state;
4129 		MDI_PI(pip)->pi_state |= ext_state;
4130 	}
4131 }
4132 
4133 /*
4134  * Property functions:
4135  */
4136 int
4137 i_map_nvlist_error_to_mdi(int val)
4138 {
4139 	int rv;
4140 
4141 	switch (val) {
4142 	case 0:
4143 		rv = DDI_PROP_SUCCESS;
4144 		break;
4145 	case EINVAL:
4146 	case ENOTSUP:
4147 		rv = DDI_PROP_INVAL_ARG;
4148 		break;
4149 	case ENOMEM:
4150 		rv = DDI_PROP_NO_MEMORY;
4151 		break;
4152 	default:
4153 		rv = DDI_PROP_NOT_FOUND;
4154 		break;
4155 	}
4156 	return (rv);
4157 }
4158 
4159 /*
4160  * mdi_pi_get_next_prop():
4161  * 		Property walk function.  The caller should hold mdi_pi_lock()
4162  *		and release by calling mdi_pi_unlock() at the end of walk to
4163  *		get a consistent value.
4164  */
4165 nvpair_t *
4166 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4167 {
4168 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4169 		return (NULL);
4170 	}
4171 	ASSERT(MDI_PI_LOCKED(pip));
4172 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4173 }
4174 
4175 /*
4176  * mdi_prop_remove():
4177  * 		Remove the named property from the named list.
4178  */
4179 int
4180 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4181 {
4182 	if (pip == NULL) {
4183 		return (DDI_PROP_NOT_FOUND);
4184 	}
4185 	ASSERT(!MDI_PI_LOCKED(pip));
4186 	MDI_PI_LOCK(pip);
4187 	if (MDI_PI(pip)->pi_prop == NULL) {
4188 		MDI_PI_UNLOCK(pip);
4189 		return (DDI_PROP_NOT_FOUND);
4190 	}
4191 	if (name) {
4192 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4193 	} else {
4194 		char		nvp_name[MAXNAMELEN];
4195 		nvpair_t	*nvp;
4196 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4197 		while (nvp) {
4198 			nvpair_t	*next;
4199 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4200 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
4201 			    nvpair_name(nvp));
4202 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4203 			    nvp_name);
4204 			nvp = next;
4205 		}
4206 	}
4207 	MDI_PI_UNLOCK(pip);
4208 	return (DDI_PROP_SUCCESS);
4209 }
4210 
4211 /*
4212  * mdi_prop_size():
4213  * 		Get buffer size needed to pack the property data.
4214  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
4215  *		buffer size.
4216  */
4217 int
4218 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4219 {
4220 	int	rv;
4221 	size_t	bufsize;
4222 
4223 	*buflenp = 0;
4224 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4225 		return (DDI_PROP_NOT_FOUND);
4226 	}
4227 	ASSERT(MDI_PI_LOCKED(pip));
4228 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4229 	    &bufsize, NV_ENCODE_NATIVE);
4230 	*buflenp = bufsize;
4231 	return (i_map_nvlist_error_to_mdi(rv));
4232 }
4233 
4234 /*
4235  * mdi_prop_pack():
4236  * 		pack the property list.  The caller should hold the
4237  *		mdi_pathinfo_t node to get a consistent data
4238  */
4239 int
4240 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4241 {
4242 	int	rv;
4243 	size_t	bufsize;
4244 
4245 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4246 		return (DDI_PROP_NOT_FOUND);
4247 	}
4248 
4249 	ASSERT(MDI_PI_LOCKED(pip));
4250 
4251 	bufsize = buflen;
4252 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4253 	    NV_ENCODE_NATIVE, KM_SLEEP);
4254 
4255 	return (i_map_nvlist_error_to_mdi(rv));
4256 }
4257 
4258 /*
4259  * mdi_prop_update_byte():
4260  *		Create/Update a byte property
4261  */
4262 int
4263 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4264 {
4265 	int rv;
4266 
4267 	if (pip == NULL) {
4268 		return (DDI_PROP_INVAL_ARG);
4269 	}
4270 	ASSERT(!MDI_PI_LOCKED(pip));
4271 	MDI_PI_LOCK(pip);
4272 	if (MDI_PI(pip)->pi_prop == NULL) {
4273 		MDI_PI_UNLOCK(pip);
4274 		return (DDI_PROP_NOT_FOUND);
4275 	}
4276 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4277 	MDI_PI_UNLOCK(pip);
4278 	return (i_map_nvlist_error_to_mdi(rv));
4279 }
4280 
4281 /*
4282  * mdi_prop_update_byte_array():
4283  *		Create/Update a byte array property
4284  */
4285 int
4286 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4287     uint_t nelements)
4288 {
4289 	int rv;
4290 
4291 	if (pip == NULL) {
4292 		return (DDI_PROP_INVAL_ARG);
4293 	}
4294 	ASSERT(!MDI_PI_LOCKED(pip));
4295 	MDI_PI_LOCK(pip);
4296 	if (MDI_PI(pip)->pi_prop == NULL) {
4297 		MDI_PI_UNLOCK(pip);
4298 		return (DDI_PROP_NOT_FOUND);
4299 	}
4300 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4301 	MDI_PI_UNLOCK(pip);
4302 	return (i_map_nvlist_error_to_mdi(rv));
4303 }
4304 
4305 /*
4306  * mdi_prop_update_int():
4307  *		Create/Update a 32 bit integer property
4308  */
4309 int
4310 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4311 {
4312 	int rv;
4313 
4314 	if (pip == NULL) {
4315 		return (DDI_PROP_INVAL_ARG);
4316 	}
4317 	ASSERT(!MDI_PI_LOCKED(pip));
4318 	MDI_PI_LOCK(pip);
4319 	if (MDI_PI(pip)->pi_prop == NULL) {
4320 		MDI_PI_UNLOCK(pip);
4321 		return (DDI_PROP_NOT_FOUND);
4322 	}
4323 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4324 	MDI_PI_UNLOCK(pip);
4325 	return (i_map_nvlist_error_to_mdi(rv));
4326 }
4327 
4328 /*
4329  * mdi_prop_update_int64():
4330  *		Create/Update a 64 bit integer property
4331  */
4332 int
4333 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4334 {
4335 	int rv;
4336 
4337 	if (pip == NULL) {
4338 		return (DDI_PROP_INVAL_ARG);
4339 	}
4340 	ASSERT(!MDI_PI_LOCKED(pip));
4341 	MDI_PI_LOCK(pip);
4342 	if (MDI_PI(pip)->pi_prop == NULL) {
4343 		MDI_PI_UNLOCK(pip);
4344 		return (DDI_PROP_NOT_FOUND);
4345 	}
4346 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4347 	MDI_PI_UNLOCK(pip);
4348 	return (i_map_nvlist_error_to_mdi(rv));
4349 }
4350 
4351 /*
4352  * mdi_prop_update_int_array():
4353  *		Create/Update a int array property
4354  */
4355 int
4356 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4357 	    uint_t nelements)
4358 {
4359 	int rv;
4360 
4361 	if (pip == NULL) {
4362 		return (DDI_PROP_INVAL_ARG);
4363 	}
4364 	ASSERT(!MDI_PI_LOCKED(pip));
4365 	MDI_PI_LOCK(pip);
4366 	if (MDI_PI(pip)->pi_prop == NULL) {
4367 		MDI_PI_UNLOCK(pip);
4368 		return (DDI_PROP_NOT_FOUND);
4369 	}
4370 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4371 	    nelements);
4372 	MDI_PI_UNLOCK(pip);
4373 	return (i_map_nvlist_error_to_mdi(rv));
4374 }
4375 
4376 /*
4377  * mdi_prop_update_string():
4378  *		Create/Update a string property
4379  */
4380 int
4381 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4382 {
4383 	int rv;
4384 
4385 	if (pip == NULL) {
4386 		return (DDI_PROP_INVAL_ARG);
4387 	}
4388 	ASSERT(!MDI_PI_LOCKED(pip));
4389 	MDI_PI_LOCK(pip);
4390 	if (MDI_PI(pip)->pi_prop == NULL) {
4391 		MDI_PI_UNLOCK(pip);
4392 		return (DDI_PROP_NOT_FOUND);
4393 	}
4394 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4395 	MDI_PI_UNLOCK(pip);
4396 	return (i_map_nvlist_error_to_mdi(rv));
4397 }
4398 
4399 /*
4400  * mdi_prop_update_string_array():
4401  *		Create/Update a string array property
4402  */
4403 int
4404 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4405     uint_t nelements)
4406 {
4407 	int rv;
4408 
4409 	if (pip == NULL) {
4410 		return (DDI_PROP_INVAL_ARG);
4411 	}
4412 	ASSERT(!MDI_PI_LOCKED(pip));
4413 	MDI_PI_LOCK(pip);
4414 	if (MDI_PI(pip)->pi_prop == NULL) {
4415 		MDI_PI_UNLOCK(pip);
4416 		return (DDI_PROP_NOT_FOUND);
4417 	}
4418 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4419 	    nelements);
4420 	MDI_PI_UNLOCK(pip);
4421 	return (i_map_nvlist_error_to_mdi(rv));
4422 }
4423 
4424 /*
4425  * mdi_prop_lookup_byte():
4426  * 		Look for byte property identified by name.  The data returned
4427  *		is the actual property and valid as long as mdi_pathinfo_t node
4428  *		is alive.
4429  */
4430 int
4431 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4432 {
4433 	int rv;
4434 
4435 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4436 		return (DDI_PROP_NOT_FOUND);
4437 	}
4438 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4439 	return (i_map_nvlist_error_to_mdi(rv));
4440 }
4441 
4442 
4443 /*
4444  * mdi_prop_lookup_byte_array():
4445  * 		Look for byte array property identified by name.  The data
4446  *		returned is the actual property and valid as long as
4447  *		mdi_pathinfo_t node is alive.
4448  */
4449 int
4450 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4451     uint_t *nelements)
4452 {
4453 	int rv;
4454 
4455 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4456 		return (DDI_PROP_NOT_FOUND);
4457 	}
4458 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4459 	    nelements);
4460 	return (i_map_nvlist_error_to_mdi(rv));
4461 }
4462 
4463 /*
4464  * mdi_prop_lookup_int():
4465  * 		Look for int property identified by name.  The data returned
4466  *		is the actual property and valid as long as mdi_pathinfo_t
4467  *		node is alive.
4468  */
4469 int
4470 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4471 {
4472 	int rv;
4473 
4474 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4475 		return (DDI_PROP_NOT_FOUND);
4476 	}
4477 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4478 	return (i_map_nvlist_error_to_mdi(rv));
4479 }
4480 
4481 /*
4482  * mdi_prop_lookup_int64():
4483  * 		Look for int64 property identified by name.  The data returned
4484  *		is the actual property and valid as long as mdi_pathinfo_t node
4485  *		is alive.
4486  */
4487 int
4488 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4489 {
4490 	int rv;
4491 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4492 		return (DDI_PROP_NOT_FOUND);
4493 	}
4494 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4495 	return (i_map_nvlist_error_to_mdi(rv));
4496 }
4497 
4498 /*
4499  * mdi_prop_lookup_int_array():
4500  * 		Look for int array property identified by name.  The data
4501  *		returned is the actual property and valid as long as
4502  *		mdi_pathinfo_t node is alive.
4503  */
4504 int
4505 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4506     uint_t *nelements)
4507 {
4508 	int rv;
4509 
4510 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4511 		return (DDI_PROP_NOT_FOUND);
4512 	}
4513 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4514 	    (int32_t **)data, nelements);
4515 	return (i_map_nvlist_error_to_mdi(rv));
4516 }
4517 
4518 /*
4519  * mdi_prop_lookup_string():
4520  * 		Look for string property identified by name.  The data
4521  *		returned is the actual property and valid as long as
4522  *		mdi_pathinfo_t node is alive.
4523  */
4524 int
4525 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4526 {
4527 	int rv;
4528 
4529 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4530 		return (DDI_PROP_NOT_FOUND);
4531 	}
4532 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4533 	return (i_map_nvlist_error_to_mdi(rv));
4534 }
4535 
4536 /*
4537  * mdi_prop_lookup_string_array():
4538  * 		Look for string array property identified by name.  The data
4539  *		returned is the actual property and valid as long as
4540  *		mdi_pathinfo_t node is alive.
4541  */
4542 int
4543 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4544     uint_t *nelements)
4545 {
4546 	int rv;
4547 
4548 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4549 		return (DDI_PROP_NOT_FOUND);
4550 	}
4551 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4552 	    nelements);
4553 	return (i_map_nvlist_error_to_mdi(rv));
4554 }
4555 
4556 /*
4557  * mdi_prop_free():
4558  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4559  *		functions return the pointer to actual property data and not a
4560  *		copy of it.  So the data returned is valid as long as
4561  *		mdi_pathinfo_t node is valid.
4562  */
4563 /*ARGSUSED*/
4564 int
4565 mdi_prop_free(void *data)
4566 {
4567 	return (DDI_PROP_SUCCESS);
4568 }
4569 
4570 /*ARGSUSED*/
4571 static void
4572 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4573 {
4574 	char		*phci_path, *ct_path;
4575 	char		*ct_status;
4576 	char		*status;
4577 	dev_info_t	*dip = ct->ct_dip;
4578 	char		lb_buf[64];
4579 
4580 	ASSERT(MDI_CLIENT_LOCKED(ct));
4581 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4582 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4583 		return;
4584 	}
4585 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4586 		ct_status = "optimal";
4587 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4588 		ct_status = "degraded";
4589 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4590 		ct_status = "failed";
4591 	} else {
4592 		ct_status = "unknown";
4593 	}
4594 
4595 	if (MDI_PI_IS_OFFLINE(pip)) {
4596 		status = "offline";
4597 	} else if (MDI_PI_IS_ONLINE(pip)) {
4598 		status = "online";
4599 	} else if (MDI_PI_IS_STANDBY(pip)) {
4600 		status = "standby";
4601 	} else if (MDI_PI_IS_FAULT(pip)) {
4602 		status = "faulted";
4603 	} else {
4604 		status = "unknown";
4605 	}
4606 
4607 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4608 		(void) snprintf(lb_buf, sizeof (lb_buf),
4609 		    "%s, region-size: %d", mdi_load_balance_lba,
4610 			ct->ct_lb_args->region_size);
4611 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4612 		(void) snprintf(lb_buf, sizeof (lb_buf),
4613 		    "%s", mdi_load_balance_none);
4614 	} else {
4615 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4616 		    mdi_load_balance_rr);
4617 	}
4618 
4619 	if (dip) {
4620 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4621 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4622 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4623 		    "path %s (%s%d) to target address: %s is %s"
4624 		    " Load balancing: %s\n",
4625 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4626 		    ddi_get_instance(dip), ct_status,
4627 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4628 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4629 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4630 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4631 		kmem_free(phci_path, MAXPATHLEN);
4632 		kmem_free(ct_path, MAXPATHLEN);
4633 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4634 	}
4635 }
4636 
#ifdef	DEBUG
/*
 * i_mdi_log():
 *		Debug message helper.  Formats the caller's message, prefixes
 *		it with "mdi: " and, when a dip is supplied, with the node name
 *		and instance, then hands it to cmn_err().
 *
 *		A leading '!', '?' or '^' in the formatted message selects
 *		log-only, boot-only or console-only delivery respectively and
 *		is moved in front of the "mdi: " prefix.  When
 *		mdi_debug_logonly is set, log-only delivery is forced.
 *		CE_NOTE is issued as CE_CONT.  For levels other than CE_CONT,
 *		CE_WARN and CE_PANIC the message is issued without a
 *		destination character.
 */
/*PRINTFLIKE3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[MAXNAMELEN];
	char		*bp = buf;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;
	int		prefixable;

	/* Device tag, or an empty string when no dip was given. */
	if (dip == NULL) {
		name[0] = 0;
	} else {
		(void) snprintf(name, sizeof (name), "%s%d: ",
		    ddi_node_name(dip), ddi_get_instance(dip));
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	/* Strip a leading destination character and remember its meaning. */
	if (buf[0] == '!') {
		log_only = 1;
		bp = &buf[1];
	} else if (buf[0] == '?') {
		boot_only = 1;
		bp = &buf[1];
	} else if (buf[0] == '^') {
		console_only = 1;
		bp = &buf[1];
	}

	/* Global override: send everything to the log only. */
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}

	if (level == CE_NOTE)
		level = CE_CONT;

	/* Only these levels get the destination character re-applied. */
	prefixable = (level == CE_CONT || level == CE_WARN ||
	    level == CE_PANIC);

	if (prefixable && boot_only) {
		cmn_err(level, "?mdi: %s%s", name, bp);
	} else if (prefixable && console_only) {
		cmn_err(level, "^mdi: %s%s", name, bp);
	} else if (prefixable && log_only) {
		cmn_err(level, "!mdi: %s%s", name, bp);
	} else {
		cmn_err(level, "mdi: %s%s", name, bp);
	}
}
#endif	/* DEBUG */
4712 
4713 void
4714 i_mdi_client_online(dev_info_t *ct_dip)
4715 {
4716 	mdi_client_t	*ct;
4717 
4718 	/*
4719 	 * Client online notification. Mark client state as online
4720 	 * restore our binding with dev_info node
4721 	 */
4722 	ct = i_devi_get_client(ct_dip);
4723 	ASSERT(ct != NULL);
4724 	MDI_CLIENT_LOCK(ct);
4725 	MDI_CLIENT_SET_ONLINE(ct);
4726 	/* catch for any memory leaks */
4727 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4728 	ct->ct_dip = ct_dip;
4729 
4730 	if (ct->ct_power_cnt == 0)
4731 		(void) i_mdi_power_all_phci(ct);
4732 
4733 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4734 	    "i_mdi_pm_hold_client %p\n", (void *)ct));
4735 	i_mdi_pm_hold_client(ct, 1);
4736 
4737 	MDI_CLIENT_UNLOCK(ct);
4738 }
4739 
4740 void
4741 i_mdi_phci_online(dev_info_t *ph_dip)
4742 {
4743 	mdi_phci_t	*ph;
4744 
4745 	/* pHCI online notification. Mark state accordingly */
4746 	ph = i_devi_get_phci(ph_dip);
4747 	ASSERT(ph != NULL);
4748 	MDI_PHCI_LOCK(ph);
4749 	MDI_PHCI_SET_ONLINE(ph);
4750 	MDI_PHCI_UNLOCK(ph);
4751 }
4752 
4753 /*
4754  * mdi_devi_online():
4755  * 		Online notification from NDI framework on pHCI/client
4756  *		device online.
4757  * Return Values:
4758  *		NDI_SUCCESS
4759  *		MDI_FAILURE
4760  */
4761 /*ARGSUSED*/
4762 int
4763 mdi_devi_online(dev_info_t *dip, uint_t flags)
4764 {
4765 	if (MDI_PHCI(dip)) {
4766 		i_mdi_phci_online(dip);
4767 	}
4768 
4769 	if (MDI_CLIENT(dip)) {
4770 		i_mdi_client_online(dip);
4771 	}
4772 	return (NDI_SUCCESS);
4773 }
4774 
4775 /*
4776  * mdi_devi_offline():
4777  * 		Offline notification from NDI framework on pHCI/Client device
4778  *		offline.
4779  *
4780  * Return Values:
4781  *		NDI_SUCCESS
4782  *		NDI_FAILURE
4783  */
4784 /*ARGSUSED*/
4785 int
4786 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4787 {
4788 	int		rv = NDI_SUCCESS;
4789 
4790 	if (MDI_CLIENT(dip)) {
4791 		rv = i_mdi_client_offline(dip, flags);
4792 		if (rv != NDI_SUCCESS)
4793 			return (rv);
4794 	}
4795 
4796 	if (MDI_PHCI(dip)) {
4797 		rv = i_mdi_phci_offline(dip, flags);
4798 
4799 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4800 			/* set client back online */
4801 			i_mdi_client_online(dip);
4802 		}
4803 	}
4804 
4805 	return (rv);
4806 }
4807 
4808 /*ARGSUSED*/
4809 static int
4810 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4811 {
4812 	int		rv = NDI_SUCCESS;
4813 	mdi_phci_t	*ph;
4814 	mdi_client_t	*ct;
4815 	mdi_pathinfo_t	*pip;
4816 	mdi_pathinfo_t	*next;
4817 	mdi_pathinfo_t	*failed_pip = NULL;
4818 	dev_info_t	*cdip;
4819 
4820 	/*
4821 	 * pHCI component offline notification
4822 	 * Make sure that this pHCI instance is free to be offlined.
4823 	 * If it is OK to proceed, Offline and remove all the child
4824 	 * mdi_pathinfo nodes.  This process automatically offlines
4825 	 * corresponding client devices, for which this pHCI provides
4826 	 * critical services.
4827 	 */
4828 	ph = i_devi_get_phci(dip);
4829 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
4830 	    (void *)dip, (void *)ph));
4831 	if (ph == NULL) {
4832 		return (rv);
4833 	}
4834 
4835 	MDI_PHCI_LOCK(ph);
4836 
4837 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4838 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
4839 		    (void *)ph));
4840 		MDI_PHCI_UNLOCK(ph);
4841 		return (NDI_SUCCESS);
4842 	}
4843 
4844 	/*
4845 	 * Check to see if the pHCI can be offlined
4846 	 */
4847 	if (ph->ph_unstable) {
4848 		MDI_DEBUG(1, (CE_WARN, dip,
4849 		    "!One or more target devices are in transient "
4850 		    "state. This device can not be removed at "
4851 		    "this moment. Please try again later."));
4852 		MDI_PHCI_UNLOCK(ph);
4853 		return (NDI_BUSY);
4854 	}
4855 
4856 	pip = ph->ph_path_head;
4857 	while (pip != NULL) {
4858 		MDI_PI_LOCK(pip);
4859 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4860 
4861 		/*
4862 		 * The mdi_pathinfo state is OK. Check the client state.
4863 		 * If failover in progress fail the pHCI from offlining
4864 		 */
4865 		ct = MDI_PI(pip)->pi_client;
4866 		i_mdi_client_lock(ct, pip);
4867 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4868 		    (ct->ct_unstable)) {
4869 			/*
4870 			 * Failover is in progress, Fail the DR
4871 			 */
4872 			MDI_DEBUG(1, (CE_WARN, dip,
4873 			    "!pHCI device (%s%d) is Busy. %s",
4874 			    ddi_driver_name(dip), ddi_get_instance(dip),
4875 			    "This device can not be removed at "
4876 			    "this moment. Please try again later."));
4877 			MDI_PI_UNLOCK(pip);
4878 			i_mdi_client_unlock(ct);
4879 			MDI_PHCI_UNLOCK(ph);
4880 			return (NDI_BUSY);
4881 		}
4882 		MDI_PI_UNLOCK(pip);
4883 
4884 		/*
4885 		 * Check to see of we are removing the last path of this
4886 		 * client device...
4887 		 */
4888 		cdip = ct->ct_dip;
4889 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4890 		    (i_mdi_client_compute_state(ct, ph) ==
4891 		    MDI_CLIENT_STATE_FAILED)) {
4892 			i_mdi_client_unlock(ct);
4893 			MDI_PHCI_UNLOCK(ph);
4894 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4895 				/*
4896 				 * ndi_devi_offline() failed.
4897 				 * This pHCI provides the critical path
4898 				 * to one or more client devices.
4899 				 * Return busy.
4900 				 */
4901 				MDI_PHCI_LOCK(ph);
4902 				MDI_DEBUG(1, (CE_WARN, dip,
4903 				    "!pHCI device (%s%d) is Busy. %s",
4904 				    ddi_driver_name(dip), ddi_get_instance(dip),
4905 				    "This device can not be removed at "
4906 				    "this moment. Please try again later."));
4907 				failed_pip = pip;
4908 				break;
4909 			} else {
4910 				MDI_PHCI_LOCK(ph);
4911 				pip = next;
4912 			}
4913 		} else {
4914 			i_mdi_client_unlock(ct);
4915 			pip = next;
4916 		}
4917 	}
4918 
4919 	if (failed_pip) {
4920 		pip = ph->ph_path_head;
4921 		while (pip != failed_pip) {
4922 			MDI_PI_LOCK(pip);
4923 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4924 			ct = MDI_PI(pip)->pi_client;
4925 			i_mdi_client_lock(ct, pip);
4926 			cdip = ct->ct_dip;
4927 			switch (MDI_CLIENT_STATE(ct)) {
4928 			case MDI_CLIENT_STATE_OPTIMAL:
4929 			case MDI_CLIENT_STATE_DEGRADED:
4930 				if (cdip) {
4931 					MDI_PI_UNLOCK(pip);
4932 					i_mdi_client_unlock(ct);
4933 					MDI_PHCI_UNLOCK(ph);
4934 					(void) ndi_devi_online(cdip, 0);
4935 					MDI_PHCI_LOCK(ph);
4936 					pip = next;
4937 					continue;
4938 				}
4939 				break;
4940 
4941 			case MDI_CLIENT_STATE_FAILED:
4942 				if (cdip) {
4943 					MDI_PI_UNLOCK(pip);
4944 					i_mdi_client_unlock(ct);
4945 					MDI_PHCI_UNLOCK(ph);
4946 					(void) ndi_devi_offline(cdip, 0);
4947 					MDI_PHCI_LOCK(ph);
4948 					pip = next;
4949 					continue;
4950 				}
4951 				break;
4952 			}
4953 			MDI_PI_UNLOCK(pip);
4954 			i_mdi_client_unlock(ct);
4955 			pip = next;
4956 		}
4957 		MDI_PHCI_UNLOCK(ph);
4958 		return (NDI_BUSY);
4959 	}
4960 
4961 	/*
4962 	 * Mark the pHCI as offline
4963 	 */
4964 	MDI_PHCI_SET_OFFLINE(ph);
4965 
4966 	/*
4967 	 * Mark the child mdi_pathinfo nodes as transient
4968 	 */
4969 	pip = ph->ph_path_head;
4970 	while (pip != NULL) {
4971 		MDI_PI_LOCK(pip);
4972 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4973 		MDI_PI_SET_OFFLINING(pip);
4974 		MDI_PI_UNLOCK(pip);
4975 		pip = next;
4976 	}
4977 	MDI_PHCI_UNLOCK(ph);
4978 	/*
4979 	 * Give a chance for any pending commands to execute
4980 	 */
4981 	delay(1);
4982 	MDI_PHCI_LOCK(ph);
4983 	pip = ph->ph_path_head;
4984 	while (pip != NULL) {
4985 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4986 		(void) i_mdi_pi_offline(pip, flags);
4987 		MDI_PI_LOCK(pip);
4988 		ct = MDI_PI(pip)->pi_client;
4989 		if (!MDI_PI_IS_OFFLINE(pip)) {
4990 			MDI_DEBUG(1, (CE_WARN, dip,
4991 			    "!pHCI device (%s%d) is Busy. %s",
4992 			    ddi_driver_name(dip), ddi_get_instance(dip),
4993 			    "This device can not be removed at "
4994 			    "this moment. Please try again later."));
4995 			MDI_PI_UNLOCK(pip);
4996 			MDI_PHCI_SET_ONLINE(ph);
4997 			MDI_PHCI_UNLOCK(ph);
4998 			return (NDI_BUSY);
4999 		}
5000 		MDI_PI_UNLOCK(pip);
5001 		pip = next;
5002 	}
5003 	MDI_PHCI_UNLOCK(ph);
5004 
5005 	return (rv);
5006 }
5007 
5008 void
5009 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5010 {
5011 	mdi_phci_t	*ph;
5012 	mdi_client_t	*ct;
5013 	mdi_pathinfo_t	*pip;
5014 	mdi_pathinfo_t	*next;
5015 	dev_info_t	*cdip;
5016 
5017 	if (!MDI_PHCI(dip))
5018 		return;
5019 
5020 	ph = i_devi_get_phci(dip);
5021 	if (ph == NULL) {
5022 		return;
5023 	}
5024 
5025 	MDI_PHCI_LOCK(ph);
5026 
5027 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5028 		/* has no last path */
5029 		MDI_PHCI_UNLOCK(ph);
5030 		return;
5031 	}
5032 
5033 	pip = ph->ph_path_head;
5034 	while (pip != NULL) {
5035 		MDI_PI_LOCK(pip);
5036 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5037 
5038 		ct = MDI_PI(pip)->pi_client;
5039 		i_mdi_client_lock(ct, pip);
5040 		MDI_PI_UNLOCK(pip);
5041 
5042 		cdip = ct->ct_dip;
5043 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5044 		    (i_mdi_client_compute_state(ct, ph) ==
5045 		    MDI_CLIENT_STATE_FAILED)) {
5046 			/* Last path. Mark client dip as retiring */
5047 			i_mdi_client_unlock(ct);
5048 			MDI_PHCI_UNLOCK(ph);
5049 			(void) e_ddi_mark_retiring(cdip, cons_array);
5050 			MDI_PHCI_LOCK(ph);
5051 			pip = next;
5052 		} else {
5053 			i_mdi_client_unlock(ct);
5054 			pip = next;
5055 		}
5056 	}
5057 
5058 	MDI_PHCI_UNLOCK(ph);
5059 
5060 	return;
5061 }
5062 
5063 void
5064 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5065 {
5066 	mdi_phci_t	*ph;
5067 	mdi_client_t	*ct;
5068 	mdi_pathinfo_t	*pip;
5069 	mdi_pathinfo_t	*next;
5070 	dev_info_t	*cdip;
5071 
5072 	if (!MDI_PHCI(dip))
5073 		return;
5074 
5075 	ph = i_devi_get_phci(dip);
5076 	if (ph == NULL)
5077 		return;
5078 
5079 	MDI_PHCI_LOCK(ph);
5080 
5081 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5082 		MDI_PHCI_UNLOCK(ph);
5083 		/* not last path */
5084 		return;
5085 	}
5086 
5087 	if (ph->ph_unstable) {
5088 		MDI_PHCI_UNLOCK(ph);
5089 		/* can't check for constraints */
5090 		*constraint = 0;
5091 		return;
5092 	}
5093 
5094 	pip = ph->ph_path_head;
5095 	while (pip != NULL) {
5096 		MDI_PI_LOCK(pip);
5097 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5098 
5099 		/*
5100 		 * The mdi_pathinfo state is OK. Check the client state.
5101 		 * If failover in progress fail the pHCI from offlining
5102 		 */
5103 		ct = MDI_PI(pip)->pi_client;
5104 		i_mdi_client_lock(ct, pip);
5105 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5106 		    (ct->ct_unstable)) {
5107 			/*
5108 			 * Failover is in progress, can't check for constraints
5109 			 */
5110 			MDI_PI_UNLOCK(pip);
5111 			i_mdi_client_unlock(ct);
5112 			MDI_PHCI_UNLOCK(ph);
5113 			*constraint = 0;
5114 			return;
5115 		}
5116 		MDI_PI_UNLOCK(pip);
5117 
5118 		/*
5119 		 * Check to see of we are retiring the last path of this
5120 		 * client device...
5121 		 */
5122 		cdip = ct->ct_dip;
5123 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5124 		    (i_mdi_client_compute_state(ct, ph) ==
5125 		    MDI_CLIENT_STATE_FAILED)) {
5126 			i_mdi_client_unlock(ct);
5127 			MDI_PHCI_UNLOCK(ph);
5128 			(void) e_ddi_retire_notify(cdip, constraint);
5129 			MDI_PHCI_LOCK(ph);
5130 			pip = next;
5131 		} else {
5132 			i_mdi_client_unlock(ct);
5133 			pip = next;
5134 		}
5135 	}
5136 
5137 	MDI_PHCI_UNLOCK(ph);
5138 
5139 	return;
5140 }
5141 
5142 /*
5143  * offline the path(s) hanging off the PHCI. If the
5144  * last path to any client, check that constraints
5145  * have been applied.
5146  */
5147 void
5148 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
5149 {
5150 	mdi_phci_t	*ph;
5151 	mdi_client_t	*ct;
5152 	mdi_pathinfo_t	*pip;
5153 	mdi_pathinfo_t	*next;
5154 	dev_info_t	*cdip;
5155 	int		unstable = 0;
5156 	int		constraint;
5157 
5158 	if (!MDI_PHCI(dip))
5159 		return;
5160 
5161 	ph = i_devi_get_phci(dip);
5162 	if (ph == NULL) {
5163 		/* no last path and no pips */
5164 		return;
5165 	}
5166 
5167 	MDI_PHCI_LOCK(ph);
5168 
5169 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5170 		MDI_PHCI_UNLOCK(ph);
5171 		/* no last path and no pips */
5172 		return;
5173 	}
5174 
5175 	/*
5176 	 * Check to see if the pHCI can be offlined
5177 	 */
5178 	if (ph->ph_unstable) {
5179 		unstable = 1;
5180 	}
5181 
5182 	pip = ph->ph_path_head;
5183 	while (pip != NULL) {
5184 		MDI_PI_LOCK(pip);
5185 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5186 
5187 		/*
5188 		 * if failover in progress fail the pHCI from offlining
5189 		 */
5190 		ct = MDI_PI(pip)->pi_client;
5191 		i_mdi_client_lock(ct, pip);
5192 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5193 		    (ct->ct_unstable)) {
5194 			unstable = 1;
5195 		}
5196 		MDI_PI_UNLOCK(pip);
5197 
5198 		/*
5199 		 * Check to see of we are removing the last path of this
5200 		 * client device...
5201 		 */
5202 		cdip = ct->ct_dip;
5203 		if (!phci_only && cdip &&
5204 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5205 		    (i_mdi_client_compute_state(ct, ph) ==
5206 		    MDI_CLIENT_STATE_FAILED)) {
5207 			i_mdi_client_unlock(ct);
5208 			MDI_PHCI_UNLOCK(ph);
5209 			/*
5210 			 * We don't retire clients we just retire the
5211 			 * path to a client. If it is the last path
5212 			 * to a client, constraints are checked and
5213 			 * if we pass the last path is offlined. MPXIO will
5214 			 * then fail all I/Os to the client. Since we don't
5215 			 * want to retire the client on a path error
5216 			 * set constraint = 0 so that the client dip
5217 			 * is not retired.
5218 			 */
5219 			constraint = 0;
5220 			(void) e_ddi_retire_finalize(cdip, &constraint);
5221 			MDI_PHCI_LOCK(ph);
5222 			pip = next;
5223 		} else {
5224 			i_mdi_client_unlock(ct);
5225 			pip = next;
5226 		}
5227 	}
5228 
5229 	/*
5230 	 * Cannot offline pip(s)
5231 	 */
5232 	if (unstable) {
5233 		cmn_err(CE_WARN, "PHCI in transient state, cannot "
5234 		    "retire, dip = %p", (void *)dip);
5235 		MDI_PHCI_UNLOCK(ph);
5236 		return;
5237 	}
5238 
5239 	/*
5240 	 * Mark the pHCI as offline
5241 	 */
5242 	MDI_PHCI_SET_OFFLINE(ph);
5243 
5244 	/*
5245 	 * Mark the child mdi_pathinfo nodes as transient
5246 	 */
5247 	pip = ph->ph_path_head;
5248 	while (pip != NULL) {
5249 		MDI_PI_LOCK(pip);
5250 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5251 		MDI_PI_SET_OFFLINING(pip);
5252 		MDI_PI_UNLOCK(pip);
5253 		pip = next;
5254 	}
5255 	MDI_PHCI_UNLOCK(ph);
5256 	/*
5257 	 * Give a chance for any pending commands to execute
5258 	 */
5259 	delay(1);
5260 	MDI_PHCI_LOCK(ph);
5261 	pip = ph->ph_path_head;
5262 	while (pip != NULL) {
5263 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5264 		(void) i_mdi_pi_offline(pip, 0);
5265 		MDI_PI_LOCK(pip);
5266 		ct = MDI_PI(pip)->pi_client;
5267 		if (!MDI_PI_IS_OFFLINE(pip)) {
5268 			cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
5269 			    "PHCI dip = %p", (void *)dip);
5270 			MDI_PI_UNLOCK(pip);
5271 			MDI_PHCI_SET_ONLINE(ph);
5272 			MDI_PHCI_UNLOCK(ph);
5273 			return;
5274 		}
5275 		MDI_PI_UNLOCK(pip);
5276 		pip = next;
5277 	}
5278 	MDI_PHCI_UNLOCK(ph);
5279 
5280 	return;
5281 }
5282 
5283 void
5284 mdi_phci_unretire(dev_info_t *dip)
5285 {
5286 	ASSERT(MDI_PHCI(dip));
5287 
5288 	/*
5289 	 * Online the phci
5290 	 */
5291 	i_mdi_phci_online(dip);
5292 }
5293 
5294 /*ARGSUSED*/
5295 static int
5296 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5297 {
5298 	int		rv = NDI_SUCCESS;
5299 	mdi_client_t	*ct;
5300 
5301 	/*
5302 	 * Client component to go offline.  Make sure that we are
5303 	 * not in failing over state and update client state
5304 	 * accordingly
5305 	 */
5306 	ct = i_devi_get_client(dip);
5307 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n",
5308 	    (void *)dip, (void *)ct));
5309 	if (ct != NULL) {
5310 		MDI_CLIENT_LOCK(ct);
5311 		if (ct->ct_unstable) {
5312 			/*
5313 			 * One or more paths are in transient state,
5314 			 * Dont allow offline of a client device
5315 			 */
5316 			MDI_DEBUG(1, (CE_WARN, dip,
5317 			    "!One or more paths to this device is "
5318 			    "in transient state. This device can not "
5319 			    "be removed at this moment. "
5320 			    "Please try again later."));
5321 			MDI_CLIENT_UNLOCK(ct);
5322 			return (NDI_BUSY);
5323 		}
5324 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5325 			/*
5326 			 * Failover is in progress, Dont allow DR of
5327 			 * a client device
5328 			 */
5329 			MDI_DEBUG(1, (CE_WARN, dip,
5330 			    "!Client device (%s%d) is Busy. %s",
5331 			    ddi_driver_name(dip), ddi_get_instance(dip),
5332 			    "This device can not be removed at "
5333 			    "this moment. Please try again later."));
5334 			MDI_CLIENT_UNLOCK(ct);
5335 			return (NDI_BUSY);
5336 		}
5337 		MDI_CLIENT_SET_OFFLINE(ct);
5338 
5339 		/*
5340 		 * Unbind our relationship with the dev_info node
5341 		 */
5342 		if (flags & NDI_DEVI_REMOVE) {
5343 			ct->ct_dip = NULL;
5344 		}
5345 		MDI_CLIENT_UNLOCK(ct);
5346 	}
5347 	return (rv);
5348 }
5349 
5350 /*
5351  * mdi_pre_attach():
5352  *		Pre attach() notification handler
5353  */
5354 /*ARGSUSED*/
5355 int
5356 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5357 {
5358 	/* don't support old DDI_PM_RESUME */
5359 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5360 	    (cmd == DDI_PM_RESUME))
5361 		return (DDI_FAILURE);
5362 
5363 	return (DDI_SUCCESS);
5364 }
5365 
5366 /*
5367  * mdi_post_attach():
5368  *		Post attach() notification handler
5369  */
5370 /*ARGSUSED*/
5371 void
5372 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5373 {
5374 	mdi_phci_t	*ph;
5375 	mdi_client_t	*ct;
5376 	mdi_vhci_t	*vh;
5377 
5378 	if (MDI_PHCI(dip)) {
5379 		ph = i_devi_get_phci(dip);
5380 		ASSERT(ph != NULL);
5381 
5382 		MDI_PHCI_LOCK(ph);
5383 		switch (cmd) {
5384 		case DDI_ATTACH:
5385 			MDI_DEBUG(2, (CE_NOTE, dip,
5386 			    "!pHCI post_attach: called %p\n", (void *)ph));
5387 			if (error == DDI_SUCCESS) {
5388 				MDI_PHCI_SET_ATTACH(ph);
5389 			} else {
5390 				MDI_DEBUG(1, (CE_NOTE, dip,
5391 				    "!pHCI post_attach: failed error=%d\n",
5392 				    error));
5393 				MDI_PHCI_SET_DETACH(ph);
5394 			}
5395 			break;
5396 
5397 		case DDI_RESUME:
5398 			MDI_DEBUG(2, (CE_NOTE, dip,
5399 			    "!pHCI post_resume: called %p\n", (void *)ph));
5400 			if (error == DDI_SUCCESS) {
5401 				MDI_PHCI_SET_RESUME(ph);
5402 			} else {
5403 				MDI_DEBUG(1, (CE_NOTE, dip,
5404 				    "!pHCI post_resume: failed error=%d\n",
5405 				    error));
5406 				MDI_PHCI_SET_SUSPEND(ph);
5407 			}
5408 			break;
5409 		}
5410 		MDI_PHCI_UNLOCK(ph);
5411 	}
5412 
5413 	if (MDI_CLIENT(dip)) {
5414 		ct = i_devi_get_client(dip);
5415 		ASSERT(ct != NULL);
5416 
5417 		MDI_CLIENT_LOCK(ct);
5418 		switch (cmd) {
5419 		case DDI_ATTACH:
5420 			MDI_DEBUG(2, (CE_NOTE, dip,
5421 			    "!Client post_attach: called %p\n", (void *)ct));
5422 			if (error != DDI_SUCCESS) {
5423 				MDI_DEBUG(1, (CE_NOTE, dip,
5424 				    "!Client post_attach: failed error=%d\n",
5425 				    error));
5426 				MDI_CLIENT_SET_DETACH(ct);
5427 				MDI_DEBUG(4, (CE_WARN, dip,
5428 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
5429 				i_mdi_pm_reset_client(ct);
5430 				break;
5431 			}
5432 
5433 			/*
5434 			 * Client device has successfully attached, inform
5435 			 * the vhci.
5436 			 */
5437 			vh = ct->ct_vhci;
5438 			if (vh->vh_ops->vo_client_attached)
5439 				(*vh->vh_ops->vo_client_attached)(dip);
5440 
5441 			MDI_CLIENT_SET_ATTACH(ct);
5442 			break;
5443 
5444 		case DDI_RESUME:
5445 			MDI_DEBUG(2, (CE_NOTE, dip,
5446 			    "!Client post_attach: called %p\n", (void *)ct));
5447 			if (error == DDI_SUCCESS) {
5448 				MDI_CLIENT_SET_RESUME(ct);
5449 			} else {
5450 				MDI_DEBUG(1, (CE_NOTE, dip,
5451 				    "!Client post_resume: failed error=%d\n",
5452 				    error));
5453 				MDI_CLIENT_SET_SUSPEND(ct);
5454 			}
5455 			break;
5456 		}
5457 		MDI_CLIENT_UNLOCK(ct);
5458 	}
5459 }
5460 
5461 /*
5462  * mdi_pre_detach():
5463  *		Pre detach notification handler
5464  */
5465 /*ARGSUSED*/
5466 int
5467 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5468 {
5469 	int rv = DDI_SUCCESS;
5470 
5471 	if (MDI_CLIENT(dip)) {
5472 		(void) i_mdi_client_pre_detach(dip, cmd);
5473 	}
5474 
5475 	if (MDI_PHCI(dip)) {
5476 		rv = i_mdi_phci_pre_detach(dip, cmd);
5477 	}
5478 
5479 	return (rv);
5480 }
5481 
5482 /*ARGSUSED*/
5483 static int
5484 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5485 {
5486 	int		rv = DDI_SUCCESS;
5487 	mdi_phci_t	*ph;
5488 	mdi_client_t	*ct;
5489 	mdi_pathinfo_t	*pip;
5490 	mdi_pathinfo_t	*failed_pip = NULL;
5491 	mdi_pathinfo_t	*next;
5492 
5493 	ph = i_devi_get_phci(dip);
5494 	if (ph == NULL) {
5495 		return (rv);
5496 	}
5497 
5498 	MDI_PHCI_LOCK(ph);
5499 	switch (cmd) {
5500 	case DDI_DETACH:
5501 		MDI_DEBUG(2, (CE_NOTE, dip,
5502 		    "!pHCI pre_detach: called %p\n", (void *)ph));
5503 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
5504 			/*
5505 			 * mdi_pathinfo nodes are still attached to
5506 			 * this pHCI. Fail the detach for this pHCI.
5507 			 */
5508 			MDI_DEBUG(2, (CE_WARN, dip,
5509 			    "!pHCI pre_detach: "
5510 			    "mdi_pathinfo nodes are still attached "
5511 			    "%p\n", (void *)ph));
5512 			rv = DDI_FAILURE;
5513 			break;
5514 		}
5515 		MDI_PHCI_SET_DETACH(ph);
5516 		break;
5517 
5518 	case DDI_SUSPEND:
5519 		/*
5520 		 * pHCI is getting suspended.  Since mpxio client
5521 		 * devices may not be suspended at this point, to avoid
5522 		 * a potential stack overflow, it is important to suspend
5523 		 * client devices before pHCI can be suspended.
5524 		 */
5525 
5526 		MDI_DEBUG(2, (CE_NOTE, dip,
5527 		    "!pHCI pre_suspend: called %p\n", (void *)ph));
5528 		/*
5529 		 * Suspend all the client devices accessible through this pHCI
5530 		 */
5531 		pip = ph->ph_path_head;
5532 		while (pip != NULL && rv == DDI_SUCCESS) {
5533 			dev_info_t *cdip;
5534 			MDI_PI_LOCK(pip);
5535 			next =
5536 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5537 			ct = MDI_PI(pip)->pi_client;
5538 			i_mdi_client_lock(ct, pip);
5539 			cdip = ct->ct_dip;
5540 			MDI_PI_UNLOCK(pip);
5541 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5542 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5543 				i_mdi_client_unlock(ct);
5544 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5545 				    DDI_SUCCESS) {
5546 					/*
5547 					 * Suspend of one of the client
5548 					 * device has failed.
5549 					 */
5550 					MDI_DEBUG(1, (CE_WARN, dip,
5551 					    "!Suspend of device (%s%d) failed.",
5552 					    ddi_driver_name(cdip),
5553 					    ddi_get_instance(cdip)));
5554 					failed_pip = pip;
5555 					break;
5556 				}
5557 			} else {
5558 				i_mdi_client_unlock(ct);
5559 			}
5560 			pip = next;
5561 		}
5562 
5563 		if (rv == DDI_SUCCESS) {
5564 			/*
5565 			 * Suspend of client devices is complete. Proceed
5566 			 * with pHCI suspend.
5567 			 */
5568 			MDI_PHCI_SET_SUSPEND(ph);
5569 		} else {
5570 			/*
5571 			 * Revert back all the suspended client device states
5572 			 * to converse.
5573 			 */
5574 			pip = ph->ph_path_head;
5575 			while (pip != failed_pip) {
5576 				dev_info_t *cdip;
5577 				MDI_PI_LOCK(pip);
5578 				next =
5579 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5580 				ct = MDI_PI(pip)->pi_client;
5581 				i_mdi_client_lock(ct, pip);
5582 				cdip = ct->ct_dip;
5583 				MDI_PI_UNLOCK(pip);
5584 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5585 					i_mdi_client_unlock(ct);
5586 					(void) devi_attach(cdip, DDI_RESUME);
5587 				} else {
5588 					i_mdi_client_unlock(ct);
5589 				}
5590 				pip = next;
5591 			}
5592 		}
5593 		break;
5594 
5595 	default:
5596 		rv = DDI_FAILURE;
5597 		break;
5598 	}
5599 	MDI_PHCI_UNLOCK(ph);
5600 	return (rv);
5601 }
5602 
5603 /*ARGSUSED*/
5604 static int
5605 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5606 {
5607 	int		rv = DDI_SUCCESS;
5608 	mdi_client_t	*ct;
5609 
5610 	ct = i_devi_get_client(dip);
5611 	if (ct == NULL) {
5612 		return (rv);
5613 	}
5614 
5615 	MDI_CLIENT_LOCK(ct);
5616 	switch (cmd) {
5617 	case DDI_DETACH:
5618 		MDI_DEBUG(2, (CE_NOTE, dip,
5619 		    "!Client pre_detach: called %p\n", (void *)ct));
5620 		MDI_CLIENT_SET_DETACH(ct);
5621 		break;
5622 
5623 	case DDI_SUSPEND:
5624 		MDI_DEBUG(2, (CE_NOTE, dip,
5625 		    "!Client pre_suspend: called %p\n", (void *)ct));
5626 		MDI_CLIENT_SET_SUSPEND(ct);
5627 		break;
5628 
5629 	default:
5630 		rv = DDI_FAILURE;
5631 		break;
5632 	}
5633 	MDI_CLIENT_UNLOCK(ct);
5634 	return (rv);
5635 }
5636 
5637 /*
5638  * mdi_post_detach():
5639  *		Post detach notification handler
5640  */
5641 /*ARGSUSED*/
5642 void
5643 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5644 {
5645 	/*
5646 	 * Detach/Suspend of mpxio component failed. Update our state
5647 	 * too
5648 	 */
5649 	if (MDI_PHCI(dip))
5650 		i_mdi_phci_post_detach(dip, cmd, error);
5651 
5652 	if (MDI_CLIENT(dip))
5653 		i_mdi_client_post_detach(dip, cmd, error);
5654 }
5655 
5656 /*ARGSUSED*/
5657 static void
5658 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5659 {
5660 	mdi_phci_t	*ph;
5661 
5662 	/*
5663 	 * Detach/Suspend of phci component failed. Update our state
5664 	 * too
5665 	 */
5666 	ph = i_devi_get_phci(dip);
5667 	if (ph == NULL) {
5668 		return;
5669 	}
5670 
5671 	MDI_PHCI_LOCK(ph);
5672 	/*
5673 	 * Detach of pHCI failed. Restore back converse
5674 	 * state
5675 	 */
5676 	switch (cmd) {
5677 	case DDI_DETACH:
5678 		MDI_DEBUG(2, (CE_NOTE, dip,
5679 		    "!pHCI post_detach: called %p\n", (void *)ph));
5680 		if (error != DDI_SUCCESS)
5681 			MDI_PHCI_SET_ATTACH(ph);
5682 		break;
5683 
5684 	case DDI_SUSPEND:
5685 		MDI_DEBUG(2, (CE_NOTE, dip,
5686 		    "!pHCI post_suspend: called %p\n", (void *)ph));
5687 		if (error != DDI_SUCCESS)
5688 			MDI_PHCI_SET_RESUME(ph);
5689 		break;
5690 	}
5691 	MDI_PHCI_UNLOCK(ph);
5692 }
5693 
5694 /*ARGSUSED*/
5695 static void
5696 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5697 {
5698 	mdi_client_t	*ct;
5699 
5700 	ct = i_devi_get_client(dip);
5701 	if (ct == NULL) {
5702 		return;
5703 	}
5704 	MDI_CLIENT_LOCK(ct);
5705 	/*
5706 	 * Detach of Client failed. Restore back converse
5707 	 * state
5708 	 */
5709 	switch (cmd) {
5710 	case DDI_DETACH:
5711 		MDI_DEBUG(2, (CE_NOTE, dip,
5712 		    "!Client post_detach: called %p\n", (void *)ct));
5713 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5714 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5715 			    "i_mdi_pm_rele_client\n"));
5716 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5717 		} else {
5718 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5719 			    "i_mdi_pm_reset_client\n"));
5720 			i_mdi_pm_reset_client(ct);
5721 		}
5722 		if (error != DDI_SUCCESS)
5723 			MDI_CLIENT_SET_ATTACH(ct);
5724 		break;
5725 
5726 	case DDI_SUSPEND:
5727 		MDI_DEBUG(2, (CE_NOTE, dip,
5728 		    "!Client post_suspend: called %p\n", (void *)ct));
5729 		if (error != DDI_SUCCESS)
5730 			MDI_CLIENT_SET_RESUME(ct);
5731 		break;
5732 	}
5733 	MDI_CLIENT_UNLOCK(ct);
5734 }
5735 
5736 int
5737 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
5738 {
5739 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
5740 }
5741 
5742 /*
5743  * create and install per-path (client - pHCI) statistics
5744  * I/O stats supported: nread, nwritten, reads, and writes
5745  * Error stats - hard errors, soft errors, & transport errors
5746  */
5747 int
5748 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
5749 {
5750 	kstat_t			*kiosp, *kerrsp;
5751 	struct pi_errs		*nsp;
5752 	struct mdi_pi_kstats	*mdi_statp;
5753 
5754 	if (MDI_PI(pip)->pi_kstats != NULL)
5755 		return (MDI_SUCCESS);
5756 
5757 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5758 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
5759 		return (MDI_FAILURE);
5760 	}
5761 
5762 	(void) strcat(ksname, ",err");
5763 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5764 	    KSTAT_TYPE_NAMED,
5765 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5766 	if (kerrsp == NULL) {
5767 		kstat_delete(kiosp);
5768 		return (MDI_FAILURE);
5769 	}
5770 
5771 	nsp = (struct pi_errs *)kerrsp->ks_data;
5772 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5773 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5774 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5775 	    KSTAT_DATA_UINT32);
5776 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5777 	    KSTAT_DATA_UINT32);
5778 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5779 	    KSTAT_DATA_UINT32);
5780 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5781 	    KSTAT_DATA_UINT32);
5782 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5783 	    KSTAT_DATA_UINT32);
5784 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5785 	    KSTAT_DATA_UINT32);
5786 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5787 	    KSTAT_DATA_UINT32);
5788 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5789 
5790 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5791 	mdi_statp->pi_kstat_ref = 1;
5792 	mdi_statp->pi_kstat_iostats = kiosp;
5793 	mdi_statp->pi_kstat_errstats = kerrsp;
5794 	kstat_install(kiosp);
5795 	kstat_install(kerrsp);
5796 	MDI_PI(pip)->pi_kstats = mdi_statp;
5797 	return (MDI_SUCCESS);
5798 }
5799 
5800 /*
5801  * destroy per-path properties
5802  */
5803 static void
5804 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5805 {
5806 
5807 	struct mdi_pi_kstats *mdi_statp;
5808 
5809 	if (MDI_PI(pip)->pi_kstats == NULL)
5810 		return;
5811 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5812 		return;
5813 
5814 	MDI_PI(pip)->pi_kstats = NULL;
5815 
5816 	/*
5817 	 * the kstat may be shared between multiple pathinfo nodes
5818 	 * decrement this pathinfo's usage, removing the kstats
5819 	 * themselves when the last pathinfo reference is removed.
5820 	 */
5821 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5822 	if (--mdi_statp->pi_kstat_ref != 0)
5823 		return;
5824 
5825 	kstat_delete(mdi_statp->pi_kstat_iostats);
5826 	kstat_delete(mdi_statp->pi_kstat_errstats);
5827 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5828 }
5829 
5830 /*
5831  * update I/O paths KSTATS
5832  */
5833 void
5834 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5835 {
5836 	kstat_t *iostatp;
5837 	size_t xfer_cnt;
5838 
5839 	ASSERT(pip != NULL);
5840 
5841 	/*
5842 	 * I/O can be driven across a path prior to having path
5843 	 * statistics available, i.e. probe(9e).
5844 	 */
5845 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5846 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5847 		xfer_cnt = bp->b_bcount - bp->b_resid;
5848 		if (bp->b_flags & B_READ) {
5849 			KSTAT_IO_PTR(iostatp)->reads++;
5850 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5851 		} else {
5852 			KSTAT_IO_PTR(iostatp)->writes++;
5853 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5854 		}
5855 	}
5856 }
5857 
5858 /*
5859  * Enable the path(specific client/target/initiator)
5860  * Enabling a path means that MPxIO may select the enabled path for routing
5861  * future I/O requests, subject to other path state constraints.
5862  */
5863 int
5864 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
5865 {
5866 	mdi_phci_t	*ph;
5867 
5868 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5869 	if (ph == NULL) {
5870 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5871 			" failed. pip: %p ph = NULL\n", (void *)pip));
5872 		return (MDI_FAILURE);
5873 	}
5874 
5875 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
5876 		MDI_ENABLE_OP);
5877 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5878 		" Returning success pip = %p. ph = %p\n",
5879 		(void *)pip, (void *)ph));
5880 	return (MDI_SUCCESS);
5881 
5882 }
5883 
5884 /*
5885  * Disable the path (specific client/target/initiator)
5886  * Disabling a path means that MPxIO will not select the disabled path for
5887  * routing any new I/O requests.
5888  */
5889 int
5890 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
5891 {
5892 	mdi_phci_t	*ph;
5893 
5894 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5895 	if (ph == NULL) {
5896 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5897 			" failed. pip: %p ph = NULL\n", (void *)pip));
5898 		return (MDI_FAILURE);
5899 	}
5900 
5901 	(void) i_mdi_enable_disable_path(pip,
5902 			ph->ph_vhci, flags, MDI_DISABLE_OP);
5903 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5904 		"Returning success pip = %p. ph = %p",
5905 		(void *)pip, (void *)ph));
5906 	return (MDI_SUCCESS);
5907 }
5908 
5909 /*
5910  * disable the path to a particular pHCI (pHCI specified in the phci_path
5911  * argument) for a particular client (specified in the client_path argument).
5912  * Disabling a path means that MPxIO will not select the disabled path for
5913  * routing any new I/O requests.
5914  * NOTE: this will be removed once the NWS files are changed to use the new
5915  * mdi_{enable,disable}_path interfaces
5916  */
5917 int
5918 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5919 {
5920 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5921 }
5922 
5923 /*
5924  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5925  * argument) for a particular client (specified in the client_path argument).
5926  * Enabling a path means that MPxIO may select the enabled path for routing
5927  * future I/O requests, subject to other path state constraints.
5928  * NOTE: this will be removed once the NWS files are changed to use the new
5929  * mdi_{enable,disable}_path interfaces
5930  */
5931 
5932 int
5933 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5934 {
5935 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5936 }
5937 
5938 /*
5939  * Common routine for doing enable/disable.
5940  */
5941 static mdi_pathinfo_t *
5942 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
5943 		int op)
5944 {
5945 	int		sync_flag = 0;
5946 	int		rv;
5947 	mdi_pathinfo_t 	*next;
5948 	int		(*f)() = NULL;
5949 
5950 	f = vh->vh_ops->vo_pi_state_change;
5951 
5952 	sync_flag = (flags << 8) & 0xf00;
5953 
5954 	/*
5955 	 * Do a callback into the mdi consumer to let it
5956 	 * know that path is about to get enabled/disabled.
5957 	 */
5958 	if (f != NULL) {
5959 		rv = (*f)(vh->vh_dip, pip, 0,
5960 			MDI_PI_EXT_STATE(pip),
5961 			MDI_EXT_STATE_CHANGE | sync_flag |
5962 			op | MDI_BEFORE_STATE_CHANGE);
5963 		if (rv != MDI_SUCCESS) {
5964 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5965 			"!vo_pi_state_change: failed rv = %x", rv));
5966 		}
5967 	}
5968 	MDI_PI_LOCK(pip);
5969 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5970 
5971 	switch (flags) {
5972 		case USER_DISABLE:
5973 			if (op == MDI_DISABLE_OP) {
5974 				MDI_PI_SET_USER_DISABLE(pip);
5975 			} else {
5976 				MDI_PI_SET_USER_ENABLE(pip);
5977 			}
5978 			break;
5979 		case DRIVER_DISABLE:
5980 			if (op == MDI_DISABLE_OP) {
5981 				MDI_PI_SET_DRV_DISABLE(pip);
5982 			} else {
5983 				MDI_PI_SET_DRV_ENABLE(pip);
5984 			}
5985 			break;
5986 		case DRIVER_DISABLE_TRANSIENT:
5987 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
5988 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5989 			} else {
5990 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5991 			}
5992 			break;
5993 	}
5994 	MDI_PI_UNLOCK(pip);
5995 	/*
5996 	 * Do a callback into the mdi consumer to let it
5997 	 * know that path is now enabled/disabled.
5998 	 */
5999 	if (f != NULL) {
6000 		rv = (*f)(vh->vh_dip, pip, 0,
6001 			MDI_PI_EXT_STATE(pip),
6002 			MDI_EXT_STATE_CHANGE | sync_flag |
6003 			op | MDI_AFTER_STATE_CHANGE);
6004 		if (rv != MDI_SUCCESS) {
6005 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
6006 			"!vo_pi_state_change: failed rv = %x", rv));
6007 		}
6008 	}
6009 	return (next);
6010 }
6011 
6012 /*
6013  * Common routine for doing enable/disable.
6014  * NOTE: this will be removed once the NWS files are changed to use the new
6015  * mdi_{enable,disable}_path has been putback
6016  */
6017 int
6018 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6019 {
6020 
6021 	mdi_phci_t	*ph;
6022 	mdi_vhci_t	*vh = NULL;
6023 	mdi_client_t	*ct;
6024 	mdi_pathinfo_t	*next, *pip;
6025 	int		found_it;
6026 
6027 	ph = i_devi_get_phci(pdip);
6028 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6029 		"Op = %d pdip = %p cdip = %p\n", op, (void *)pdip,
6030 		(void *)cdip));
6031 	if (ph == NULL) {
6032 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
6033 			"Op %d failed. ph = NULL\n", op));
6034 		return (MDI_FAILURE);
6035 	}
6036 
6037 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6038 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6039 			"Op Invalid operation = %d\n", op));
6040 		return (MDI_FAILURE);
6041 	}
6042 
6043 	vh = ph->ph_vhci;
6044 
6045 	if (cdip == NULL) {
6046 		/*
6047 		 * Need to mark the Phci as enabled/disabled.
6048 		 */
6049 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6050 		"Op %d for the phci\n", op));
6051 		MDI_PHCI_LOCK(ph);
6052 		switch (flags) {
6053 			case USER_DISABLE:
6054 				if (op == MDI_DISABLE_OP) {
6055 					MDI_PHCI_SET_USER_DISABLE(ph);
6056 				} else {
6057 					MDI_PHCI_SET_USER_ENABLE(ph);
6058 				}
6059 				break;
6060 			case DRIVER_DISABLE:
6061 				if (op == MDI_DISABLE_OP) {
6062 					MDI_PHCI_SET_DRV_DISABLE(ph);
6063 				} else {
6064 					MDI_PHCI_SET_DRV_ENABLE(ph);
6065 				}
6066 				break;
6067 			case DRIVER_DISABLE_TRANSIENT:
6068 				if (op == MDI_DISABLE_OP) {
6069 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6070 				} else {
6071 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6072 				}
6073 				break;
6074 			default:
6075 				MDI_PHCI_UNLOCK(ph);
6076 				MDI_DEBUG(1, (CE_NOTE, NULL,
6077 				"!i_mdi_pi_enable_disable:"
6078 				" Invalid flag argument= %d\n", flags));
6079 		}
6080 
6081 		/*
6082 		 * Phci has been disabled. Now try to enable/disable
6083 		 * path info's to each client.
6084 		 */
6085 		pip = ph->ph_path_head;
6086 		while (pip != NULL) {
6087 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6088 		}
6089 		MDI_PHCI_UNLOCK(ph);
6090 	} else {
6091 
6092 		/*
6093 		 * Disable a specific client.
6094 		 */
6095 		ct = i_devi_get_client(cdip);
6096 		if (ct == NULL) {
6097 			MDI_DEBUG(1, (CE_NOTE, NULL,
6098 			"!i_mdi_pi_enable_disable:"
6099 			" failed. ct = NULL operation = %d\n", op));
6100 			return (MDI_FAILURE);
6101 		}
6102 
6103 		MDI_CLIENT_LOCK(ct);
6104 		pip = ct->ct_path_head;
6105 		found_it = 0;
6106 		while (pip != NULL) {
6107 			MDI_PI_LOCK(pip);
6108 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6109 			if (MDI_PI(pip)->pi_phci == ph) {
6110 				MDI_PI_UNLOCK(pip);
6111 				found_it = 1;
6112 				break;
6113 			}
6114 			MDI_PI_UNLOCK(pip);
6115 			pip = next;
6116 		}
6117 
6118 
6119 		MDI_CLIENT_UNLOCK(ct);
6120 		if (found_it == 0) {
6121 			MDI_DEBUG(1, (CE_NOTE, NULL,
6122 			"!i_mdi_pi_enable_disable:"
6123 			" failed. Could not find corresponding pip\n"));
6124 			return (MDI_FAILURE);
6125 		}
6126 
6127 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
6128 	}
6129 
6130 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6131 		"Op %d Returning success pdip = %p cdip = %p\n",
6132 		op, (void *)pdip, (void *)cdip));
6133 	return (MDI_SUCCESS);
6134 }
6135 
6136 /*
6137  * Ensure phci powered up
6138  */
6139 static void
6140 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6141 {
6142 	dev_info_t	*ph_dip;
6143 
6144 	ASSERT(pip != NULL);
6145 	ASSERT(MDI_PI_LOCKED(pip));
6146 
6147 	if (MDI_PI(pip)->pi_pm_held) {
6148 		return;
6149 	}
6150 
6151 	ph_dip = mdi_pi_get_phci(pip);
6152 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n",
6153 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
6154 	if (ph_dip == NULL) {
6155 		return;
6156 	}
6157 
6158 	MDI_PI_UNLOCK(pip);
6159 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
6160 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6161 
6162 	pm_hold_power(ph_dip);
6163 
6164 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
6165 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6166 	MDI_PI_LOCK(pip);
6167 
6168 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6169 	if (DEVI(ph_dip)->devi_pm_info)
6170 		MDI_PI(pip)->pi_pm_held = 1;
6171 }
6172 
6173 /*
6174  * Allow phci powered down
6175  */
6176 static void
6177 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6178 {
6179 	dev_info_t	*ph_dip = NULL;
6180 
6181 	ASSERT(pip != NULL);
6182 	ASSERT(MDI_PI_LOCKED(pip));
6183 
6184 	if (MDI_PI(pip)->pi_pm_held == 0) {
6185 		return;
6186 	}
6187 
6188 	ph_dip = mdi_pi_get_phci(pip);
6189 	ASSERT(ph_dip != NULL);
6190 
6191 	MDI_PI_UNLOCK(pip);
6192 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n",
6193 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
6194 
6195 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
6196 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6197 	pm_rele_power(ph_dip);
6198 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
6199 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6200 
6201 	MDI_PI_LOCK(pip);
6202 	MDI_PI(pip)->pi_pm_held = 0;
6203 }
6204 
6205 static void
6206 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6207 {
6208 	ASSERT(MDI_CLIENT_LOCKED(ct));
6209 
6210 	ct->ct_power_cnt += incr;
6211 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p "
6212 	    "ct_power_cnt = %d incr = %d\n", (void *)ct,
6213 	    ct->ct_power_cnt, incr));
6214 	ASSERT(ct->ct_power_cnt >= 0);
6215 }
6216 
6217 static void
6218 i_mdi_rele_all_phci(mdi_client_t *ct)
6219 {
6220 	mdi_pathinfo_t  *pip;
6221 
6222 	ASSERT(MDI_CLIENT_LOCKED(ct));
6223 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6224 	while (pip != NULL) {
6225 		mdi_hold_path(pip);
6226 		MDI_PI_LOCK(pip);
6227 		i_mdi_pm_rele_pip(pip);
6228 		MDI_PI_UNLOCK(pip);
6229 		mdi_rele_path(pip);
6230 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6231 	}
6232 }
6233 
6234 static void
6235 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6236 {
6237 	ASSERT(MDI_CLIENT_LOCKED(ct));
6238 
6239 	if (i_ddi_devi_attached(ct->ct_dip)) {
6240 		ct->ct_power_cnt -= decr;
6241 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
6242 		    "ct_power_cnt = %d decr = %d\n",
6243 		    (void *)ct, ct->ct_power_cnt, decr));
6244 	}
6245 
6246 	ASSERT(ct->ct_power_cnt >= 0);
6247 	if (ct->ct_power_cnt == 0) {
6248 		i_mdi_rele_all_phci(ct);
6249 		return;
6250 	}
6251 }
6252 
6253 static void
6254 i_mdi_pm_reset_client(mdi_client_t *ct)
6255 {
6256 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
6257 	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
6258 	ASSERT(MDI_CLIENT_LOCKED(ct));
6259 	ct->ct_power_cnt = 0;
6260 	i_mdi_rele_all_phci(ct);
6261 	ct->ct_powercnt_config = 0;
6262 	ct->ct_powercnt_unconfig = 0;
6263 	ct->ct_powercnt_reset = 1;
6264 }
6265 
6266 static int
6267 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6268 {
6269 	int		ret;
6270 	dev_info_t	*ph_dip;
6271 
6272 	MDI_PI_LOCK(pip);
6273 	i_mdi_pm_hold_pip(pip);
6274 
6275 	ph_dip = mdi_pi_get_phci(pip);
6276 	MDI_PI_UNLOCK(pip);
6277 
6278 	/* bring all components of phci to full power */
6279 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6280 	    "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip),
6281 	    ddi_get_instance(ph_dip), (void *)pip));
6282 
6283 	ret = pm_powerup(ph_dip);
6284 
6285 	if (ret == DDI_FAILURE) {
6286 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6287 		    "pm_powerup FAILED for %s%d %p\n",
6288 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip),
6289 		    (void *)pip));
6290 
6291 		MDI_PI_LOCK(pip);
6292 		i_mdi_pm_rele_pip(pip);
6293 		MDI_PI_UNLOCK(pip);
6294 		return (MDI_FAILURE);
6295 	}
6296 
6297 	return (MDI_SUCCESS);
6298 }
6299 
6300 static int
6301 i_mdi_power_all_phci(mdi_client_t *ct)
6302 {
6303 	mdi_pathinfo_t  *pip;
6304 	int		succeeded = 0;
6305 
6306 	ASSERT(MDI_CLIENT_LOCKED(ct));
6307 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6308 	while (pip != NULL) {
6309 		/*
6310 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6311 		 * or MDI_PATHINFO_STATE_OFFLINE.
6312 		 */
6313 		if (MDI_PI_IS_INIT(pip) ||
6314 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6315 			mdi_hold_path(pip);
6316 			MDI_CLIENT_UNLOCK(ct);
6317 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6318 				succeeded = 1;
6319 
6320 			ASSERT(ct == MDI_PI(pip)->pi_client);
6321 			MDI_CLIENT_LOCK(ct);
6322 			mdi_rele_path(pip);
6323 		}
6324 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6325 	}
6326 
6327 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6328 }
6329 
6330 /*
6331  * mdi_bus_power():
6332  *		1. Place the phci(s) into powered up state so that
6333  *		   client can do power management
6334  *		2. Ensure phci powered up as client power managing
6335  * Return Values:
6336  *		MDI_SUCCESS
6337  *		MDI_FAILURE
6338  */
6339 int
6340 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6341     void *arg, void *result)
6342 {
6343 	int			ret = MDI_SUCCESS;
6344 	pm_bp_child_pwrchg_t	*bpc;
6345 	mdi_client_t		*ct;
6346 	dev_info_t		*cdip;
6347 	pm_bp_has_changed_t	*bphc;
6348 
6349 	/*
6350 	 * BUS_POWER_NOINVOL not supported
6351 	 */
6352 	if (op == BUS_POWER_NOINVOL)
6353 		return (MDI_FAILURE);
6354 
6355 	/*
6356 	 * ignore other OPs.
6357 	 * return quickly to save cou cycles on the ct processing
6358 	 */
6359 	switch (op) {
6360 	case BUS_POWER_PRE_NOTIFICATION:
6361 	case BUS_POWER_POST_NOTIFICATION:
6362 		bpc = (pm_bp_child_pwrchg_t *)arg;
6363 		cdip = bpc->bpc_dip;
6364 		break;
6365 	case BUS_POWER_HAS_CHANGED:
6366 		bphc = (pm_bp_has_changed_t *)arg;
6367 		cdip = bphc->bphc_dip;
6368 		break;
6369 	default:
6370 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6371 	}
6372 
6373 	ASSERT(MDI_CLIENT(cdip));
6374 
6375 	ct = i_devi_get_client(cdip);
6376 	if (ct == NULL)
6377 		return (MDI_FAILURE);
6378 
6379 	/*
6380 	 * wait till the mdi_pathinfo node state change are processed
6381 	 */
6382 	MDI_CLIENT_LOCK(ct);
6383 	switch (op) {
6384 	case BUS_POWER_PRE_NOTIFICATION:
6385 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
6386 		    "BUS_POWER_PRE_NOTIFICATION:"
6387 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
6388 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6389 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6390 
6391 		/* serialize power level change per client */
6392 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6393 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6394 
6395 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
6396 
6397 		if (ct->ct_power_cnt == 0) {
6398 			ret = i_mdi_power_all_phci(ct);
6399 		}
6400 
6401 		/*
6402 		 * if new_level > 0:
6403 		 *	- hold phci(s)
6404 		 *	- power up phci(s) if not already
6405 		 * ignore power down
6406 		 */
6407 		if (bpc->bpc_nlevel > 0) {
6408 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6409 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6410 				    "mdi_bus_power i_mdi_pm_hold_client\n"));
6411 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6412 			}
6413 		}
6414 		break;
6415 	case BUS_POWER_POST_NOTIFICATION:
6416 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
6417 		    "BUS_POWER_POST_NOTIFICATION:"
6418 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
6419 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6420 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6421 		    *(int *)result));
6422 
6423 		if (*(int *)result == DDI_SUCCESS) {
6424 			if (bpc->bpc_nlevel > 0) {
6425 				MDI_CLIENT_SET_POWER_UP(ct);
6426 			} else {
6427 				MDI_CLIENT_SET_POWER_DOWN(ct);
6428 			}
6429 		}
6430 
6431 		/* release the hold we did in pre-notification */
6432 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6433 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
6434 			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6435 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
6436 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6437 		}
6438 
6439 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6440 			/* another thread might started attaching */
6441 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6442 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6443 				    "mdi_bus_power i_mdi_pm_rele_client\n"));
6444 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6445 			/* detaching has been taken care in pm_post_unconfig */
6446 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6447 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
6448 				    "mdi_bus_power i_mdi_pm_reset_client\n"));
6449 				i_mdi_pm_reset_client(ct);
6450 			}
6451 		}
6452 
6453 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6454 		cv_broadcast(&ct->ct_powerchange_cv);
6455 
6456 		break;
6457 
6458 	/* need to do more */
6459 	case BUS_POWER_HAS_CHANGED:
6460 		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
6461 		    "BUS_POWER_HAS_CHANGED:"
6462 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
6463 		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6464 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6465 
6466 		if (bphc->bphc_nlevel > 0 &&
6467 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
6468 			if (ct->ct_power_cnt == 0) {
6469 				ret = i_mdi_power_all_phci(ct);
6470 			}
6471 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
6472 			    "mdi_bus_power i_mdi_pm_hold_client\n"));
6473 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
6474 		}
6475 
6476 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6477 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
6478 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
6479 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6480 		}
6481 		break;
6482 	}
6483 
6484 	MDI_CLIENT_UNLOCK(ct);
6485 	return (ret);
6486 }
6487 
6488 static int
6489 i_mdi_pm_pre_config_one(dev_info_t *child)
6490 {
6491 	int		ret = MDI_SUCCESS;
6492 	mdi_client_t	*ct;
6493 
6494 	ct = i_devi_get_client(child);
6495 	if (ct == NULL)
6496 		return (MDI_FAILURE);
6497 
6498 	MDI_CLIENT_LOCK(ct);
6499 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6500 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6501 
6502 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6503 		MDI_CLIENT_UNLOCK(ct);
6504 		MDI_DEBUG(4, (CE_NOTE, child,
6505 		    "i_mdi_pm_pre_config_one already configured\n"));
6506 		return (MDI_SUCCESS);
6507 	}
6508 
6509 	if (ct->ct_powercnt_config) {
6510 		MDI_CLIENT_UNLOCK(ct);
6511 		MDI_DEBUG(4, (CE_NOTE, child,
6512 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
6513 		return (MDI_SUCCESS);
6514 	}
6515 
6516 	if (ct->ct_power_cnt == 0) {
6517 		ret = i_mdi_power_all_phci(ct);
6518 	}
6519 	MDI_DEBUG(4, (CE_NOTE, child,
6520 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
6521 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6522 	ct->ct_powercnt_config = 1;
6523 	ct->ct_powercnt_reset = 0;
6524 	MDI_CLIENT_UNLOCK(ct);
6525 	return (ret);
6526 }
6527 
6528 static int
6529 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6530 {
6531 	int			ret = MDI_SUCCESS;
6532 	dev_info_t		*cdip;
6533 	int			circ;
6534 
6535 	ASSERT(MDI_VHCI(vdip));
6536 
6537 	/* ndi_devi_config_one */
6538 	if (child) {
6539 		ASSERT(DEVI_BUSY_OWNED(vdip));
6540 		return (i_mdi_pm_pre_config_one(child));
6541 	}
6542 
6543 	/* devi_config_common */
6544 	ndi_devi_enter(vdip, &circ);
6545 	cdip = ddi_get_child(vdip);
6546 	while (cdip) {
6547 		dev_info_t *next = ddi_get_next_sibling(cdip);
6548 
6549 		ret = i_mdi_pm_pre_config_one(cdip);
6550 		if (ret != MDI_SUCCESS)
6551 			break;
6552 		cdip = next;
6553 	}
6554 	ndi_devi_exit(vdip, circ);
6555 	return (ret);
6556 }
6557 
6558 static int
6559 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6560 {
6561 	int		ret = MDI_SUCCESS;
6562 	mdi_client_t	*ct;
6563 
6564 	ct = i_devi_get_client(child);
6565 	if (ct == NULL)
6566 		return (MDI_FAILURE);
6567 
6568 	MDI_CLIENT_LOCK(ct);
6569 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6570 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6571 
6572 	if (!i_ddi_devi_attached(ct->ct_dip)) {
6573 		MDI_DEBUG(4, (CE_NOTE, child,
6574 		    "i_mdi_pm_pre_unconfig node detached already\n"));
6575 		MDI_CLIENT_UNLOCK(ct);
6576 		return (MDI_SUCCESS);
6577 	}
6578 
6579 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6580 	    (flags & NDI_AUTODETACH)) {
6581 		MDI_DEBUG(4, (CE_NOTE, child,
6582 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
6583 		MDI_CLIENT_UNLOCK(ct);
6584 		return (MDI_FAILURE);
6585 	}
6586 
6587 	if (ct->ct_powercnt_unconfig) {
6588 		MDI_DEBUG(4, (CE_NOTE, child,
6589 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
6590 		MDI_CLIENT_UNLOCK(ct);
6591 		*held = 1;
6592 		return (MDI_SUCCESS);
6593 	}
6594 
6595 	if (ct->ct_power_cnt == 0) {
6596 		ret = i_mdi_power_all_phci(ct);
6597 	}
6598 	MDI_DEBUG(4, (CE_NOTE, child,
6599 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
6600 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6601 	ct->ct_powercnt_unconfig = 1;
6602 	ct->ct_powercnt_reset = 0;
6603 	MDI_CLIENT_UNLOCK(ct);
6604 	if (ret == MDI_SUCCESS)
6605 		*held = 1;
6606 	return (ret);
6607 }
6608 
6609 static int
6610 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6611     int flags)
6612 {
6613 	int			ret = MDI_SUCCESS;
6614 	dev_info_t		*cdip;
6615 	int			circ;
6616 
6617 	ASSERT(MDI_VHCI(vdip));
6618 	*held = 0;
6619 
6620 	/* ndi_devi_unconfig_one */
6621 	if (child) {
6622 		ASSERT(DEVI_BUSY_OWNED(vdip));
6623 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6624 	}
6625 
6626 	/* devi_unconfig_common */
6627 	ndi_devi_enter(vdip, &circ);
6628 	cdip = ddi_get_child(vdip);
6629 	while (cdip) {
6630 		dev_info_t *next = ddi_get_next_sibling(cdip);
6631 
6632 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6633 		cdip = next;
6634 	}
6635 	ndi_devi_exit(vdip, circ);
6636 
6637 	if (*held)
6638 		ret = MDI_SUCCESS;
6639 
6640 	return (ret);
6641 }
6642 
6643 static void
6644 i_mdi_pm_post_config_one(dev_info_t *child)
6645 {
6646 	mdi_client_t	*ct;
6647 
6648 	ct = i_devi_get_client(child);
6649 	if (ct == NULL)
6650 		return;
6651 
6652 	MDI_CLIENT_LOCK(ct);
6653 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6654 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6655 
6656 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6657 		MDI_DEBUG(4, (CE_NOTE, child,
6658 		    "i_mdi_pm_post_config_one NOT configured\n"));
6659 		MDI_CLIENT_UNLOCK(ct);
6660 		return;
6661 	}
6662 
6663 	/* client has not been updated */
6664 	if (MDI_CLIENT_IS_FAILED(ct)) {
6665 		MDI_DEBUG(4, (CE_NOTE, child,
6666 		    "i_mdi_pm_post_config_one NOT configured\n"));
6667 		MDI_CLIENT_UNLOCK(ct);
6668 		return;
6669 	}
6670 
6671 	/* another thread might have powered it down or detached it */
6672 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6673 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6674 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6675 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6676 		MDI_DEBUG(4, (CE_NOTE, child,
6677 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6678 		i_mdi_pm_reset_client(ct);
6679 	} else {
6680 		mdi_pathinfo_t  *pip, *next;
6681 		int	valid_path_count = 0;
6682 
6683 		MDI_DEBUG(4, (CE_NOTE, child,
6684 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6685 		pip = ct->ct_path_head;
6686 		while (pip != NULL) {
6687 			MDI_PI_LOCK(pip);
6688 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6689 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6690 				valid_path_count ++;
6691 			MDI_PI_UNLOCK(pip);
6692 			pip = next;
6693 		}
6694 		i_mdi_pm_rele_client(ct, valid_path_count);
6695 	}
6696 	ct->ct_powercnt_config = 0;
6697 	MDI_CLIENT_UNLOCK(ct);
6698 }
6699 
6700 static void
6701 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
6702 {
6703 	int		circ;
6704 	dev_info_t	*cdip;
6705 
6706 	ASSERT(MDI_VHCI(vdip));
6707 
6708 	/* ndi_devi_config_one */
6709 	if (child) {
6710 		ASSERT(DEVI_BUSY_OWNED(vdip));
6711 		i_mdi_pm_post_config_one(child);
6712 		return;
6713 	}
6714 
6715 	/* devi_config_common */
6716 	ndi_devi_enter(vdip, &circ);
6717 	cdip = ddi_get_child(vdip);
6718 	while (cdip) {
6719 		dev_info_t *next = ddi_get_next_sibling(cdip);
6720 
6721 		i_mdi_pm_post_config_one(cdip);
6722 		cdip = next;
6723 	}
6724 	ndi_devi_exit(vdip, circ);
6725 }
6726 
6727 static void
6728 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6729 {
6730 	mdi_client_t	*ct;
6731 
6732 	ct = i_devi_get_client(child);
6733 	if (ct == NULL)
6734 		return;
6735 
6736 	MDI_CLIENT_LOCK(ct);
6737 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6738 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6739 
6740 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6741 		MDI_DEBUG(4, (CE_NOTE, child,
6742 		    "i_mdi_pm_post_unconfig NOT held\n"));
6743 		MDI_CLIENT_UNLOCK(ct);
6744 		return;
6745 	}
6746 
6747 	/* failure detaching or another thread just attached it */
6748 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6749 	    i_ddi_devi_attached(ct->ct_dip)) ||
6750 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6751 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6752 		MDI_DEBUG(4, (CE_NOTE, child,
6753 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6754 		i_mdi_pm_reset_client(ct);
6755 	} else {
6756 		mdi_pathinfo_t  *pip, *next;
6757 		int	valid_path_count = 0;
6758 
6759 		MDI_DEBUG(4, (CE_NOTE, child,
6760 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6761 		pip = ct->ct_path_head;
6762 		while (pip != NULL) {
6763 			MDI_PI_LOCK(pip);
6764 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6765 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6766 				valid_path_count ++;
6767 			MDI_PI_UNLOCK(pip);
6768 			pip = next;
6769 		}
6770 		i_mdi_pm_rele_client(ct, valid_path_count);
6771 		ct->ct_powercnt_unconfig = 0;
6772 	}
6773 
6774 	MDI_CLIENT_UNLOCK(ct);
6775 }
6776 
6777 static void
6778 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
6779 {
6780 	int			circ;
6781 	dev_info_t		*cdip;
6782 
6783 	ASSERT(MDI_VHCI(vdip));
6784 
6785 	if (!held) {
6786 		MDI_DEBUG(4, (CE_NOTE, vdip,
6787 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6788 		return;
6789 	}
6790 
6791 	if (child) {
6792 		ASSERT(DEVI_BUSY_OWNED(vdip));
6793 		i_mdi_pm_post_unconfig_one(child);
6794 		return;
6795 	}
6796 
6797 	ndi_devi_enter(vdip, &circ);
6798 	cdip = ddi_get_child(vdip);
6799 	while (cdip) {
6800 		dev_info_t *next = ddi_get_next_sibling(cdip);
6801 
6802 		i_mdi_pm_post_unconfig_one(cdip);
6803 		cdip = next;
6804 	}
6805 	ndi_devi_exit(vdip, circ);
6806 }
6807 
6808 int
6809 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6810 {
6811 	int			circ, ret = MDI_SUCCESS;
6812 	dev_info_t		*client_dip = NULL;
6813 	mdi_client_t		*ct;
6814 
6815 	/*
6816 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6817 	 * Power up pHCI for the named client device.
6818 	 * Note: Before the client is enumerated under vhci by phci,
6819 	 * client_dip can be NULL. Then proceed to power up all the
6820 	 * pHCIs.
6821 	 */
6822 	if (devnm != NULL) {
6823 		ndi_devi_enter(vdip, &circ);
6824 		client_dip = ndi_devi_findchild(vdip, devnm);
6825 	}
6826 
6827 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
6828 	    op, devnm ? devnm : "NULL", (void *)client_dip));
6829 
6830 	switch (op) {
6831 	case MDI_PM_PRE_CONFIG:
6832 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6833 		break;
6834 
6835 	case MDI_PM_PRE_UNCONFIG:
6836 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6837 		    flags);
6838 		break;
6839 
6840 	case MDI_PM_POST_CONFIG:
6841 		i_mdi_pm_post_config(vdip, client_dip);
6842 		break;
6843 
6844 	case MDI_PM_POST_UNCONFIG:
6845 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6846 		break;
6847 
6848 	case MDI_PM_HOLD_POWER:
6849 	case MDI_PM_RELE_POWER:
6850 		ASSERT(args);
6851 
6852 		client_dip = (dev_info_t *)args;
6853 		ASSERT(MDI_CLIENT(client_dip));
6854 
6855 		ct = i_devi_get_client(client_dip);
6856 		MDI_CLIENT_LOCK(ct);
6857 
6858 		if (op == MDI_PM_HOLD_POWER) {
6859 			if (ct->ct_power_cnt == 0) {
6860 				(void) i_mdi_power_all_phci(ct);
6861 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6862 				    "mdi_power i_mdi_pm_hold_client\n"));
6863 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6864 			}
6865 		} else {
6866 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6867 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6868 				    "mdi_power i_mdi_pm_rele_client\n"));
6869 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6870 			} else {
6871 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6872 				    "mdi_power i_mdi_pm_reset_client\n"));
6873 				i_mdi_pm_reset_client(ct);
6874 			}
6875 		}
6876 
6877 		MDI_CLIENT_UNLOCK(ct);
6878 		break;
6879 
6880 	default:
6881 		break;
6882 	}
6883 
6884 	if (devnm)
6885 		ndi_devi_exit(vdip, circ);
6886 
6887 	return (ret);
6888 }
6889 
6890 int
6891 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6892 {
6893 	mdi_vhci_t *vhci;
6894 
6895 	if (!MDI_VHCI(dip))
6896 		return (MDI_FAILURE);
6897 
6898 	if (mdi_class) {
6899 		vhci = DEVI(dip)->devi_mdi_xhci;
6900 		ASSERT(vhci);
6901 		*mdi_class = vhci->vh_class;
6902 	}
6903 
6904 	return (MDI_SUCCESS);
6905 }
6906 
6907 int
6908 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6909 {
6910 	mdi_phci_t *phci;
6911 
6912 	if (!MDI_PHCI(dip))
6913 		return (MDI_FAILURE);
6914 
6915 	if (mdi_class) {
6916 		phci = DEVI(dip)->devi_mdi_xhci;
6917 		ASSERT(phci);
6918 		*mdi_class = phci->ph_vhci->vh_class;
6919 	}
6920 
6921 	return (MDI_SUCCESS);
6922 }
6923 
6924 int
6925 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6926 {
6927 	mdi_client_t *client;
6928 
6929 	if (!MDI_CLIENT(dip))
6930 		return (MDI_FAILURE);
6931 
6932 	if (mdi_class) {
6933 		client = DEVI(dip)->devi_mdi_client;
6934 		ASSERT(client);
6935 		*mdi_class = client->ct_vhci->vh_class;
6936 	}
6937 
6938 	return (MDI_SUCCESS);
6939 }
6940 
6941 void *
6942 mdi_client_get_vhci_private(dev_info_t *dip)
6943 {
6944 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6945 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6946 		mdi_client_t	*ct;
6947 		ct = i_devi_get_client(dip);
6948 		return (ct->ct_vprivate);
6949 	}
6950 	return (NULL);
6951 }
6952 
6953 void
6954 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6955 {
6956 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6957 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6958 		mdi_client_t	*ct;
6959 		ct = i_devi_get_client(dip);
6960 		ct->ct_vprivate = data;
6961 	}
6962 }
6963 /*
6964  * mdi_pi_get_vhci_private():
6965  *		Get the vhci private information associated with the
6966  *		mdi_pathinfo node
6967  */
6968 void *
6969 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6970 {
6971 	caddr_t	vprivate = NULL;
6972 	if (pip) {
6973 		vprivate = MDI_PI(pip)->pi_vprivate;
6974 	}
6975 	return (vprivate);
6976 }
6977 
6978 /*
6979  * mdi_pi_set_vhci_private():
6980  *		Set the vhci private information in the mdi_pathinfo node
6981  */
6982 void
6983 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6984 {
6985 	if (pip) {
6986 		MDI_PI(pip)->pi_vprivate = priv;
6987 	}
6988 }
6989 
6990 /*
6991  * mdi_phci_get_vhci_private():
6992  *		Get the vhci private information associated with the
6993  *		mdi_phci node
6994  */
6995 void *
6996 mdi_phci_get_vhci_private(dev_info_t *dip)
6997 {
6998 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6999 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7000 		mdi_phci_t	*ph;
7001 		ph = i_devi_get_phci(dip);
7002 		return (ph->ph_vprivate);
7003 	}
7004 	return (NULL);
7005 }
7006 
7007 /*
7008  * mdi_phci_set_vhci_private():
7009  *		Set the vhci private information in the mdi_phci node
7010  */
7011 void
7012 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7013 {
7014 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7015 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7016 		mdi_phci_t	*ph;
7017 		ph = i_devi_get_phci(dip);
7018 		ph->ph_vprivate = priv;
7019 	}
7020 }
7021 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
/* number of entries in vhci_class_list[] */
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 *
 * The array is indexed in parallel with vhci_class_list[]:
 * setup_vhci_cache() looks up its class name in vhci_class_list[], takes
 * the nvlist found at the same index and sets that slot back to NULL.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/*
 * nvpair names in vhci cache nvlist.
 * MDI_VHCI_CACHE_VERSION is not a name: it is the value the "version"
 * nvpair must carry for mainnvl_to_vhcache() to accept the nvlist.
 */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
7042 
7043 /*
7044  * Given vhci class name, return its on-disk vhci cache filename.
7045  * Memory for the returned filename which includes the full path is allocated
7046  * by this function.
7047  */
7048 static char *
7049 vhclass2vhcache_filename(char *vhclass)
7050 {
7051 	char *filename;
7052 	int len;
7053 	static char *fmt = "/etc/devices/mdi_%s_cache";
7054 
7055 	/*
7056 	 * fmt contains the on-disk vhci cache file name format;
7057 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7058 	 */
7059 
7060 	/* the -1 below is to account for "%s" in the format string */
7061 	len = strlen(fmt) + strlen(vhclass) - 1;
7062 	filename = kmem_alloc(len, KM_SLEEP);
7063 	(void) snprintf(filename, len, fmt, vhclass);
7064 	ASSERT(len == (strlen(filename) + 1));
7065 	return (filename);
7066 }
7067 
7068 /*
7069  * initialize the vhci cache related data structures and read the on-disk
7070  * vhci cached data into memory.
7071  */
7072 static void
7073 setup_vhci_cache(mdi_vhci_t *vh)
7074 {
7075 	mdi_vhci_config_t *vhc;
7076 	mdi_vhci_cache_t *vhcache;
7077 	int i;
7078 	nvlist_t *nvl = NULL;
7079 
7080 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7081 	vh->vh_config = vhc;
7082 	vhcache = &vhc->vhc_vhcache;
7083 
7084 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7085 
7086 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7087 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7088 
7089 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7090 
7091 	/*
7092 	 * Create string hash; same as mod_hash_create_strhash() except that
7093 	 * we use NULL key destructor.
7094 	 */
7095 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7096 	    mdi_bus_config_cache_hash_size,
7097 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
7098 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7099 
7100 	/*
7101 	 * The on-disk vhci cache is read during booting prior to the
7102 	 * lights-out period by mdi_read_devices_files().
7103 	 */
7104 	for (i = 0; i < N_VHCI_CLASSES; i++) {
7105 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7106 			nvl = vhcache_nvl[i];
7107 			vhcache_nvl[i] = NULL;
7108 			break;
7109 		}
7110 	}
7111 
7112 	/*
7113 	 * this is to cover the case of some one manually causing unloading
7114 	 * (or detaching) and reloading (or attaching) of a vhci driver.
7115 	 */
7116 	if (nvl == NULL && modrootloaded)
7117 		nvl = read_on_disk_vhci_cache(vh->vh_class);
7118 
7119 	if (nvl != NULL) {
7120 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7121 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7122 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7123 		else  {
7124 			cmn_err(CE_WARN,
7125 			    "%s: data file corrupted, will recreate\n",
7126 			    vhc->vhc_vhcache_filename);
7127 		}
7128 		rw_exit(&vhcache->vhcache_lock);
7129 		nvlist_free(nvl);
7130 	}
7131 
7132 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7133 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7134 
7135 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7136 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7137 }
7138 
7139 /*
7140  * free all vhci cache related resources
7141  */
7142 static int
7143 destroy_vhci_cache(mdi_vhci_t *vh)
7144 {
7145 	mdi_vhci_config_t *vhc = vh->vh_config;
7146 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7147 	mdi_vhcache_phci_t *cphci, *cphci_next;
7148 	mdi_vhcache_client_t *cct, *cct_next;
7149 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7150 
7151 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7152 		return (MDI_FAILURE);
7153 
7154 	kmem_free(vhc->vhc_vhcache_filename,
7155 	    strlen(vhc->vhc_vhcache_filename) + 1);
7156 
7157 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7158 
7159 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7160 	    cphci = cphci_next) {
7161 		cphci_next = cphci->cphci_next;
7162 		free_vhcache_phci(cphci);
7163 	}
7164 
7165 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7166 		cct_next = cct->cct_next;
7167 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7168 			cpi_next = cpi->cpi_next;
7169 			free_vhcache_pathinfo(cpi);
7170 		}
7171 		free_vhcache_client(cct);
7172 	}
7173 
7174 	rw_destroy(&vhcache->vhcache_lock);
7175 
7176 	mutex_destroy(&vhc->vhc_lock);
7177 	cv_destroy(&vhc->vhc_cv);
7178 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
7179 	return (MDI_SUCCESS);
7180 }
7181 
7182 /*
7183  * Stop all vhci cache related async threads and free their resources.
7184  */
7185 static int
7186 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7187 {
7188 	mdi_async_client_config_t *acc, *acc_next;
7189 
7190 	mutex_enter(&vhc->vhc_lock);
7191 	vhc->vhc_flags |= MDI_VHC_EXIT;
7192 	ASSERT(vhc->vhc_acc_thrcount >= 0);
7193 	cv_broadcast(&vhc->vhc_cv);
7194 
7195 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7196 	    vhc->vhc_acc_thrcount != 0) {
7197 		mutex_exit(&vhc->vhc_lock);
7198 		delay(1);
7199 		mutex_enter(&vhc->vhc_lock);
7200 	}
7201 
7202 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
7203 
7204 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7205 		acc_next = acc->acc_next;
7206 		free_async_client_config(acc);
7207 	}
7208 	vhc->vhc_acc_list_head = NULL;
7209 	vhc->vhc_acc_list_tail = NULL;
7210 	vhc->vhc_acc_count = 0;
7211 
7212 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7213 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7214 		mutex_exit(&vhc->vhc_lock);
7215 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7216 			vhcache_dirty(vhc);
7217 			return (MDI_FAILURE);
7218 		}
7219 	} else
7220 		mutex_exit(&vhc->vhc_lock);
7221 
7222 	if (callb_delete(vhc->vhc_cbid) != 0)
7223 		return (MDI_FAILURE);
7224 
7225 	return (MDI_SUCCESS);
7226 }
7227 
7228 /*
7229  * Stop vhci cache flush thread
7230  */
7231 /* ARGSUSED */
7232 static boolean_t
7233 stop_vhcache_flush_thread(void *arg, int code)
7234 {
7235 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7236 
7237 	mutex_enter(&vhc->vhc_lock);
7238 	vhc->vhc_flags |= MDI_VHC_EXIT;
7239 	cv_broadcast(&vhc->vhc_cv);
7240 
7241 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7242 		mutex_exit(&vhc->vhc_lock);
7243 		delay(1);
7244 		mutex_enter(&vhc->vhc_lock);
7245 	}
7246 
7247 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7248 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7249 		mutex_exit(&vhc->vhc_lock);
7250 		(void) flush_vhcache(vhc, 1);
7251 	} else
7252 		mutex_exit(&vhc->vhc_lock);
7253 
7254 	return (B_TRUE);
7255 }
7256 
7257 /*
7258  * Enqueue the vhcache phci (cphci) at the tail of the list
7259  */
7260 static void
7261 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7262 {
7263 	cphci->cphci_next = NULL;
7264 	if (vhcache->vhcache_phci_head == NULL)
7265 		vhcache->vhcache_phci_head = cphci;
7266 	else
7267 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7268 	vhcache->vhcache_phci_tail = cphci;
7269 }
7270 
7271 /*
7272  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7273  */
7274 static void
7275 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7276     mdi_vhcache_pathinfo_t *cpi)
7277 {
7278 	cpi->cpi_next = NULL;
7279 	if (cct->cct_cpi_head == NULL)
7280 		cct->cct_cpi_head = cpi;
7281 	else
7282 		cct->cct_cpi_tail->cpi_next = cpi;
7283 	cct->cct_cpi_tail = cpi;
7284 }
7285 
7286 /*
7287  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7288  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7289  * flag set come at the beginning of the list. All cpis which have this
7290  * flag set come at the end of the list.
7291  */
7292 static void
7293 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7294     mdi_vhcache_pathinfo_t *newcpi)
7295 {
7296 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7297 
7298 	if (cct->cct_cpi_head == NULL ||
7299 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7300 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7301 	else {
7302 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7303 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7304 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7305 			;
7306 
7307 		if (prev_cpi == NULL)
7308 			cct->cct_cpi_head = newcpi;
7309 		else
7310 			prev_cpi->cpi_next = newcpi;
7311 
7312 		newcpi->cpi_next = cpi;
7313 
7314 		if (cpi == NULL)
7315 			cct->cct_cpi_tail = newcpi;
7316 	}
7317 }
7318 
7319 /*
7320  * Enqueue the vhcache client (cct) at the tail of the list
7321  */
7322 static void
7323 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7324     mdi_vhcache_client_t *cct)
7325 {
7326 	cct->cct_next = NULL;
7327 	if (vhcache->vhcache_client_head == NULL)
7328 		vhcache->vhcache_client_head = cct;
7329 	else
7330 		vhcache->vhcache_client_tail->cct_next = cct;
7331 	vhcache->vhcache_client_tail = cct;
7332 }
7333 
7334 static void
7335 free_string_array(char **str, int nelem)
7336 {
7337 	int i;
7338 
7339 	if (str) {
7340 		for (i = 0; i < nelem; i++) {
7341 			if (str[i])
7342 				kmem_free(str[i], strlen(str[i]) + 1);
7343 		}
7344 		kmem_free(str, sizeof (char *) * nelem);
7345 	}
7346 }
7347 
7348 static void
7349 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7350 {
7351 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7352 	kmem_free(cphci, sizeof (*cphci));
7353 }
7354 
7355 static void
7356 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7357 {
7358 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7359 	kmem_free(cpi, sizeof (*cpi));
7360 }
7361 
7362 static void
7363 free_vhcache_client(mdi_vhcache_client_t *cct)
7364 {
7365 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7366 	kmem_free(cct, sizeof (*cct));
7367 }
7368 
7369 static char *
7370 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7371 {
7372 	char *name_addr;
7373 	int len;
7374 
7375 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7376 	name_addr = kmem_alloc(len, KM_SLEEP);
7377 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7378 
7379 	if (ret_len)
7380 		*ret_len = len;
7381 	return (name_addr);
7382 }
7383 
7384 /*
7385  * Copy the contents of paddrnvl to vhci cache.
7386  * paddrnvl nvlist contains path information for a vhci client.
7387  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7388  */
7389 static void
7390 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7391     mdi_vhcache_client_t *cct)
7392 {
7393 	nvpair_t *nvp = NULL;
7394 	mdi_vhcache_pathinfo_t *cpi;
7395 	uint_t nelem;
7396 	uint32_t *val;
7397 
7398 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7399 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7400 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7401 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7402 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7403 		ASSERT(nelem == 2);
7404 		cpi->cpi_cphci = cphci_list[val[0]];
7405 		cpi->cpi_flags = val[1];
7406 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7407 	}
7408 }
7409 
7410 /*
7411  * Copy the contents of caddrmapnvl to vhci cache.
7412  * caddrmapnvl nvlist contains vhci client address to phci client address
7413  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7414  * this nvlist.
7415  */
7416 static void
7417 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7418     mdi_vhcache_phci_t *cphci_list[])
7419 {
7420 	nvpair_t *nvp = NULL;
7421 	nvlist_t *paddrnvl;
7422 	mdi_vhcache_client_t *cct;
7423 
7424 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7425 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7426 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7427 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7428 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7429 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7430 		/* the client must contain at least one path */
7431 		ASSERT(cct->cct_cpi_head != NULL);
7432 
7433 		enqueue_vhcache_client(vhcache, cct);
7434 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7435 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7436 	}
7437 }
7438 
7439 /*
7440  * Copy the contents of the main nvlist to vhci cache.
7441  *
7442  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7443  * The nvlist contains the mappings between the vhci client addresses and
7444  * their corresponding phci client addresses.
7445  *
7446  * The structure of the nvlist is as follows:
7447  *
7448  * Main nvlist:
7449  *	NAME		TYPE		DATA
7450  *	version		int32		version number
7451  *	phcis		string array	array of phci paths
7452  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7453  *
7454  * structure of c2paddrs_nvl:
7455  *	NAME		TYPE		DATA
7456  *	caddr1		nvlist_t	paddrs_nvl1
7457  *	caddr2		nvlist_t	paddrs_nvl2
7458  *	...
7459  * where caddr1, caddr2, ... are vhci client name and addresses in the
7460  * form of "<clientname>@<clientaddress>".
7461  * (for example: "ssd@2000002037cd9f72");
7462  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7463  *
7464  * structure of paddrs_nvl:
7465  *	NAME		TYPE		DATA
7466  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7467  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7468  *	...
7469  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7470  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7471  * phci-ids are integers that identify PHCIs to which the
7472  * the bus specific address belongs to. These integers are used as an index
7473  * into to the phcis string array in the main nvlist to get the PHCI path.
7474  */
7475 static int
7476 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7477 {
7478 	char **phcis, **phci_namep;
7479 	uint_t nphcis;
7480 	mdi_vhcache_phci_t *cphci, **cphci_list;
7481 	nvlist_t *caddrmapnvl;
7482 	int32_t ver;
7483 	int i;
7484 	size_t cphci_list_size;
7485 
7486 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7487 
7488 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7489 	    ver != MDI_VHCI_CACHE_VERSION)
7490 		return (MDI_FAILURE);
7491 
7492 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7493 	    &nphcis) != 0)
7494 		return (MDI_SUCCESS);
7495 
7496 	ASSERT(nphcis > 0);
7497 
7498 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7499 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7500 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7501 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7502 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7503 		enqueue_vhcache_phci(vhcache, cphci);
7504 		cphci_list[i] = cphci;
7505 	}
7506 
7507 	ASSERT(vhcache->vhcache_phci_head != NULL);
7508 
7509 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7510 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7511 
7512 	kmem_free(cphci_list, cphci_list_size);
7513 	return (MDI_SUCCESS);
7514 }
7515 
7516 /*
7517  * Build paddrnvl for the specified client using the information in the
7518  * vhci cache and add it to the caddrmapnnvl.
7519  * Returns 0 on success, errno on failure.
7520  */
7521 static int
7522 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7523     nvlist_t *caddrmapnvl)
7524 {
7525 	mdi_vhcache_pathinfo_t *cpi;
7526 	nvlist_t *nvl;
7527 	int err;
7528 	uint32_t val[2];
7529 
7530 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7531 
7532 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7533 		return (err);
7534 
7535 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7536 		val[0] = cpi->cpi_cphci->cphci_id;
7537 		val[1] = cpi->cpi_flags;
7538 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7539 		    != 0)
7540 			goto out;
7541 	}
7542 
7543 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7544 out:
7545 	nvlist_free(nvl);
7546 	return (err);
7547 }
7548 
7549 /*
7550  * Build caddrmapnvl using the information in the vhci cache
7551  * and add it to the mainnvl.
7552  * Returns 0 on success, errno on failure.
7553  */
7554 static int
7555 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7556 {
7557 	mdi_vhcache_client_t *cct;
7558 	nvlist_t *nvl;
7559 	int err;
7560 
7561 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7562 
7563 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7564 		return (err);
7565 
7566 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7567 	    cct = cct->cct_next) {
7568 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7569 			goto out;
7570 	}
7571 
7572 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7573 out:
7574 	nvlist_free(nvl);
7575 	return (err);
7576 }
7577 
7578 /*
7579  * Build nvlist using the information in the vhci cache.
7580  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7581  * Returns nvl on success, NULL on failure.
7582  */
7583 static nvlist_t *
7584 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7585 {
7586 	mdi_vhcache_phci_t *cphci;
7587 	uint_t phci_count;
7588 	char **phcis;
7589 	nvlist_t *nvl;
7590 	int err, i;
7591 
7592 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7593 		nvl = NULL;
7594 		goto out;
7595 	}
7596 
7597 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7598 	    MDI_VHCI_CACHE_VERSION)) != 0)
7599 		goto out;
7600 
7601 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7602 	if (vhcache->vhcache_phci_head == NULL) {
7603 		rw_exit(&vhcache->vhcache_lock);
7604 		return (nvl);
7605 	}
7606 
7607 	phci_count = 0;
7608 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7609 	    cphci = cphci->cphci_next)
7610 		cphci->cphci_id = phci_count++;
7611 
7612 	/* build phci pathname list */
7613 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7614 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7615 	    cphci = cphci->cphci_next, i++)
7616 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7617 
7618 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7619 	    phci_count);
7620 	free_string_array(phcis, phci_count);
7621 
7622 	if (err == 0 &&
7623 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7624 		rw_exit(&vhcache->vhcache_lock);
7625 		return (nvl);
7626 	}
7627 
7628 	rw_exit(&vhcache->vhcache_lock);
7629 out:
7630 	if (nvl)
7631 		nvlist_free(nvl);
7632 	return (NULL);
7633 }
7634 
7635 /*
7636  * Lookup vhcache phci structure for the specified phci path.
7637  */
7638 static mdi_vhcache_phci_t *
7639 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7640 {
7641 	mdi_vhcache_phci_t *cphci;
7642 
7643 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7644 
7645 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7646 	    cphci = cphci->cphci_next) {
7647 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7648 			return (cphci);
7649 	}
7650 
7651 	return (NULL);
7652 }
7653 
7654 /*
7655  * Lookup vhcache phci structure for the specified phci.
7656  */
7657 static mdi_vhcache_phci_t *
7658 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7659 {
7660 	mdi_vhcache_phci_t *cphci;
7661 
7662 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7663 
7664 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7665 	    cphci = cphci->cphci_next) {
7666 		if (cphci->cphci_phci == ph)
7667 			return (cphci);
7668 	}
7669 
7670 	return (NULL);
7671 }
7672 
7673 /*
7674  * Add the specified phci to the vhci cache if not already present.
7675  */
7676 static void
7677 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7678 {
7679 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7680 	mdi_vhcache_phci_t *cphci;
7681 	char *pathname;
7682 	int cache_updated;
7683 
7684 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7685 
7686 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7687 	(void) ddi_pathname(ph->ph_dip, pathname);
7688 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7689 	    != NULL) {
7690 		cphci->cphci_phci = ph;
7691 		cache_updated = 0;
7692 	} else {
7693 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7694 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7695 		cphci->cphci_phci = ph;
7696 		enqueue_vhcache_phci(vhcache, cphci);
7697 		cache_updated = 1;
7698 	}
7699 
7700 	rw_exit(&vhcache->vhcache_lock);
7701 
7702 	/*
7703 	 * Since a new phci has been added, reset
7704 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7705 	 * during next vhcache_discover_paths().
7706 	 */
7707 	mutex_enter(&vhc->vhc_lock);
7708 	vhc->vhc_path_discovery_cutoff_time = 0;
7709 	mutex_exit(&vhc->vhc_lock);
7710 
7711 	kmem_free(pathname, MAXPATHLEN);
7712 	if (cache_updated)
7713 		vhcache_dirty(vhc);
7714 }
7715 
7716 /*
7717  * Remove the reference to the specified phci from the vhci cache.
7718  */
7719 static void
7720 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7721 {
7722 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7723 	mdi_vhcache_phci_t *cphci;
7724 
7725 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7726 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7727 		/* do not remove the actual mdi_vhcache_phci structure */
7728 		cphci->cphci_phci = NULL;
7729 	}
7730 	rw_exit(&vhcache->vhcache_lock);
7731 }
7732 
7733 static void
7734 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7735     mdi_vhcache_lookup_token_t *src)
7736 {
7737 	if (src == NULL) {
7738 		dst->lt_cct = NULL;
7739 		dst->lt_cct_lookup_time = 0;
7740 	} else {
7741 		dst->lt_cct = src->lt_cct;
7742 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7743 	}
7744 }
7745 
7746 /*
7747  * Look up vhcache client for the specified client.
7748  */
7749 static mdi_vhcache_client_t *
7750 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7751     mdi_vhcache_lookup_token_t *token)
7752 {
7753 	mod_hash_val_t hv;
7754 	char *name_addr;
7755 	int len;
7756 
7757 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7758 
7759 	/*
7760 	 * If no vhcache clean occurred since the last lookup, we can
7761 	 * simply return the cct from the last lookup operation.
7762 	 * It works because ccts are never freed except during the vhcache
7763 	 * cleanup operation.
7764 	 */
7765 	if (token != NULL &&
7766 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7767 		return (token->lt_cct);
7768 
7769 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7770 	if (mod_hash_find(vhcache->vhcache_client_hash,
7771 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7772 		if (token) {
7773 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7774 			token->lt_cct_lookup_time = lbolt64;
7775 		}
7776 	} else {
7777 		if (token) {
7778 			token->lt_cct = NULL;
7779 			token->lt_cct_lookup_time = 0;
7780 		}
7781 		hv = NULL;
7782 	}
7783 	kmem_free(name_addr, len);
7784 	return ((mdi_vhcache_client_t *)hv);
7785 }
7786 
7787 /*
7788  * Add the specified path to the vhci cache if not already present.
7789  * Also add the vhcache client for the client corresponding to this path
7790  * if it doesn't already exist.
7791  */
7792 static void
7793 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7794 {
7795 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7796 	mdi_vhcache_client_t *cct;
7797 	mdi_vhcache_pathinfo_t *cpi;
7798 	mdi_phci_t *ph = pip->pi_phci;
7799 	mdi_client_t *ct = pip->pi_client;
7800 	int cache_updated = 0;
7801 
7802 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7803 
7804 	/* if vhcache client for this pip doesn't already exist, add it */
7805 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7806 	    NULL)) == NULL) {
7807 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7808 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7809 		    ct->ct_guid, NULL);
7810 		enqueue_vhcache_client(vhcache, cct);
7811 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7812 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7813 		cache_updated = 1;
7814 	}
7815 
7816 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7817 		if (cpi->cpi_cphci->cphci_phci == ph &&
7818 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7819 			cpi->cpi_pip = pip;
7820 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7821 				cpi->cpi_flags &=
7822 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7823 				sort_vhcache_paths(cct);
7824 				cache_updated = 1;
7825 			}
7826 			break;
7827 		}
7828 	}
7829 
7830 	if (cpi == NULL) {
7831 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7832 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7833 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7834 		ASSERT(cpi->cpi_cphci != NULL);
7835 		cpi->cpi_pip = pip;
7836 		enqueue_vhcache_pathinfo(cct, cpi);
7837 		cache_updated = 1;
7838 	}
7839 
7840 	rw_exit(&vhcache->vhcache_lock);
7841 
7842 	if (cache_updated)
7843 		vhcache_dirty(vhc);
7844 }
7845 
7846 /*
7847  * Remove the reference to the specified path from the vhci cache.
7848  */
7849 static void
7850 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7851 {
7852 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7853 	mdi_client_t *ct = pip->pi_client;
7854 	mdi_vhcache_client_t *cct;
7855 	mdi_vhcache_pathinfo_t *cpi;
7856 
7857 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7858 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7859 	    NULL)) != NULL) {
7860 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7861 		    cpi = cpi->cpi_next) {
7862 			if (cpi->cpi_pip == pip) {
7863 				cpi->cpi_pip = NULL;
7864 				break;
7865 			}
7866 		}
7867 	}
7868 	rw_exit(&vhcache->vhcache_lock);
7869 }
7870 
7871 /*
7872  * Flush the vhci cache to disk.
7873  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7874  */
7875 static int
7876 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7877 {
7878 	nvlist_t *nvl;
7879 	int err;
7880 	int rv;
7881 
7882 	/*
7883 	 * It is possible that the system may shutdown before
7884 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7885 	 * flushing the cache in this case do not check for
7886 	 * i_ddi_io_initialized when force flag is set.
7887 	 */
7888 	if (force_flag == 0 && !i_ddi_io_initialized())
7889 		return (MDI_FAILURE);
7890 
7891 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7892 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7893 		nvlist_free(nvl);
7894 	} else
7895 		err = EFAULT;
7896 
7897 	rv = MDI_SUCCESS;
7898 	mutex_enter(&vhc->vhc_lock);
7899 	if (err != 0) {
7900 		if (err == EROFS) {
7901 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7902 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7903 			    MDI_VHC_VHCACHE_DIRTY);
7904 		} else {
7905 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7906 				cmn_err(CE_CONT, "%s: update failed\n",
7907 				    vhc->vhc_vhcache_filename);
7908 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7909 			}
7910 			rv = MDI_FAILURE;
7911 		}
7912 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7913 		cmn_err(CE_CONT,
7914 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7915 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7916 	}
7917 	mutex_exit(&vhc->vhc_lock);
7918 
7919 	return (rv);
7920 }
7921 
7922 /*
7923  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7924  * Exits itself if left idle for the idle timeout period.
7925  */
7926 static void
7927 vhcache_flush_thread(void *arg)
7928 {
7929 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7930 	clock_t idle_time, quit_at_ticks;
7931 	callb_cpr_t cprinfo;
7932 
7933 	/* number of seconds to sleep idle before exiting */
7934 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7935 
7936 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7937 	    "mdi_vhcache_flush");
7938 	mutex_enter(&vhc->vhc_lock);
7939 	for (; ; ) {
7940 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7941 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7942 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7943 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7944 				(void) cv_timedwait(&vhc->vhc_cv,
7945 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7946 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7947 			} else {
7948 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7949 				mutex_exit(&vhc->vhc_lock);
7950 
7951 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7952 					vhcache_dirty(vhc);
7953 
7954 				mutex_enter(&vhc->vhc_lock);
7955 			}
7956 		}
7957 
7958 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7959 
7960 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7961 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7962 		    ddi_get_lbolt() < quit_at_ticks) {
7963 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7964 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7965 			    quit_at_ticks);
7966 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7967 		}
7968 
7969 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7970 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7971 			goto out;
7972 	}
7973 
7974 out:
7975 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7976 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7977 	CALLB_CPR_EXIT(&cprinfo);
7978 }
7979 
7980 /*
7981  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7982  */
7983 static void
7984 vhcache_dirty(mdi_vhci_config_t *vhc)
7985 {
7986 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7987 	int create_thread;
7988 
7989 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7990 	/* do not flush cache until the cache is fully built */
7991 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7992 		rw_exit(&vhcache->vhcache_lock);
7993 		return;
7994 	}
7995 	rw_exit(&vhcache->vhcache_lock);
7996 
7997 	mutex_enter(&vhc->vhc_lock);
7998 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7999 		mutex_exit(&vhc->vhc_lock);
8000 		return;
8001 	}
8002 
8003 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8004 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8005 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8006 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8007 		cv_broadcast(&vhc->vhc_cv);
8008 		create_thread = 0;
8009 	} else {
8010 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8011 		create_thread = 1;
8012 	}
8013 	mutex_exit(&vhc->vhc_lock);
8014 
8015 	if (create_thread)
8016 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8017 		    0, &p0, TS_RUN, minclsyspri);
8018 }
8019 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	/* device path of the phci to be bus configured */
	char *phbc_phci_path;

	/* vhci bus config */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;

	/* link to the next phci bus config structure */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;
8029 
8030 /* vhci bus config structure - one for each vhci bus config operation */
8031 typedef struct mdi_vhci_bus_config_s {
8032 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
8033 	major_t vhbc_op_major;		/* bus config op major */
8034 	uint_t vhbc_op_flags;		/* bus config op flags */
8035 	kmutex_t vhbc_lock;
8036 	kcondvar_t vhbc_cv;
8037 	int vhbc_thr_count;
8038 } mdi_vhci_bus_config_t;
8039 
8040 /*
8041  * bus config the specified phci
8042  */
8043 static void
8044 bus_config_phci(void *arg)
8045 {
8046 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8047 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8048 	dev_info_t *ph_dip;
8049 
8050 	/*
8051 	 * first configure all path components upto phci and then configure
8052 	 * the phci children.
8053 	 */
8054 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8055 	    != NULL) {
8056 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8057 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8058 			(void) ndi_devi_config_driver(ph_dip,
8059 			    vhbc->vhbc_op_flags,
8060 			    vhbc->vhbc_op_major);
8061 		} else
8062 			(void) ndi_devi_config(ph_dip,
8063 			    vhbc->vhbc_op_flags);
8064 
8065 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8066 		ndi_rele_devi(ph_dip);
8067 	}
8068 
8069 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8070 	kmem_free(phbc, sizeof (*phbc));
8071 
8072 	mutex_enter(&vhbc->vhbc_lock);
8073 	vhbc->vhbc_thr_count--;
8074 	if (vhbc->vhbc_thr_count == 0)
8075 		cv_broadcast(&vhbc->vhbc_cv);
8076 	mutex_exit(&vhbc->vhbc_lock);
8077 }
8078 
8079 /*
8080  * Bus config all phcis associated with the vhci in parallel.
8081  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8082  */
8083 static void
8084 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8085     ddi_bus_config_op_t op, major_t maj)
8086 {
8087 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8088 	mdi_vhci_bus_config_t *vhbc;
8089 	mdi_vhcache_phci_t *cphci;
8090 
8091 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8092 	if (vhcache->vhcache_phci_head == NULL) {
8093 		rw_exit(&vhcache->vhcache_lock);
8094 		return;
8095 	}
8096 
8097 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8098 
8099 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8100 	    cphci = cphci->cphci_next) {
8101 		/* skip phcis that haven't attached before root is available */
8102 		if (!modrootloaded && (cphci->cphci_phci == NULL))
8103 			continue;
8104 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8105 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8106 		    KM_SLEEP);
8107 		phbc->phbc_vhbusconfig = vhbc;
8108 		phbc->phbc_next = phbc_head;
8109 		phbc_head = phbc;
8110 		vhbc->vhbc_thr_count++;
8111 	}
8112 	rw_exit(&vhcache->vhcache_lock);
8113 
8114 	vhbc->vhbc_op = op;
8115 	vhbc->vhbc_op_major = maj;
8116 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
8117 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8118 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8119 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8120 
8121 	/* now create threads to initiate bus config on all phcis in parallel */
8122 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8123 		phbc_next = phbc->phbc_next;
8124 		if (mdi_mtc_off)
8125 			bus_config_phci((void *)phbc);
8126 		else
8127 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
8128 			    0, &p0, TS_RUN, minclsyspri);
8129 	}
8130 
8131 	mutex_enter(&vhbc->vhbc_lock);
8132 	/* wait until all threads exit */
8133 	while (vhbc->vhbc_thr_count > 0)
8134 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8135 	mutex_exit(&vhbc->vhbc_lock);
8136 
8137 	mutex_destroy(&vhbc->vhbc_lock);
8138 	cv_destroy(&vhbc->vhbc_cv);
8139 	kmem_free(vhbc, sizeof (*vhbc));
8140 }
8141 
8142 /*
8143  * Single threaded version of bus_config_all_phcis()
8144  */
8145 static void
8146 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8147     ddi_bus_config_op_t op, major_t maj)
8148 {
8149 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8150 
8151 	single_threaded_vhconfig_enter(vhc);
8152 	bus_config_all_phcis(vhcache, flags, op, maj);
8153 	single_threaded_vhconfig_exit(vhc);
8154 }
8155 
8156 /*
8157  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8158  * The path includes the child component in addition to the phci path.
8159  */
8160 static int
8161 bus_config_one_phci_child(char *path)
8162 {
8163 	dev_info_t *ph_dip, *child;
8164 	char *devnm;
8165 	int rv = MDI_FAILURE;
8166 
8167 	/* extract the child component of the phci */
8168 	devnm = strrchr(path, '/');
8169 	*devnm++ = '\0';
8170 
8171 	/*
8172 	 * first configure all path components upto phci and then
8173 	 * configure the phci child.
8174 	 */
8175 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8176 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8177 		    NDI_SUCCESS) {
8178 			/*
8179 			 * release the hold that ndi_devi_config_one() placed
8180 			 */
8181 			ndi_rele_devi(child);
8182 			rv = MDI_SUCCESS;
8183 		}
8184 
8185 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8186 		ndi_rele_devi(ph_dip);
8187 	}
8188 
8189 	devnm--;
8190 	*devnm = '/';
8191 	return (rv);
8192 }
8193 
8194 /*
8195  * Build a list of phci client paths for the specified vhci client.
8196  * The list includes only those phci client paths which aren't configured yet.
8197  */
8198 static mdi_phys_path_t *
8199 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8200 {
8201 	mdi_vhcache_pathinfo_t *cpi;
8202 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8203 	int config_path, len;
8204 
8205 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8206 		/*
8207 		 * include only those paths that aren't configured.
8208 		 */
8209 		config_path = 0;
8210 		if (cpi->cpi_pip == NULL)
8211 			config_path = 1;
8212 		else {
8213 			MDI_PI_LOCK(cpi->cpi_pip);
8214 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
8215 				config_path = 1;
8216 			MDI_PI_UNLOCK(cpi->cpi_pip);
8217 		}
8218 
8219 		if (config_path) {
8220 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8221 			len = strlen(cpi->cpi_cphci->cphci_path) +
8222 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8223 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
8224 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
8225 			    cpi->cpi_cphci->cphci_path, ct_name,
8226 			    cpi->cpi_addr);
8227 			pp->phys_path_next = NULL;
8228 
8229 			if (pp_head == NULL)
8230 				pp_head = pp;
8231 			else
8232 				pp_tail->phys_path_next = pp;
8233 			pp_tail = pp;
8234 		}
8235 	}
8236 
8237 	return (pp_head);
8238 }
8239 
8240 /*
8241  * Free the memory allocated for phci client path list.
8242  */
8243 static void
8244 free_phclient_path_list(mdi_phys_path_t *pp_head)
8245 {
8246 	mdi_phys_path_t *pp, *pp_next;
8247 
8248 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8249 		pp_next = pp->phys_path_next;
8250 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8251 		kmem_free(pp, sizeof (*pp));
8252 	}
8253 }
8254 
8255 /*
8256  * Allocated async client structure and initialize with the specified values.
8257  */
8258 static mdi_async_client_config_t *
8259 alloc_async_client_config(char *ct_name, char *ct_addr,
8260     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8261 {
8262 	mdi_async_client_config_t *acc;
8263 
8264 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8265 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8266 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8267 	acc->acc_phclient_path_list_head = pp_head;
8268 	init_vhcache_lookup_token(&acc->acc_token, tok);
8269 	acc->acc_next = NULL;
8270 	return (acc);
8271 }
8272 
8273 /*
8274  * Free the memory allocated for the async client structure and their members.
8275  */
8276 static void
8277 free_async_client_config(mdi_async_client_config_t *acc)
8278 {
8279 	if (acc->acc_phclient_path_list_head)
8280 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8281 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8282 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8283 	kmem_free(acc, sizeof (*acc));
8284 }
8285 
8286 /*
8287  * Sort vhcache pathinfos (cpis) of the specified client.
8288  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8289  * flag set come at the beginning of the list. All cpis which have this
8290  * flag set come at the end of the list.
8291  */
8292 static void
8293 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8294 {
8295 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8296 
8297 	cpi_head = cct->cct_cpi_head;
8298 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8299 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8300 		cpi_next = cpi->cpi_next;
8301 		enqueue_vhcache_pathinfo(cct, cpi);
8302 	}
8303 }
8304 
8305 /*
8306  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8307  * every vhcache pathinfo of the specified client. If not adjust the flag
8308  * setting appropriately.
8309  *
8310  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8311  * on-disk vhci cache. So every time this flag is updated the cache must be
8312  * flushed.
8313  */
8314 static void
8315 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8316     mdi_vhcache_lookup_token_t *tok)
8317 {
8318 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8319 	mdi_vhcache_client_t *cct;
8320 	mdi_vhcache_pathinfo_t *cpi;
8321 
8322 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8323 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8324 	    == NULL) {
8325 		rw_exit(&vhcache->vhcache_lock);
8326 		return;
8327 	}
8328 
8329 	/*
8330 	 * to avoid unnecessary on-disk cache updates, first check if an
8331 	 * update is really needed. If no update is needed simply return.
8332 	 */
8333 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8334 		if ((cpi->cpi_pip != NULL &&
8335 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8336 		    (cpi->cpi_pip == NULL &&
8337 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8338 			break;
8339 		}
8340 	}
8341 	if (cpi == NULL) {
8342 		rw_exit(&vhcache->vhcache_lock);
8343 		return;
8344 	}
8345 
8346 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8347 		rw_exit(&vhcache->vhcache_lock);
8348 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8349 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8350 		    tok)) == NULL) {
8351 			rw_exit(&vhcache->vhcache_lock);
8352 			return;
8353 		}
8354 	}
8355 
8356 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8357 		if (cpi->cpi_pip != NULL)
8358 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8359 		else
8360 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8361 	}
8362 	sort_vhcache_paths(cct);
8363 
8364 	rw_exit(&vhcache->vhcache_lock);
8365 	vhcache_dirty(vhc);
8366 }
8367 
8368 /*
8369  * Configure all specified paths of the client.
8370  */
8371 static void
8372 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8373     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8374 {
8375 	mdi_phys_path_t *pp;
8376 
8377 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8378 		(void) bus_config_one_phci_child(pp->phys_path);
8379 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8380 }
8381 
8382 /*
8383  * Dequeue elements from vhci async client config list and bus configure
8384  * their corresponding phci clients.
8385  */
8386 static void
8387 config_client_paths_thread(void *arg)
8388 {
8389 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8390 	mdi_async_client_config_t *acc;
8391 	clock_t quit_at_ticks;
8392 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8393 	callb_cpr_t cprinfo;
8394 
8395 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8396 	    "mdi_config_client_paths");
8397 
8398 	for (; ; ) {
8399 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8400 
8401 		mutex_enter(&vhc->vhc_lock);
8402 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8403 		    vhc->vhc_acc_list_head == NULL &&
8404 		    ddi_get_lbolt() < quit_at_ticks) {
8405 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8406 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8407 			    quit_at_ticks);
8408 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8409 		}
8410 
8411 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8412 		    vhc->vhc_acc_list_head == NULL)
8413 			goto out;
8414 
8415 		acc = vhc->vhc_acc_list_head;
8416 		vhc->vhc_acc_list_head = acc->acc_next;
8417 		if (vhc->vhc_acc_list_head == NULL)
8418 			vhc->vhc_acc_list_tail = NULL;
8419 		vhc->vhc_acc_count--;
8420 		mutex_exit(&vhc->vhc_lock);
8421 
8422 		config_client_paths_sync(vhc, acc->acc_ct_name,
8423 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8424 		    &acc->acc_token);
8425 
8426 		free_async_client_config(acc);
8427 	}
8428 
8429 out:
8430 	vhc->vhc_acc_thrcount--;
8431 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8432 	CALLB_CPR_EXIT(&cprinfo);
8433 }
8434 
8435 /*
8436  * Arrange for all the phci client paths (pp_head) for the specified client
8437  * to be bus configured asynchronously by a thread.
8438  */
8439 static void
8440 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8441     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8442 {
8443 	mdi_async_client_config_t *acc, *newacc;
8444 	int create_thread;
8445 
8446 	if (pp_head == NULL)
8447 		return;
8448 
8449 	if (mdi_mtc_off) {
8450 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8451 		free_phclient_path_list(pp_head);
8452 		return;
8453 	}
8454 
8455 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8456 	ASSERT(newacc);
8457 
8458 	mutex_enter(&vhc->vhc_lock);
8459 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8460 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8461 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8462 			free_async_client_config(newacc);
8463 			mutex_exit(&vhc->vhc_lock);
8464 			return;
8465 		}
8466 	}
8467 
8468 	if (vhc->vhc_acc_list_head == NULL)
8469 		vhc->vhc_acc_list_head = newacc;
8470 	else
8471 		vhc->vhc_acc_list_tail->acc_next = newacc;
8472 	vhc->vhc_acc_list_tail = newacc;
8473 	vhc->vhc_acc_count++;
8474 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8475 		cv_broadcast(&vhc->vhc_cv);
8476 		create_thread = 0;
8477 	} else {
8478 		vhc->vhc_acc_thrcount++;
8479 		create_thread = 1;
8480 	}
8481 	mutex_exit(&vhc->vhc_lock);
8482 
8483 	if (create_thread)
8484 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8485 		    0, &p0, TS_RUN, minclsyspri);
8486 }
8487 
8488 /*
8489  * Return number of online paths for the specified client.
8490  */
8491 static int
8492 nonline_paths(mdi_vhcache_client_t *cct)
8493 {
8494 	mdi_vhcache_pathinfo_t *cpi;
8495 	int online_count = 0;
8496 
8497 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8498 		if (cpi->cpi_pip != NULL) {
8499 			MDI_PI_LOCK(cpi->cpi_pip);
8500 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8501 				online_count++;
8502 			MDI_PI_UNLOCK(cpi->cpi_pip);
8503 		}
8504 	}
8505 
8506 	return (online_count);
8507 }
8508 
8509 /*
8510  * Bus configure all paths for the specified vhci client.
8511  * If at least one path for the client is already online, the remaining paths
8512  * will be configured asynchronously. Otherwise, it synchronously configures
8513  * the paths until at least one path is online and then rest of the paths
8514  * will be configured asynchronously.
8515  */
8516 static void
8517 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8518 {
8519 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8520 	mdi_phys_path_t *pp_head, *pp;
8521 	mdi_vhcache_client_t *cct;
8522 	mdi_vhcache_lookup_token_t tok;
8523 
8524 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8525 
8526 	init_vhcache_lookup_token(&tok, NULL);
8527 
8528 	if (ct_name == NULL || ct_addr == NULL ||
8529 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8530 	    == NULL ||
8531 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8532 		rw_exit(&vhcache->vhcache_lock);
8533 		return;
8534 	}
8535 
8536 	/* if at least one path is online, configure the rest asynchronously */
8537 	if (nonline_paths(cct) > 0) {
8538 		rw_exit(&vhcache->vhcache_lock);
8539 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8540 		return;
8541 	}
8542 
8543 	rw_exit(&vhcache->vhcache_lock);
8544 
8545 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8546 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8547 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8548 
8549 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8550 			    ct_addr, &tok)) == NULL) {
8551 				rw_exit(&vhcache->vhcache_lock);
8552 				goto out;
8553 			}
8554 
8555 			if (nonline_paths(cct) > 0 &&
8556 			    pp->phys_path_next != NULL) {
8557 				rw_exit(&vhcache->vhcache_lock);
8558 				config_client_paths_async(vhc, ct_name, ct_addr,
8559 				    pp->phys_path_next, &tok);
8560 				pp->phys_path_next = NULL;
8561 				goto out;
8562 			}
8563 
8564 			rw_exit(&vhcache->vhcache_lock);
8565 		}
8566 	}
8567 
8568 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8569 out:
8570 	free_phclient_path_list(pp_head);
8571 }
8572 
8573 static void
8574 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8575 {
8576 	mutex_enter(&vhc->vhc_lock);
8577 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8578 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8579 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8580 	mutex_exit(&vhc->vhc_lock);
8581 }
8582 
8583 static void
8584 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8585 {
8586 	mutex_enter(&vhc->vhc_lock);
8587 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8588 	cv_broadcast(&vhc->vhc_cv);
8589 	mutex_exit(&vhc->vhc_lock);
8590 }
8591 
/*
 * Describes one phci driver known to the built-in tables below.
 */
typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
};

static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor", 1 }
};
8614 
8615 static void *
8616 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
8617 {
8618 	void *new_ptr;
8619 
8620 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
8621 	if (old_ptr) {
8622 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
8623 		kmem_free(old_ptr, old_size);
8624 	}
8625 	return (new_ptr);
8626 }
8627 
8628 static void
8629 add_to_phci_list(char ***driver_list, int **root_support_list,
8630     int *cur_elements, int *max_elements, char *driver_name, int root_support)
8631 {
8632 	ASSERT(*cur_elements <= *max_elements);
8633 	if (*cur_elements == *max_elements) {
8634 		*max_elements += 10;
8635 		*driver_list = mdi_realloc(*driver_list,
8636 		    sizeof (char *) * (*cur_elements),
8637 		    sizeof (char *) * (*max_elements));
8638 		*root_support_list = mdi_realloc(*root_support_list,
8639 		    sizeof (int) * (*cur_elements),
8640 		    sizeof (int) * (*max_elements));
8641 	}
8642 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
8643 	(*root_support_list)[*cur_elements] = root_support;
8644 	(*cur_elements)++;
8645 }
8646 
8647 static void
8648 get_phci_driver_list(char *vhci_class, char ***driver_list,
8649     int **root_support_list, int *cur_elements, int *max_elements)
8650 {
8651 	mdi_phci_driver_info_t	*st_driver_list, *p;
8652 	int		st_ndrivers, root_support, i, j, driver_conf_count;
8653 	major_t		m;
8654 	struct devnames	*dnp;
8655 	ddi_prop_t	*propp;
8656 
8657 	*driver_list = NULL;
8658 	*root_support_list = NULL;
8659 	*cur_elements = 0;
8660 	*max_elements = 0;
8661 
8662 	/* add the phci drivers derived from the phci driver.conf files */
8663 	for (m = 0; m < devcnt; m++) {
8664 		dnp = &devnamesp[m];
8665 
8666 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
8667 			LOCK_DEV_OPS(&dnp->dn_lock);
8668 			if (dnp->dn_global_prop_ptr != NULL &&
8669 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
8670 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
8671 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
8672 			    strcmp(propp->prop_val, vhci_class) == 0) {
8673 
8674 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
8675 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
8676 				    &dnp->dn_global_prop_ptr->prop_list)
8677 				    == NULL) ? 1 : 0;
8678 
8679 				add_to_phci_list(driver_list, root_support_list,
8680 				    cur_elements, max_elements, dnp->dn_name,
8681 				    root_support);
8682 
8683 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8684 			} else
8685 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8686 		}
8687 	}
8688 
8689 	driver_conf_count = *cur_elements;
8690 
8691 	/* add the phci drivers specified in the built-in tables */
8692 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
8693 		st_driver_list = scsi_phci_driver_list;
8694 		st_ndrivers = sizeof (scsi_phci_driver_list) /
8695 		    sizeof (mdi_phci_driver_info_t);
8696 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
8697 		st_driver_list = ib_phci_driver_list;
8698 		st_ndrivers = sizeof (ib_phci_driver_list) /
8699 		    sizeof (mdi_phci_driver_info_t);
8700 	} else {
8701 		st_driver_list = NULL;
8702 		st_ndrivers = 0;
8703 	}
8704 
8705 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
8706 		/* add this phci driver if not already added before */
8707 		for (j = 0; j < driver_conf_count; j++) {
8708 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
8709 				break;
8710 		}
8711 		if (j == driver_conf_count) {
8712 			add_to_phci_list(driver_list, root_support_list,
8713 			    cur_elements, max_elements, p->phdriver_name,
8714 			    p->phdriver_root_support);
8715 		}
8716 	}
8717 }
8718 
8719 /*
8720  * Attach the phci driver instances associated with the specified vhci class.
8721  * If root is mounted attach all phci driver instances.
8722  * If root is not mounted, attach the instances of only those phci
8723  * drivers that have the root support.
8724  */
8725 static void
8726 attach_phci_drivers(char *vhci_class)
8727 {
8728 	char	**driver_list, **p;
8729 	int	*root_support_list;
8730 	int	cur_elements, max_elements, i;
8731 	major_t	m;
8732 
8733 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
8734 	    &cur_elements, &max_elements);
8735 
8736 	for (i = 0; i < cur_elements; i++) {
8737 		if (modrootloaded || root_support_list[i]) {
8738 			m = ddi_name_to_major(driver_list[i]);
8739 			if (m != DDI_MAJOR_T_NONE &&
8740 			    ddi_hold_installed_driver(m))
8741 				ddi_rele_driver(m);
8742 		}
8743 	}
8744 
8745 	if (driver_list) {
8746 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
8747 			kmem_free(*p, strlen(*p) + 1);
8748 		kmem_free(driver_list, sizeof (char *) * max_elements);
8749 		kmem_free(root_support_list, sizeof (int) * max_elements);
8750 	}
8751 }
8752 
8753 /*
8754  * Build vhci cache:
8755  *
8756  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8757  * the phci driver instances. During this process the cache gets built.
8758  *
8759  * Cache is built fully if the root is mounted.
8760  * If the root is not mounted, phci drivers that do not have root support
8761  * are not attached. As a result the cache is built partially. The entries
8762  * in the cache reflect only those phci drivers that have root support.
8763  */
8764 static int
8765 build_vhci_cache(mdi_vhci_t *vh)
8766 {
8767 	mdi_vhci_config_t *vhc = vh->vh_config;
8768 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8769 
8770 	single_threaded_vhconfig_enter(vhc);
8771 
8772 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8773 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8774 		rw_exit(&vhcache->vhcache_lock);
8775 		single_threaded_vhconfig_exit(vhc);
8776 		return (0);
8777 	}
8778 	rw_exit(&vhcache->vhcache_lock);
8779 
8780 	attach_phci_drivers(vh->vh_class);
8781 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8782 	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
8783 
8784 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8785 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8786 	rw_exit(&vhcache->vhcache_lock);
8787 
8788 	single_threaded_vhconfig_exit(vhc);
8789 	vhcache_dirty(vhc);
8790 	return (1);
8791 }
8792 
8793 /*
8794  * Determine if discovery of paths is needed.
8795  */
8796 static int
8797 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8798 {
8799 	int rv = 1;
8800 
8801 	mutex_enter(&vhc->vhc_lock);
8802 	if (i_ddi_io_initialized() == 0) {
8803 		if (vhc->vhc_path_discovery_boot > 0) {
8804 			vhc->vhc_path_discovery_boot--;
8805 			goto out;
8806 		}
8807 	} else {
8808 		if (vhc->vhc_path_discovery_postboot > 0) {
8809 			vhc->vhc_path_discovery_postboot--;
8810 			goto out;
8811 		}
8812 	}
8813 
8814 	/*
8815 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8816 	 * This is to avoid a series of full path discoveries when opening
8817 	 * stale /dev/[r]dsk links.
8818 	 */
8819 	if (mdi_path_discovery_interval != -1 &&
8820 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8821 		goto out;
8822 
8823 	rv = 0;
8824 out:
8825 	mutex_exit(&vhc->vhc_lock);
8826 	return (rv);
8827 }
8828 
8829 /*
8830  * Discover all paths:
8831  *
8832  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8833  * driver instances. During this process all paths will be discovered.
8834  */
8835 static int
8836 vhcache_discover_paths(mdi_vhci_t *vh)
8837 {
8838 	mdi_vhci_config_t *vhc = vh->vh_config;
8839 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8840 	int rv = 0;
8841 
8842 	single_threaded_vhconfig_enter(vhc);
8843 
8844 	if (vhcache_do_discovery(vhc)) {
8845 		attach_phci_drivers(vh->vh_class);
8846 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8847 		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
8848 
8849 		mutex_enter(&vhc->vhc_lock);
8850 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8851 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8852 		mutex_exit(&vhc->vhc_lock);
8853 		rv = 1;
8854 	}
8855 
8856 	single_threaded_vhconfig_exit(vhc);
8857 	return (rv);
8858 }
8859 
8860 /*
8861  * Generic vhci bus config implementation:
8862  *
8863  * Parameters
8864  *	vdip	vhci dip
8865  *	flags	bus config flags
8866  *	op	bus config operation
8867  *	The remaining parameters are bus config operation specific
8868  *
8869  * for BUS_CONFIG_ONE
8870  *	arg	pointer to name@addr
8871  *	child	upon successful return from this function, *child will be
8872  *		set to the configured and held devinfo child node of vdip.
8873  *	ct_addr	pointer to client address (i.e. GUID)
8874  *
8875  * for BUS_CONFIG_DRIVER
8876  *	arg	major number of the driver
8877  *	child and ct_addr parameters are ignored
8878  *
8879  * for BUS_CONFIG_ALL
8880  *	arg, child, and ct_addr parameters are ignored
8881  *
8882  * Note that for the rest of the bus config operations, this function simply
8883  * calls the framework provided default bus config routine.
8884  */
8885 int
8886 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8887     void *arg, dev_info_t **child, char *ct_addr)
8888 {
8889 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8890 	mdi_vhci_config_t *vhc = vh->vh_config;
8891 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8892 	int rv = 0;
8893 	int params_valid = 0;
8894 	char *cp;
8895 
8896 	/*
8897 	 * To bus config vhcis we relay operation, possibly using another
8898 	 * thread, to phcis. The phci driver then interacts with MDI to cause
8899 	 * vhci child nodes to be enumerated under the vhci node.  Adding a
8900 	 * vhci child requires an ndi_devi_enter of the vhci. Since another
8901 	 * thread may be adding the child, to avoid deadlock we can't wait
8902 	 * for the relayed operations to complete if we have already entered
8903 	 * the vhci node.
8904 	 */
8905 	if (DEVI_BUSY_OWNED(vdip)) {
8906 		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: "
8907 		    "vhci dip is busy owned %p\n", (void *)vdip));
8908 		goto default_bus_config;
8909 	}
8910 
8911 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8912 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8913 		rw_exit(&vhcache->vhcache_lock);
8914 		rv = build_vhci_cache(vh);
8915 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8916 	}
8917 
8918 	switch (op) {
8919 	case BUS_CONFIG_ONE:
8920 		if (arg != NULL && ct_addr != NULL) {
8921 			/* extract node name */
8922 			cp = (char *)arg;
8923 			while (*cp != '\0' && *cp != '@')
8924 				cp++;
8925 			if (*cp == '@') {
8926 				params_valid = 1;
8927 				*cp = '\0';
8928 				config_client_paths(vhc, (char *)arg, ct_addr);
8929 				/* config_client_paths() releases cache_lock */
8930 				*cp = '@';
8931 				break;
8932 			}
8933 		}
8934 
8935 		rw_exit(&vhcache->vhcache_lock);
8936 		break;
8937 
8938 	case BUS_CONFIG_DRIVER:
8939 		rw_exit(&vhcache->vhcache_lock);
8940 		if (rv == 0)
8941 			st_bus_config_all_phcis(vhc, flags, op,
8942 			    (major_t)(uintptr_t)arg);
8943 		break;
8944 
8945 	case BUS_CONFIG_ALL:
8946 		rw_exit(&vhcache->vhcache_lock);
8947 		if (rv == 0)
8948 			st_bus_config_all_phcis(vhc, flags, op, -1);
8949 		break;
8950 
8951 	default:
8952 		rw_exit(&vhcache->vhcache_lock);
8953 		break;
8954 	}
8955 
8956 
8957 default_bus_config:
8958 	/*
8959 	 * All requested child nodes are enumerated under the vhci.
8960 	 * Now configure them.
8961 	 */
8962 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8963 	    NDI_SUCCESS) {
8964 		return (MDI_SUCCESS);
8965 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8966 		/* discover all paths and try configuring again */
8967 		if (vhcache_discover_paths(vh) &&
8968 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8969 		    NDI_SUCCESS)
8970 			return (MDI_SUCCESS);
8971 	}
8972 
8973 	return (MDI_FAILURE);
8974 }
8975 
8976 /*
8977  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8978  */
8979 static nvlist_t *
8980 read_on_disk_vhci_cache(char *vhci_class)
8981 {
8982 	nvlist_t *nvl;
8983 	int err;
8984 	char *filename;
8985 
8986 	filename = vhclass2vhcache_filename(vhci_class);
8987 
8988 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8989 		kmem_free(filename, strlen(filename) + 1);
8990 		return (nvl);
8991 	} else if (err == EIO)
8992 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8993 	else if (err == EINVAL)
8994 		cmn_err(CE_WARN,
8995 		    "%s: data file corrupted, will recreate\n", filename);
8996 
8997 	kmem_free(filename, strlen(filename) + 1);
8998 	return (NULL);
8999 }
9000 
9001 /*
9002  * Read on-disk vhci cache into nvlists for all vhci classes.
9003  * Called during booting by i_ddi_read_devices_files().
9004  */
9005 void
9006 mdi_read_devices_files(void)
9007 {
9008 	int i;
9009 
9010 	for (i = 0; i < N_VHCI_CLASSES; i++)
9011 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9012 }
9013 
9014 /*
9015  * Remove all stale entries from vhci cache.
9016  */
9017 static void
9018 clean_vhcache(mdi_vhci_config_t *vhc)
9019 {
9020 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9021 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
9022 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
9023 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
9024 
9025 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9026 
9027 	cct_head = vhcache->vhcache_client_head;
9028 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9029 	for (cct = cct_head; cct != NULL; cct = cct_next) {
9030 		cct_next = cct->cct_next;
9031 
9032 		cpi_head = cct->cct_cpi_head;
9033 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
9034 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
9035 			cpi_next = cpi->cpi_next;
9036 			if (cpi->cpi_pip != NULL) {
9037 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
9038 				enqueue_tail_vhcache_pathinfo(cct, cpi);
9039 			} else
9040 				free_vhcache_pathinfo(cpi);
9041 		}
9042 
9043 		if (cct->cct_cpi_head != NULL)
9044 			enqueue_vhcache_client(vhcache, cct);
9045 		else {
9046 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
9047 			    (mod_hash_key_t)cct->cct_name_addr);
9048 			free_vhcache_client(cct);
9049 		}
9050 	}
9051 
9052 	cphci_head = vhcache->vhcache_phci_head;
9053 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9054 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
9055 		cphci_next = cphci->cphci_next;
9056 		if (cphci->cphci_phci != NULL)
9057 			enqueue_vhcache_phci(vhcache, cphci);
9058 		else
9059 			free_vhcache_phci(cphci);
9060 	}
9061 
9062 	vhcache->vhcache_clean_time = lbolt64;
9063 	rw_exit(&vhcache->vhcache_lock);
9064 	vhcache_dirty(vhc);
9065 }
9066 
9067 /*
9068  * Remove all stale entries from vhci cache.
9069  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9070  */
9071 void
9072 mdi_clean_vhcache(void)
9073 {
9074 	mdi_vhci_t *vh;
9075 
9076 	mutex_enter(&mdi_mutex);
9077 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9078 		vh->vh_refcnt++;
9079 		mutex_exit(&mdi_mutex);
9080 		clean_vhcache(vh->vh_config);
9081 		mutex_enter(&mdi_mutex);
9082 		vh->vh_refcnt--;
9083 	}
9084 	mutex_exit(&mdi_mutex);
9085 }
9086 
9087 /*
9088  * mdi_vhci_walk_clients():
9089  *		Walker routine to traverse client dev_info nodes
9090  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9091  * below the client, including nexus devices, which we dont want.
9092  * So we just traverse the immediate siblings, starting from 1st client.
9093  */
9094 void
9095 mdi_vhci_walk_clients(dev_info_t *vdip,
9096     int (*f)(dev_info_t *, void *), void *arg)
9097 {
9098 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9099 	dev_info_t	*cdip;
9100 	mdi_client_t	*ct;
9101 
9102 	MDI_VHCI_CLIENT_LOCK(vh);
9103 	cdip = ddi_get_child(vdip);
9104 	while (cdip) {
9105 		ct = i_devi_get_client(cdip);
9106 		MDI_CLIENT_LOCK(ct);
9107 
9108 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9109 			cdip = ddi_get_next_sibling(cdip);
9110 		else
9111 			cdip = NULL;
9112 
9113 		MDI_CLIENT_UNLOCK(ct);
9114 	}
9115 	MDI_VHCI_CLIENT_UNLOCK(vh);
9116 }
9117 
9118 /*
9119  * mdi_vhci_walk_phcis():
9120  *		Walker routine to traverse phci dev_info nodes
9121  */
9122 void
9123 mdi_vhci_walk_phcis(dev_info_t *vdip,
9124     int (*f)(dev_info_t *, void *), void *arg)
9125 {
9126 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9127 	mdi_phci_t	*ph, *next;
9128 
9129 	MDI_VHCI_PHCI_LOCK(vh);
9130 	ph = vh->vh_phci_head;
9131 	while (ph) {
9132 		MDI_PHCI_LOCK(ph);
9133 
9134 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9135 			next = ph->ph_next;
9136 		else
9137 			next = NULL;
9138 
9139 		MDI_PHCI_UNLOCK(ph);
9140 		ph = next;
9141 	}
9142 	MDI_VHCI_PHCI_UNLOCK(vh);
9143 }
9144 
9145 
9146 /*
9147  * mdi_walk_vhcis():
9148  *		Walker routine to traverse vhci dev_info nodes
9149  */
9150 void
9151 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9152 {
9153 	mdi_vhci_t	*vh = NULL;
9154 
9155 	mutex_enter(&mdi_mutex);
9156 	/*
9157 	 * Scan for already registered vhci
9158 	 */
9159 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9160 		vh->vh_refcnt++;
9161 		mutex_exit(&mdi_mutex);
9162 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9163 			mutex_enter(&mdi_mutex);
9164 			vh->vh_refcnt--;
9165 			break;
9166 		} else {
9167 			mutex_enter(&mdi_mutex);
9168 			vh->vh_refcnt--;
9169 		}
9170 	}
9171 
9172 	mutex_exit(&mdi_mutex);
9173 }
9174 
9175 /*
9176  * i_mdi_log_sysevent():
9177  *		Logs events for pickup by syseventd
9178  */
9179 static void
9180 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9181 {
9182 	char		*path_name;
9183 	nvlist_t	*attr_list;
9184 
9185 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9186 	    KM_SLEEP) != DDI_SUCCESS) {
9187 		goto alloc_failed;
9188 	}
9189 
9190 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9191 	(void) ddi_pathname(dip, path_name);
9192 
9193 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9194 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
9195 		goto error;
9196 	}
9197 
9198 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9199 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9200 		goto error;
9201 	}
9202 
9203 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9204 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9205 		goto error;
9206 	}
9207 
9208 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
9209 	    path_name) != DDI_SUCCESS) {
9210 		goto error;
9211 	}
9212 
9213 	if (nvlist_add_string(attr_list, DDI_CLASS,
9214 	    ph_vh_class) != DDI_SUCCESS) {
9215 		goto error;
9216 	}
9217 
9218 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9219 	    attr_list, NULL, DDI_SLEEP);
9220 
9221 error:
9222 	kmem_free(path_name, MAXPATHLEN);
9223 	nvlist_free(attr_list);
9224 	return;
9225 
9226 alloc_failed:
9227 	MDI_DEBUG(1, (CE_WARN, dip,
9228 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
9229 }
9230 
9231 char **
9232 mdi_get_phci_driver_list(char *vhci_class, int	*ndrivers)
9233 {
9234 	char	**driver_list, **ret_driver_list = NULL;
9235 	int	*root_support_list;
9236 	int	cur_elements, max_elements;
9237 
9238 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9239 	    &cur_elements, &max_elements);
9240 
9241 
9242 	if (driver_list) {
9243 		kmem_free(root_support_list, sizeof (int) * max_elements);
9244 		ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9245 		    * max_elements, sizeof (char *) * cur_elements);
9246 	}
9247 	*ndrivers = cur_elements;
9248 
9249 	return (ret_driver_list);
9250 
9251 }
9252 
9253 void
9254 mdi_free_phci_driver_list(char **driver_list, int ndrivers)
9255 {
9256 	char	**p;
9257 	int	i;
9258 
9259 	if (driver_list) {
9260 		for (i = 0, p = driver_list; i < ndrivers; i++, p++)
9261 			kmem_free(*p, strlen(*p) + 1);
9262 		kmem_free(driver_list, sizeof (char *) * ndrivers);
9263 	}
9264 }
9265