xref: /titanic_51/usr/src/uts/common/os/sunmdi.c (revision d9638e547d8811f2c689977f8dd2a353938b61fd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
30  * detailed discussion of the overall mpxio architecture.
31  *
32  * Default locking order:
33  *
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
35  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
39  */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 #include <sys/modhash.h>
68 #include <sys/disp.h>
69 #include <sys/autoconf.h>
70 
71 #ifdef	DEBUG
72 #include <sys/debug.h>
73 int	mdi_debug = 1;
74 #define	MDI_DEBUG(level, stmnt) \
75 	    if (mdi_debug >= (level)) i_mdi_log stmnt
76 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
77 #else	/* !DEBUG */
78 #define	MDI_DEBUG(level, stmnt)
79 #endif	/* DEBUG */
80 
81 extern pri_t	minclsyspri;
82 extern int	modrootloaded;
83 
84 /*
85  * Global mutex:
86  * Protects vHCI list and structure members, pHCI and Client lists.
87  */
88 kmutex_t	mdi_mutex;
89 
90 /*
91  * Registered vHCI class driver lists
92  */
93 int		mdi_vhci_count;
94 mdi_vhci_t	*mdi_vhci_head;
95 mdi_vhci_t	*mdi_vhci_tail;
96 
97 /*
98  * Client Hash Table size
99  */
100 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
101 
102 /*
103  * taskq interface definitions
104  */
105 #define	MDI_TASKQ_N_THREADS	8
106 #define	MDI_TASKQ_PRI		minclsyspri
107 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
108 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
109 
110 taskq_t				*mdi_taskq;
111 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
112 
113 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
114 
115 /*
116  * The data should be "quiet" for this interval (in seconds) before the
117  * vhci cached data is flushed to the disk.
118  */
119 static int mdi_vhcache_flush_delay = 10;
120 
121 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
122 static int mdi_vhcache_flush_daemon_idle_time = 60;
123 
124 /*
125  * MDI falls back to discovery of all paths when a bus_config_one fails.
126  * The following parameters can be used to tune this operation.
127  *
128  * mdi_path_discovery_boot
129  *	Number of times path discovery will be attempted during early boot.
130  *	Probably there is no reason to ever set this value to greater than one.
131  *
132  * mdi_path_discovery_postboot
133  *	Number of times path discovery will be attempted after early boot.
134  *	Set it to a minimum of two to allow for discovery of iscsi paths which
135  *	may happen very late during booting.
136  *
137  * mdi_path_discovery_interval
138  *	Minimum number of seconds MDI will wait between successive discovery
139  *	of all paths. Set it to -1 to disable discovery of all paths.
140  */
141 static int mdi_path_discovery_boot = 1;
142 static int mdi_path_discovery_postboot = 2;
143 static int mdi_path_discovery_interval = 10;
144 
145 /*
146  * number of seconds the asynchronous configuration thread will sleep idle
147  * before exiting.
148  */
149 static int mdi_async_config_idle_time = 600;
150 
151 static int mdi_bus_config_cache_hash_size = 256;
152 
153 /* turns off multithreaded configuration for certain operations */
154 static int mdi_mtc_off = 0;
155 
156 /*
157  * MDI component property name/value string definitions
158  */
159 const char 		*mdi_component_prop = "mpxio-component";
160 const char		*mdi_component_prop_vhci = "vhci";
161 const char		*mdi_component_prop_phci = "phci";
162 const char		*mdi_component_prop_client = "client";
163 
164 /*
165  * MDI client global unique identifier property name
166  */
167 const char		*mdi_client_guid_prop = "client-guid";
168 
169 /*
170  * MDI client load balancing property name/value string definitions
171  */
172 const char		*mdi_load_balance = "load-balance";
173 const char		*mdi_load_balance_none = "none";
174 const char		*mdi_load_balance_rr = "round-robin";
175 const char		*mdi_load_balance_lba = "logical-block";
176 
177 /*
178  * Obsolete vHCI class definition; to be removed after Leadville update
179  */
180 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
181 
182 static char vhci_greeting[] =
183 	"\tThere already exists one vHCI driver for class %s\n"
184 	"\tOnly one vHCI driver for each class is allowed\n";
185 
186 /*
187  * Static function prototypes
188  */
189 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
190 static int		i_mdi_client_offline(dev_info_t *, uint_t);
191 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
192 static void		i_mdi_phci_post_detach(dev_info_t *,
193 			    ddi_detach_cmd_t, int);
194 static int		i_mdi_client_pre_detach(dev_info_t *,
195 			    ddi_detach_cmd_t);
196 static void		i_mdi_client_post_detach(dev_info_t *,
197 			    ddi_detach_cmd_t, int);
198 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
199 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
200 static int 		i_mdi_lba_lb(mdi_client_t *ct,
201 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
202 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
203 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
204 static void		i_mdi_pm_reset_client(mdi_client_t *);
205 static void		i_mdi_pm_hold_all_phci(mdi_client_t *);
206 static int		i_mdi_power_all_phci(mdi_client_t *);
207 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
208 
209 
210 /*
211  * Internal mdi_pathinfo node functions
212  */
213 static int		i_mdi_pi_kstat_create(mdi_pathinfo_t *);
214 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
215 
216 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
217 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
218 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
219 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
220 static void		i_mdi_phci_get_client_lock(mdi_phci_t *,
221 			    mdi_client_t *);
222 static void		i_mdi_phci_unlock(mdi_phci_t *);
223 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
224 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
225 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
226 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
227 			    mdi_client_t *);
228 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
229 static void		i_mdi_client_remove_path(mdi_client_t *,
230 			    mdi_pathinfo_t *);
231 
232 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
233 			    mdi_pathinfo_state_t, int);
234 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
235 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
236 			    char **, int);
237 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
238 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
239 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
240 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
241 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
242 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
243 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
244 static void		i_mdi_client_update_state(mdi_client_t *);
245 static int		i_mdi_client_compute_state(mdi_client_t *,
246 			    mdi_phci_t *);
247 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
248 static void		i_mdi_client_unlock(mdi_client_t *);
249 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
250 static mdi_client_t	*i_devi_get_client(dev_info_t *);
251 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int,
252 			int);
253 /*
254  * Failover related function prototypes
255  */
256 static int		i_mdi_failover(void *);
257 
258 /*
259  * misc internal functions
260  */
261 static int		i_mdi_get_hash_key(char *);
262 static int		i_map_nvlist_error_to_mdi(int);
263 static void		i_mdi_report_path_state(mdi_client_t *,
264 			    mdi_pathinfo_t *);
265 
266 static void		setup_vhci_cache(mdi_vhci_t *);
267 static int		destroy_vhci_cache(mdi_vhci_t *);
268 static void		setup_phci_driver_list(mdi_vhci_t *);
269 static void		free_phci_driver_list(mdi_vhci_config_t *);
270 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
271 static boolean_t	stop_vhcache_flush_thread(void *, int);
272 static void		free_string_array(char **, int);
273 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
274 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
275 static void		free_vhcache_client(mdi_vhcache_client_t *);
276 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
277 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
278 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
279 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
280 static void		vhcache_pi_add(mdi_vhci_config_t *,
281 			    struct mdi_pathinfo *);
282 static void		vhcache_pi_remove(mdi_vhci_config_t *,
283 			    struct mdi_pathinfo *);
284 static void		free_phclient_path_list(mdi_phys_path_t *);
285 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
286 static int		flush_vhcache(mdi_vhci_config_t *, int);
287 static void		vhcache_dirty(mdi_vhci_config_t *);
288 static void		free_async_client_config(mdi_async_client_config_t *);
289 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
290 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
291 static nvlist_t		*read_on_disk_vhci_cache(char *);
292 extern int		fread_nvlist(char *, nvlist_t **);
293 extern int		fwrite_nvlist(char *, nvlist_t *);
294 
295 /* called once when first vhci registers with mdi */
296 static void
297 i_mdi_init()
298 {
299 	static int initialized = 0;
300 
301 	if (initialized)
302 		return;
303 	initialized = 1;
304 
305 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
306 	/*
307 	 * Create our taskq resources
308 	 */
309 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
310 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
311 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
312 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
313 }
314 
315 /*
316  * mdi_get_component_type():
317  *		Return mpxio component type
318  * Return Values:
319  *		MDI_COMPONENT_NONE
320  *		MDI_COMPONENT_VHCI
321  *		MDI_COMPONENT_PHCI
322  *		MDI_COMPONENT_CLIENT
323  * XXX This doesn't work under multi-level MPxIO and should be
324  *	removed when clients migrate mdi_is_*() interfaces.
325  */
326 int
327 mdi_get_component_type(dev_info_t *dip)
328 {
329 	return (DEVI(dip)->devi_mdi_component);
330 }
331 
332 /*
333  * mdi_vhci_register():
334  *		Register a vHCI module with the mpxio framework
335  *		mdi_vhci_register() is called by vHCI drivers to register the
336  *		'class_driver' vHCI driver and its MDI entrypoints with the
337  *		mpxio framework.  The vHCI driver must call this interface as
338  *		part of its attach(9e) handler.
339  *		Competing threads may try to attach mdi_vhci_register() as
340  *		the vHCI drivers are loaded and attached as a result of pHCI
341  *		driver instance registration (mdi_phci_register()) with the
342  *		framework.
343  * Return Values:
344  *		MDI_SUCCESS
345  *		MDI_FAILURE
346  */
347 
348 /*ARGSUSED*/
349 int
350 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
351     int flags)
352 {
353 	mdi_vhci_t		*vh = NULL;
354 
355 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
356 
357 	i_mdi_init();
358 
359 	mutex_enter(&mdi_mutex);
360 	/*
361 	 * Scan for already registered vhci
362 	 */
363 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
364 		if (strcmp(vh->vh_class, class) == 0) {
365 			/*
366 			 * vHCI has already been created.  Check for valid
367 			 * vHCI ops registration.  We only support one vHCI
368 			 * module per class
369 			 */
370 			if (vh->vh_ops != NULL) {
371 				mutex_exit(&mdi_mutex);
372 				cmn_err(CE_NOTE, vhci_greeting, class);
373 				return (MDI_FAILURE);
374 			}
375 			break;
376 		}
377 	}
378 
379 	/*
380 	 * if not yet created, create the vHCI component
381 	 */
382 	if (vh == NULL) {
383 		struct client_hash	*hash = NULL;
384 		char			*load_balance;
385 
386 		/*
387 		 * Allocate and initialize the mdi extensions
388 		 */
389 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
390 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
391 		    KM_SLEEP);
392 		vh->vh_client_table = hash;
393 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
394 		(void) strcpy(vh->vh_class, class);
395 		vh->vh_lb = LOAD_BALANCE_RR;
396 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
397 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
398 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
399 				vh->vh_lb = LOAD_BALANCE_NONE;
400 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
401 				    == 0) {
402 				vh->vh_lb = LOAD_BALANCE_LBA;
403 			}
404 			ddi_prop_free(load_balance);
405 		}
406 
407 		/*
408 		 * Store the vHCI ops vectors
409 		 */
410 		vh->vh_dip = vdip;
411 		vh->vh_ops = vops;
412 
413 		setup_vhci_cache(vh);
414 
415 		if (mdi_vhci_head == NULL) {
416 			mdi_vhci_head = vh;
417 		}
418 		if (mdi_vhci_tail) {
419 			mdi_vhci_tail->vh_next = vh;
420 		}
421 		mdi_vhci_tail = vh;
422 		mdi_vhci_count++;
423 	}
424 
425 	/*
426 	 * Claim the devfs node as a vhci component
427 	 */
428 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
429 
430 	/*
431 	 * Initialize our back reference from dev_info node
432 	 */
433 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
434 	mutex_exit(&mdi_mutex);
435 	return (MDI_SUCCESS);
436 }
437 
438 /*
439  * mdi_vhci_unregister():
440  *		Unregister a vHCI module from mpxio framework
441  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
442  * 		of a vhci to unregister it from the framework.
443  * Return Values:
444  *		MDI_SUCCESS
445  *		MDI_FAILURE
446  */
447 
448 /*ARGSUSED*/
449 int
450 mdi_vhci_unregister(dev_info_t *vdip, int flags)
451 {
452 	mdi_vhci_t	*found, *vh, *prev = NULL;
453 
454 	/*
455 	 * Check for invalid VHCI
456 	 */
457 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
458 		return (MDI_FAILURE);
459 
460 	mutex_enter(&mdi_mutex);
461 
462 	/*
463 	 * Scan the list of registered vHCIs for a match
464 	 */
465 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
466 		if (found == vh)
467 			break;
468 		prev = found;
469 	}
470 
471 	if (found == NULL) {
472 		mutex_exit(&mdi_mutex);
473 		return (MDI_FAILURE);
474 	}
475 
476 	/*
477 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
478 	 * should have been unregistered, before a vHCI can be
479 	 * unregistered.
480 	 */
481 	if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) {
482 		mutex_exit(&mdi_mutex);
483 		return (MDI_FAILURE);
484 	}
485 
486 	/*
487 	 * Remove the vHCI from the global list
488 	 */
489 	if (vh == mdi_vhci_head) {
490 		mdi_vhci_head = vh->vh_next;
491 	} else {
492 		prev->vh_next = vh->vh_next;
493 	}
494 	if (vh == mdi_vhci_tail) {
495 		mdi_vhci_tail = prev;
496 	}
497 
498 	mdi_vhci_count--;
499 	mutex_exit(&mdi_mutex);
500 
501 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
502 		/* add vhci to the global list */
503 		mutex_enter(&mdi_mutex);
504 		if (mdi_vhci_head == NULL)
505 			mdi_vhci_head = vh;
506 		else
507 			mdi_vhci_tail->vh_next = vh;
508 		mdi_vhci_tail = vh;
509 		mdi_vhci_count++;
510 		mutex_exit(&mdi_mutex);
511 		return (MDI_FAILURE);
512 	}
513 
514 	vh->vh_ops = NULL;
515 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
516 	DEVI(vdip)->devi_mdi_xhci = NULL;
517 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
518 	kmem_free(vh->vh_client_table,
519 	    mdi_client_table_size * sizeof (struct client_hash));
520 
521 	kmem_free(vh, sizeof (mdi_vhci_t));
522 	return (MDI_SUCCESS);
523 }
524 
525 /*
526  * i_mdi_vhci_class2vhci():
527  *		Look for a matching vHCI module given a vHCI class name
528  * Return Values:
529  *		Handle to a vHCI component
530  *		NULL
531  */
532 static mdi_vhci_t *
533 i_mdi_vhci_class2vhci(char *class)
534 {
535 	mdi_vhci_t	*vh = NULL;
536 
537 	ASSERT(!MUTEX_HELD(&mdi_mutex));
538 
539 	mutex_enter(&mdi_mutex);
540 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
541 		if (strcmp(vh->vh_class, class) == 0) {
542 			break;
543 		}
544 	}
545 	mutex_exit(&mdi_mutex);
546 	return (vh);
547 }
548 
549 /*
550  * i_devi_get_vhci():
551  *		Utility function to get the handle to a vHCI component
552  * Return Values:
553  *		Handle to a vHCI component
554  *		NULL
555  */
556 mdi_vhci_t *
557 i_devi_get_vhci(dev_info_t *vdip)
558 {
559 	mdi_vhci_t	*vh = NULL;
560 	if (MDI_VHCI(vdip)) {
561 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
562 	}
563 	return (vh);
564 }
565 
566 /*
567  * mdi_phci_register():
568  *		Register a pHCI module with mpxio framework
569  *		mdi_phci_register() is called by pHCI drivers to register with
570  *		the mpxio framework and a specific 'class_driver' vHCI.  The
571  *		pHCI driver must call this interface as part of its attach(9e)
572  *		handler.
573  * Return Values:
574  *		MDI_SUCCESS
575  *		MDI_FAILURE
576  */
577 
578 /*ARGSUSED*/
579 int
580 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
581 {
582 	mdi_phci_t		*ph;
583 	mdi_vhci_t		*vh;
584 	char			*data;
585 	char			*pathname;
586 
587 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
588 	(void) ddi_pathname(pdip, pathname);
589 
590 	/*
591 	 * Check for mpxio-disable property. Enable mpxio if the property is
592 	 * missing or not set to "yes".
593 	 * If the property is set to "yes" then emit a brief message.
594 	 */
595 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
596 	    &data) == DDI_SUCCESS)) {
597 		if (strcmp(data, "yes") == 0) {
598 			MDI_DEBUG(1, (CE_CONT, pdip,
599 			    "?%s (%s%d) multipath capabilities "
600 			    "disabled via %s.conf.\n", pathname,
601 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
602 			    ddi_driver_name(pdip)));
603 			ddi_prop_free(data);
604 			kmem_free(pathname, MAXPATHLEN);
605 			return (MDI_FAILURE);
606 		}
607 		ddi_prop_free(data);
608 	}
609 
610 	kmem_free(pathname, MAXPATHLEN);
611 
612 	/*
613 	 * Search for a matching vHCI
614 	 */
615 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
616 	if (vh == NULL) {
617 		return (MDI_FAILURE);
618 	}
619 
620 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
621 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
622 	ph->ph_dip = pdip;
623 	ph->ph_vhci = vh;
624 	ph->ph_next = NULL;
625 	ph->ph_unstable = 0;
626 	ph->ph_vprivate = 0;
627 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
628 	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);
629 
630 	MDI_PHCI_SET_POWER_UP(ph);
631 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
632 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
633 
634 	vhcache_phci_add(vh->vh_config, ph);
635 
636 	mutex_enter(&mdi_mutex);
637 	if (vh->vh_phci_head == NULL) {
638 		vh->vh_phci_head = ph;
639 	}
640 	if (vh->vh_phci_tail) {
641 		vh->vh_phci_tail->ph_next = ph;
642 	}
643 	vh->vh_phci_tail = ph;
644 	vh->vh_phci_count++;
645 	mutex_exit(&mdi_mutex);
646 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
647 	return (MDI_SUCCESS);
648 }
649 
650 /*
651  * mdi_phci_unregister():
652  *		Unregister a pHCI module from mpxio framework
653  *		mdi_phci_unregister() is called by the pHCI drivers from their
654  *		detach(9E) handler to unregister their instances from the
655  *		framework.
656  * Return Values:
657  *		MDI_SUCCESS
658  *		MDI_FAILURE
659  */
660 
661 /*ARGSUSED*/
662 int
663 mdi_phci_unregister(dev_info_t *pdip, int flags)
664 {
665 	mdi_vhci_t		*vh;
666 	mdi_phci_t		*ph;
667 	mdi_phci_t		*tmp;
668 	mdi_phci_t		*prev = NULL;
669 
670 	ph = i_devi_get_phci(pdip);
671 	if (ph == NULL) {
672 		MDI_DEBUG(1, (CE_WARN, pdip,
673 		    "!pHCI unregister: Not a valid pHCI"));
674 		return (MDI_FAILURE);
675 	}
676 
677 	vh = ph->ph_vhci;
678 	ASSERT(vh != NULL);
679 	if (vh == NULL) {
680 		MDI_DEBUG(1, (CE_WARN, pdip,
681 		    "!pHCI unregister: Not a valid vHCI"));
682 		return (MDI_FAILURE);
683 	}
684 
685 	mutex_enter(&mdi_mutex);
686 	tmp = vh->vh_phci_head;
687 	while (tmp) {
688 		if (tmp == ph) {
689 			break;
690 		}
691 		prev = tmp;
692 		tmp = tmp->ph_next;
693 	}
694 
695 	if (ph == vh->vh_phci_head) {
696 		vh->vh_phci_head = ph->ph_next;
697 	} else {
698 		prev->ph_next = ph->ph_next;
699 	}
700 
701 	if (ph == vh->vh_phci_tail) {
702 		vh->vh_phci_tail = prev;
703 	}
704 
705 	vh->vh_phci_count--;
706 
707 	mutex_exit(&mdi_mutex);
708 
709 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
710 	    ESC_DDI_INITIATOR_UNREGISTER);
711 	vhcache_phci_remove(vh->vh_config, ph);
712 	cv_destroy(&ph->ph_unstable_cv);
713 	cv_destroy(&ph->ph_powerchange_cv);
714 	mutex_destroy(&ph->ph_mutex);
715 	kmem_free(ph, sizeof (mdi_phci_t));
716 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
717 	DEVI(pdip)->devi_mdi_xhci = NULL;
718 	return (MDI_SUCCESS);
719 }
720 
721 /*
722  * i_devi_get_phci():
723  * 		Utility function to return the phci extensions.
724  */
725 static mdi_phci_t *
726 i_devi_get_phci(dev_info_t *pdip)
727 {
728 	mdi_phci_t	*ph = NULL;
729 	if (MDI_PHCI(pdip)) {
730 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
731 	}
732 	return (ph);
733 }
734 
735 /*
736  * mdi_phci_path2devinfo():
737  * 		Utility function to search for a valid phci device given
738  *		the devfs pathname.
739  */
740 
741 dev_info_t *
742 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
743 {
744 	char		*temp_pathname;
745 	mdi_vhci_t	*vh;
746 	mdi_phci_t	*ph;
747 	dev_info_t 	*pdip = NULL;
748 
749 	vh = i_devi_get_vhci(vdip);
750 	ASSERT(vh != NULL);
751 
752 	if (vh == NULL) {
753 		/*
754 		 * Invalid vHCI component, return failure
755 		 */
756 		return (NULL);
757 	}
758 
759 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
760 	mutex_enter(&mdi_mutex);
761 	ph = vh->vh_phci_head;
762 	while (ph != NULL) {
763 		pdip = ph->ph_dip;
764 		ASSERT(pdip != NULL);
765 		*temp_pathname = '\0';
766 		(void) ddi_pathname(pdip, temp_pathname);
767 		if (strcmp(temp_pathname, pathname) == 0) {
768 			break;
769 		}
770 		ph = ph->ph_next;
771 	}
772 	if (ph == NULL) {
773 		pdip = NULL;
774 	}
775 	mutex_exit(&mdi_mutex);
776 	kmem_free(temp_pathname, MAXPATHLEN);
777 	return (pdip);
778 }
779 
780 /*
781  * mdi_phci_get_path_count():
782  * 		get number of path information nodes associated with a given
783  *		pHCI device.
784  */
785 int
786 mdi_phci_get_path_count(dev_info_t *pdip)
787 {
788 	mdi_phci_t	*ph;
789 	int		count = 0;
790 
791 	ph = i_devi_get_phci(pdip);
792 	if (ph != NULL) {
793 		count = ph->ph_path_count;
794 	}
795 	return (count);
796 }
797 
798 /*
799  * i_mdi_phci_lock():
800  *		Lock a pHCI device
801  * Return Values:
802  *		None
803  * Note:
804  *		The default locking order is:
805  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
806  *		But there are number of situations where locks need to be
807  *		grabbed in reverse order.  This routine implements try and lock
808  *		mechanism depending on the requested parameter option.
809  */
810 static void
811 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
812 {
813 	if (pip) {
814 		/* Reverse locking is requested. */
815 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
816 			/*
817 			 * tryenter failed. Try to grab again
818 			 * after a small delay
819 			 */
820 			MDI_PI_HOLD(pip);
821 			MDI_PI_UNLOCK(pip);
822 			delay(1);
823 			MDI_PI_LOCK(pip);
824 			MDI_PI_RELE(pip);
825 		}
826 	} else {
827 		MDI_PHCI_LOCK(ph);
828 	}
829 }
830 
831 /*
832  * i_mdi_phci_get_client_lock():
833  *		Lock a pHCI device
834  * Return Values:
835  *		None
836  * Note:
837  *		The default locking order is:
838  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
839  *		But there are number of situations where locks need to be
840  *		grabbed in reverse order.  This routine implements try and lock
841  *		mechanism depending on the requested parameter option.
842  */
843 static void
844 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
845 {
846 	if (ct) {
847 		/* Reverse locking is requested. */
848 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
849 			/*
850 			 * tryenter failed. Try to grab again
851 			 * after a small delay
852 			 */
853 			MDI_CLIENT_UNLOCK(ct);
854 			delay(1);
855 			MDI_CLIENT_LOCK(ct);
856 		}
857 	} else {
858 		MDI_PHCI_LOCK(ph);
859 	}
860 }
861 
862 /*
863  * i_mdi_phci_unlock():
864  *		Unlock the pHCI component
865  */
866 static void
867 i_mdi_phci_unlock(mdi_phci_t *ph)
868 {
869 	MDI_PHCI_UNLOCK(ph);
870 }
871 
872 /*
873  * i_mdi_devinfo_create():
874  *		create client device's devinfo node
875  * Return Values:
876  *		dev_info
877  *		NULL
878  * Notes:
879  */
880 static dev_info_t *
881 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
882 	char **compatible, int ncompatible)
883 {
884 	dev_info_t *cdip = NULL;
885 
886 	ASSERT(MUTEX_HELD(&mdi_mutex));
887 
888 	/* Verify for duplicate entry */
889 	cdip = i_mdi_devinfo_find(vh, name, guid);
890 	ASSERT(cdip == NULL);
891 	if (cdip) {
892 		cmn_err(CE_WARN,
893 		    "i_mdi_devinfo_create: client dip %p already exists",
894 			(void *)cdip);
895 	}
896 
897 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
898 	if (cdip == NULL)
899 		goto fail;
900 
901 	/*
902 	 * Create component type and Global unique identifier
903 	 * properties
904 	 */
905 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
906 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
907 		goto fail;
908 	}
909 
910 	/* Decorate the node with compatible property */
911 	if (compatible &&
912 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
913 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
914 		goto fail;
915 	}
916 
917 	return (cdip);
918 
919 fail:
920 	if (cdip) {
921 		(void) ndi_prop_remove_all(cdip);
922 		(void) ndi_devi_free(cdip);
923 	}
924 	return (NULL);
925 }
926 
927 /*
928  * i_mdi_devinfo_find():
929  *		Find a matching devinfo node for given client node name
930  *		and its guid.
931  * Return Values:
932  *		Handle to a dev_info node or NULL
933  */
934 
935 static dev_info_t *
936 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
937 {
938 	char			*data;
939 	dev_info_t 		*cdip = NULL;
940 	dev_info_t 		*ndip = NULL;
941 	int			circular;
942 
943 	ndi_devi_enter(vh->vh_dip, &circular);
944 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
945 	while ((cdip = ndip) != NULL) {
946 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
947 
948 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
949 			continue;
950 		}
951 
952 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
953 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
954 		    &data) != DDI_PROP_SUCCESS) {
955 			continue;
956 		}
957 
958 		if (strcmp(data, guid) != 0) {
959 			ddi_prop_free(data);
960 			continue;
961 		}
962 		ddi_prop_free(data);
963 		break;
964 	}
965 	ndi_devi_exit(vh->vh_dip, circular);
966 	return (cdip);
967 }
968 
969 /*
970  * i_mdi_devinfo_remove():
971  *		Remove a client device node
972  */
973 static int
974 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
975 {
976 	int	rv = MDI_SUCCESS;
977 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
978 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
979 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
980 		if (rv != NDI_SUCCESS) {
981 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
982 			    " failed. cdip = %p\n", cdip));
983 		}
984 		/*
985 		 * Convert to MDI error code
986 		 */
987 		switch (rv) {
988 		case NDI_SUCCESS:
989 			rv = MDI_SUCCESS;
990 			break;
991 		case NDI_BUSY:
992 			rv = MDI_BUSY;
993 			break;
994 		default:
995 			rv = MDI_FAILURE;
996 			break;
997 		}
998 	}
999 	return (rv);
1000 }
1001 
1002 /*
1003  * i_devi_get_client()
1004  *		Utility function to get mpxio component extensions
1005  */
1006 static mdi_client_t *
1007 i_devi_get_client(dev_info_t *cdip)
1008 {
1009 	mdi_client_t	*ct = NULL;
1010 	if (MDI_CLIENT(cdip)) {
1011 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1012 	}
1013 	return (ct);
1014 }
1015 
1016 /*
1017  * i_mdi_is_child_present():
1018  *		Search for the presence of client device dev_info node
1019  */
1020 
1021 static int
1022 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1023 {
1024 	int		rv = MDI_FAILURE;
1025 	struct dev_info	*dip;
1026 	int		circular;
1027 
1028 	ndi_devi_enter(vdip, &circular);
1029 	dip = DEVI(vdip)->devi_child;
1030 	while (dip) {
1031 		if (dip == DEVI(cdip)) {
1032 			rv = MDI_SUCCESS;
1033 			break;
1034 		}
1035 		dip = dip->devi_sibling;
1036 	}
1037 	ndi_devi_exit(vdip, circular);
1038 	return (rv);
1039 }
1040 
1041 
1042 /*
1043  * i_mdi_client_lock():
1044  *		Grab client component lock
1045  * Return Values:
1046  *		None
1047  * Note:
1048  *		The default locking order is:
1049  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1050  *		But there are number of situations where locks need to be
1051  *		grabbed in reverse order.  This routine implements try and lock
1052  *		mechanism depending on the requested parameter option.
1053  */
1054 
1055 static void
1056 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1057 {
1058 	if (pip) {
1059 		/*
1060 		 * Reverse locking is requested.
1061 		 */
1062 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1063 			/*
1064 			 * tryenter failed. Try to grab again
1065 			 * after a small delay
1066 			 */
1067 			MDI_PI_HOLD(pip);
1068 			MDI_PI_UNLOCK(pip);
1069 			delay(1);
1070 			MDI_PI_LOCK(pip);
1071 			MDI_PI_RELE(pip);
1072 		}
1073 	} else {
1074 		MDI_CLIENT_LOCK(ct);
1075 	}
1076 }
1077 
1078 /*
1079  * i_mdi_client_unlock():
1080  *		Unlock a client component
1081  */
1082 
1083 static void
1084 i_mdi_client_unlock(mdi_client_t *ct)
1085 {
1086 	MDI_CLIENT_UNLOCK(ct);
1087 }
1088 
1089 /*
1090  * i_mdi_client_alloc():
1091  * 		Allocate and initialize a client structure.  Caller should
1092  *		hold the global mdi_mutex.
1093  * Return Values:
1094  *		Handle to a client component
1095  */
1096 /*ARGSUSED*/
1097 static mdi_client_t *
1098 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1099 {
1100 	mdi_client_t	*ct;
1101 
1102 	ASSERT(MUTEX_HELD(&mdi_mutex));
1103 
1104 	/*
1105 	 * Allocate and initialize a component structure.
1106 	 */
1107 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1108 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1109 	ct->ct_hnext = NULL;
1110 	ct->ct_hprev = NULL;
1111 	ct->ct_dip = NULL;
1112 	ct->ct_vhci = vh;
1113 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1114 	(void) strcpy(ct->ct_drvname, name);
1115 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1116 	(void) strcpy(ct->ct_guid, lguid);
1117 	ct->ct_cprivate = NULL;
1118 	ct->ct_vprivate = NULL;
1119 	ct->ct_flags = 0;
1120 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1121 	MDI_CLIENT_SET_OFFLINE(ct);
1122 	MDI_CLIENT_SET_DETACH(ct);
1123 	MDI_CLIENT_SET_POWER_UP(ct);
1124 	ct->ct_failover_flags = 0;
1125 	ct->ct_failover_status = 0;
1126 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1127 	ct->ct_unstable = 0;
1128 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1129 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1130 	ct->ct_lb = vh->vh_lb;
1131 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1132 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1133 	ct->ct_path_count = 0;
1134 	ct->ct_path_head = NULL;
1135 	ct->ct_path_tail = NULL;
1136 	ct->ct_path_last = NULL;
1137 
1138 	/*
1139 	 * Add this client component to our client hash queue
1140 	 */
1141 	i_mdi_client_enlist_table(vh, ct);
1142 	return (ct);
1143 }
1144 
1145 /*
1146  * i_mdi_client_enlist_table():
1147  *		Attach the client device to the client hash table. Caller
1148  *		should hold the mdi_mutex
1149  */
1150 
1151 static void
1152 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1153 {
1154 	int 			index;
1155 	struct client_hash	*head;
1156 
1157 	ASSERT(MUTEX_HELD(&mdi_mutex));
1158 	index = i_mdi_get_hash_key(ct->ct_guid);
1159 	head = &vh->vh_client_table[index];
1160 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1161 	head->ct_hash_head = ct;
1162 	head->ct_hash_count++;
1163 	vh->vh_client_count++;
1164 }
1165 
1166 /*
1167  * i_mdi_client_delist_table():
1168  *		Attach the client device to the client hash table.
1169  *		Caller should hold the mdi_mutex
1170  */
1171 
1172 static void
1173 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1174 {
1175 	int			index;
1176 	char			*guid;
1177 	struct client_hash 	*head;
1178 	mdi_client_t		*next;
1179 	mdi_client_t		*last;
1180 
1181 	ASSERT(MUTEX_HELD(&mdi_mutex));
1182 	guid = ct->ct_guid;
1183 	index = i_mdi_get_hash_key(guid);
1184 	head = &vh->vh_client_table[index];
1185 
1186 	last = NULL;
1187 	next = (mdi_client_t *)head->ct_hash_head;
1188 	while (next != NULL) {
1189 		if (next == ct) {
1190 			break;
1191 		}
1192 		last = next;
1193 		next = next->ct_hnext;
1194 	}
1195 
1196 	if (next) {
1197 		head->ct_hash_count--;
1198 		if (last == NULL) {
1199 			head->ct_hash_head = ct->ct_hnext;
1200 		} else {
1201 			last->ct_hnext = ct->ct_hnext;
1202 		}
1203 		ct->ct_hnext = NULL;
1204 		vh->vh_client_count--;
1205 	}
1206 }
1207 
1208 
1209 /*
1210  * i_mdi_client_free():
1211  *		Free a client component
1212  */
1213 static int
1214 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1215 {
1216 	int		rv = MDI_SUCCESS;
1217 	int		flags = ct->ct_flags;
1218 	dev_info_t	*cdip;
1219 	dev_info_t	*vdip;
1220 
1221 	ASSERT(MUTEX_HELD(&mdi_mutex));
1222 	vdip = vh->vh_dip;
1223 	cdip = ct->ct_dip;
1224 
1225 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1226 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1227 	DEVI(cdip)->devi_mdi_client = NULL;
1228 
1229 	/*
1230 	 * Clear out back ref. to dev_info_t node
1231 	 */
1232 	ct->ct_dip = NULL;
1233 
1234 	/*
1235 	 * Remove this client from our hash queue
1236 	 */
1237 	i_mdi_client_delist_table(vh, ct);
1238 
1239 	/*
1240 	 * Uninitialize and free the component
1241 	 */
1242 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1243 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1244 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1245 	cv_destroy(&ct->ct_failover_cv);
1246 	cv_destroy(&ct->ct_unstable_cv);
1247 	cv_destroy(&ct->ct_powerchange_cv);
1248 	mutex_destroy(&ct->ct_mutex);
1249 	kmem_free(ct, sizeof (*ct));
1250 
1251 	if (cdip != NULL) {
1252 		mutex_exit(&mdi_mutex);
1253 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1254 		mutex_enter(&mdi_mutex);
1255 	}
1256 	return (rv);
1257 }
1258 
1259 /*
1260  * i_mdi_client_find():
1261  * 		Find the client structure corresponding to a given guid
1262  *		Caller should hold the mdi_mutex
1263  */
1264 static mdi_client_t *
1265 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1266 {
1267 	int			index;
1268 	struct client_hash	*head;
1269 	mdi_client_t		*ct;
1270 
1271 	ASSERT(MUTEX_HELD(&mdi_mutex));
1272 	index = i_mdi_get_hash_key(guid);
1273 	head = &vh->vh_client_table[index];
1274 
1275 	ct = head->ct_hash_head;
1276 	while (ct != NULL) {
1277 		if (strcmp(ct->ct_guid, guid) == 0 &&
1278 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1279 			break;
1280 		}
1281 		ct = ct->ct_hnext;
1282 	}
1283 	return (ct);
1284 }
1285 
1286 
1287 
1288 /*
1289  * i_mdi_client_update_state():
1290  *		Compute and update client device state
1291  * Notes:
1292  *		A client device can be in any of three possible states:
1293  *
1294  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1295  *		one online/standby paths. Can tolerate failures.
1296  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1297  *		no alternate paths available as standby. A failure on the online
1298  *		would result in loss of access to device data.
1299  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1300  *		no paths available to access the device.
1301  */
1302 static void
1303 i_mdi_client_update_state(mdi_client_t *ct)
1304 {
1305 	int state;
1306 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1307 	state = i_mdi_client_compute_state(ct, NULL);
1308 	MDI_CLIENT_SET_STATE(ct, state);
1309 }
1310 
1311 /*
1312  * i_mdi_client_compute_state():
1313  *		Compute client device state
1314  *
1315  *		mdi_phci_t *	Pointer to pHCI structure which should
1316  *				while computing the new value.  Used by
1317  *				i_mdi_phci_offline() to find the new
1318  *				client state after DR of a pHCI.
1319  */
1320 static int
1321 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1322 {
1323 	int		state;
1324 	int		online_count = 0;
1325 	int		standby_count = 0;
1326 	mdi_pathinfo_t	*pip, *next;
1327 
1328 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1329 	pip = ct->ct_path_head;
1330 	while (pip != NULL) {
1331 		MDI_PI_LOCK(pip);
1332 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1333 		if (MDI_PI(pip)->pi_phci == ph) {
1334 			MDI_PI_UNLOCK(pip);
1335 			pip = next;
1336 			continue;
1337 		}
1338 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1339 				== MDI_PATHINFO_STATE_ONLINE)
1340 			online_count++;
1341 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1342 				== MDI_PATHINFO_STATE_STANDBY)
1343 			standby_count++;
1344 		MDI_PI_UNLOCK(pip);
1345 		pip = next;
1346 	}
1347 
1348 	if (online_count == 0) {
1349 		if (standby_count == 0) {
1350 			state = MDI_CLIENT_STATE_FAILED;
1351 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1352 			    " ct = %p\n", ct));
1353 		} else if (standby_count == 1) {
1354 			state = MDI_CLIENT_STATE_DEGRADED;
1355 		} else {
1356 			state = MDI_CLIENT_STATE_OPTIMAL;
1357 		}
1358 	} else if (online_count == 1) {
1359 		if (standby_count == 0) {
1360 			state = MDI_CLIENT_STATE_DEGRADED;
1361 		} else {
1362 			state = MDI_CLIENT_STATE_OPTIMAL;
1363 		}
1364 	} else {
1365 		state = MDI_CLIENT_STATE_OPTIMAL;
1366 	}
1367 	return (state);
1368 }
1369 
1370 /*
1371  * i_mdi_client2devinfo():
1372  *		Utility function
1373  */
1374 dev_info_t *
1375 i_mdi_client2devinfo(mdi_client_t *ct)
1376 {
1377 	return (ct->ct_dip);
1378 }
1379 
1380 /*
1381  * mdi_client_path2_devinfo():
1382  * 		Given the parent devinfo and child devfs pathname, search for
1383  *		a valid devfs node handle.
1384  */
1385 dev_info_t *
1386 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1387 {
1388 	dev_info_t 	*cdip = NULL;
1389 	dev_info_t 	*ndip = NULL;
1390 	char		*temp_pathname;
1391 	int		circular;
1392 
1393 	/*
1394 	 * Allocate temp buffer
1395 	 */
1396 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1397 
1398 	/*
1399 	 * Lock parent against changes
1400 	 */
1401 	ndi_devi_enter(vdip, &circular);
1402 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1403 	while ((cdip = ndip) != NULL) {
1404 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1405 
1406 		*temp_pathname = '\0';
1407 		(void) ddi_pathname(cdip, temp_pathname);
1408 		if (strcmp(temp_pathname, pathname) == 0) {
1409 			break;
1410 		}
1411 	}
1412 	/*
1413 	 * Release devinfo lock
1414 	 */
1415 	ndi_devi_exit(vdip, circular);
1416 
1417 	/*
1418 	 * Free the temp buffer
1419 	 */
1420 	kmem_free(temp_pathname, MAXPATHLEN);
1421 	return (cdip);
1422 }
1423 
1424 
1425 /*
1426  * mdi_client_get_path_count():
1427  * 		Utility function to get number of path information nodes
1428  *		associated with a given client device.
1429  */
1430 int
1431 mdi_client_get_path_count(dev_info_t *cdip)
1432 {
1433 	mdi_client_t	*ct;
1434 	int		count = 0;
1435 
1436 	ct = i_devi_get_client(cdip);
1437 	if (ct != NULL) {
1438 		count = ct->ct_path_count;
1439 	}
1440 	return (count);
1441 }
1442 
1443 
1444 /*
1445  * i_mdi_get_hash_key():
1446  * 		Create a hash using strings as keys
1447  *
1448  */
1449 static int
1450 i_mdi_get_hash_key(char *str)
1451 {
1452 	uint32_t	g, hash = 0;
1453 	char		*p;
1454 
1455 	for (p = str; *p != '\0'; p++) {
1456 		g = *p;
1457 		hash += g;
1458 	}
1459 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1460 }
1461 
1462 /*
1463  * mdi_get_lb_policy():
1464  * 		Get current load balancing policy for a given client device
1465  */
1466 client_lb_t
1467 mdi_get_lb_policy(dev_info_t *cdip)
1468 {
1469 	client_lb_t	lb = LOAD_BALANCE_NONE;
1470 	mdi_client_t	*ct;
1471 
1472 	ct = i_devi_get_client(cdip);
1473 	if (ct != NULL) {
1474 		lb = ct->ct_lb;
1475 	}
1476 	return (lb);
1477 }
1478 
1479 /*
1480  * mdi_set_lb_region_size():
1481  * 		Set current region size for the load-balance
1482  */
1483 int
1484 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1485 {
1486 	mdi_client_t	*ct;
1487 	int		rv = MDI_FAILURE;
1488 
1489 	ct = i_devi_get_client(cdip);
1490 	if (ct != NULL && ct->ct_lb_args != NULL) {
1491 		ct->ct_lb_args->region_size = region_size;
1492 		rv = MDI_SUCCESS;
1493 	}
1494 	return (rv);
1495 }
1496 
1497 /*
1498  * mdi_Set_lb_policy():
1499  * 		Set current load balancing policy for a given client device
1500  */
1501 int
1502 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1503 {
1504 	mdi_client_t	*ct;
1505 	int		rv = MDI_FAILURE;
1506 
1507 	ct = i_devi_get_client(cdip);
1508 	if (ct != NULL) {
1509 		ct->ct_lb = lb;
1510 		rv = MDI_SUCCESS;
1511 	}
1512 	return (rv);
1513 }
1514 
1515 /*
1516  * mdi_failover():
1517  *		failover function called by the vHCI drivers to initiate
1518  *		a failover operation.  This is typically due to non-availability
1519  *		of online paths to route I/O requests.  Failover can be
1520  *		triggered through user application also.
1521  *
1522  *		The vHCI driver calls mdi_failover() to initiate a failover
1523  *		operation. mdi_failover() calls back into the vHCI driver's
1524  *		vo_failover() entry point to perform the actual failover
1525  *		operation.  The reason for requiring the vHCI driver to
1526  *		initiate failover by calling mdi_failover(), instead of directly
1527  *		executing vo_failover() itself, is to ensure that the mdi
1528  *		framework can keep track of the client state properly.
1529  *		Additionally, mdi_failover() provides as a convenience the
1530  *		option of performing the failover operation synchronously or
1531  *		asynchronously
1532  *
1533  *		Upon successful completion of the failover operation, the
1534  *		paths that were previously ONLINE will be in the STANDBY state,
1535  *		and the newly activated paths will be in the ONLINE state.
1536  *
1537  *		The flags modifier determines whether the activation is done
1538  *		synchronously: MDI_FAILOVER_SYNC
1539  * Return Values:
1540  *		MDI_SUCCESS
1541  *		MDI_FAILURE
1542  *		MDI_BUSY
1543  */
1544 /*ARGSUSED*/
1545 int
1546 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1547 {
1548 	int			rv;
1549 	mdi_client_t		*ct;
1550 
1551 	ct = i_devi_get_client(cdip);
1552 	ASSERT(ct != NULL);
1553 	if (ct == NULL) {
1554 		/* cdip is not a valid client device. Nothing more to do. */
1555 		return (MDI_FAILURE);
1556 	}
1557 
1558 	MDI_CLIENT_LOCK(ct);
1559 
1560 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1561 		/* A path to the client is being freed */
1562 		MDI_CLIENT_UNLOCK(ct);
1563 		return (MDI_BUSY);
1564 	}
1565 
1566 
1567 	if (MDI_CLIENT_IS_FAILED(ct)) {
1568 		/*
1569 		 * Client is in failed state. Nothing more to do.
1570 		 */
1571 		MDI_CLIENT_UNLOCK(ct);
1572 		return (MDI_FAILURE);
1573 	}
1574 
1575 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1576 		/*
1577 		 * Failover is already in progress; return BUSY
1578 		 */
1579 		MDI_CLIENT_UNLOCK(ct);
1580 		return (MDI_BUSY);
1581 	}
1582 	/*
1583 	 * Make sure that mdi_pathinfo node state changes are processed.
1584 	 * We do not allow failovers to progress while client path state
1585 	 * changes are in progress
1586 	 */
1587 	if (ct->ct_unstable) {
1588 		if (flags == MDI_FAILOVER_ASYNC) {
1589 			MDI_CLIENT_UNLOCK(ct);
1590 			return (MDI_BUSY);
1591 		} else {
1592 			while (ct->ct_unstable)
1593 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1594 		}
1595 	}
1596 
1597 	/*
1598 	 * Client device is in stable state. Before proceeding, perform sanity
1599 	 * checks again.
1600 	 */
1601 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1602 	    (i_ddi_node_state(ct->ct_dip) < DS_READY)) {
1603 		/*
1604 		 * Client is in failed state. Nothing more to do.
1605 		 */
1606 		MDI_CLIENT_UNLOCK(ct);
1607 		return (MDI_FAILURE);
1608 	}
1609 
1610 	/*
1611 	 * Set the client state as failover in progress.
1612 	 */
1613 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1614 	ct->ct_failover_flags = flags;
1615 	MDI_CLIENT_UNLOCK(ct);
1616 
1617 	if (flags == MDI_FAILOVER_ASYNC) {
1618 		/*
1619 		 * Submit the initiate failover request via CPR safe
1620 		 * taskq threads.
1621 		 */
1622 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1623 		    ct, KM_SLEEP);
1624 		return (MDI_ACCEPT);
1625 	} else {
1626 		/*
1627 		 * Synchronous failover mode.  Typically invoked from the user
1628 		 * land.
1629 		 */
1630 		rv = i_mdi_failover(ct);
1631 	}
1632 	return (rv);
1633 }
1634 
1635 /*
1636  * i_mdi_failover():
1637  *		internal failover function. Invokes vHCI drivers failover
1638  *		callback function and process the failover status
1639  * Return Values:
1640  *		None
1641  *
1642  * Note: A client device in failover state can not be detached or freed.
1643  */
1644 static int
1645 i_mdi_failover(void *arg)
1646 {
1647 	int		rv = MDI_SUCCESS;
1648 	mdi_client_t	*ct = (mdi_client_t *)arg;
1649 	mdi_vhci_t	*vh = ct->ct_vhci;
1650 
1651 	ASSERT(!MUTEX_HELD(&ct->ct_mutex));
1652 
1653 	if (vh->vh_ops->vo_failover != NULL) {
1654 		/*
1655 		 * Call vHCI drivers callback routine
1656 		 */
1657 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1658 		    ct->ct_failover_flags);
1659 	}
1660 
1661 	MDI_CLIENT_LOCK(ct);
1662 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1663 
1664 	/*
1665 	 * Save the failover return status
1666 	 */
1667 	ct->ct_failover_status = rv;
1668 
1669 	/*
1670 	 * As a result of failover, client status would have been changed.
1671 	 * Update the client state and wake up anyone waiting on this client
1672 	 * device.
1673 	 */
1674 	i_mdi_client_update_state(ct);
1675 
1676 	cv_broadcast(&ct->ct_failover_cv);
1677 	MDI_CLIENT_UNLOCK(ct);
1678 	return (rv);
1679 }
1680 
1681 /*
1682  * Load balancing is logical block.
1683  * IOs within the range described by region_size
1684  * would go on the same path. This would improve the
1685  * performance by cache-hit on some of the RAID devices.
1686  * Search only for online paths(At some point we
1687  * may want to balance across target ports).
1688  * If no paths are found then default to round-robin.
1689  */
1690 static int
1691 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1692 {
1693 	int		path_index = -1;
1694 	int		online_path_count = 0;
1695 	int		online_nonpref_path_count = 0;
1696 	int 		region_size = ct->ct_lb_args->region_size;
1697 	mdi_pathinfo_t	*pip;
1698 	mdi_pathinfo_t	*next;
1699 	int		preferred, path_cnt;
1700 
1701 	pip = ct->ct_path_head;
1702 	while (pip) {
1703 		MDI_PI_LOCK(pip);
1704 		if (MDI_PI(pip)->pi_state ==
1705 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1706 			online_path_count++;
1707 		} else if (MDI_PI(pip)->pi_state ==
1708 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1709 			online_nonpref_path_count++;
1710 		}
1711 		next = (mdi_pathinfo_t *)
1712 		    MDI_PI(pip)->pi_client_link;
1713 		MDI_PI_UNLOCK(pip);
1714 		pip = next;
1715 	}
1716 	/* if found any online/preferred then use this type */
1717 	if (online_path_count > 0) {
1718 		path_cnt = online_path_count;
1719 		preferred = 1;
1720 	} else if (online_nonpref_path_count > 0) {
1721 		path_cnt = online_nonpref_path_count;
1722 		preferred = 0;
1723 	} else {
1724 		path_cnt = 0;
1725 	}
1726 	if (path_cnt) {
1727 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1728 		pip = ct->ct_path_head;
1729 		while (pip && path_index != -1) {
1730 			MDI_PI_LOCK(pip);
1731 			if (path_index == 0 &&
1732 			    (MDI_PI(pip)->pi_state ==
1733 			    MDI_PATHINFO_STATE_ONLINE) &&
1734 				MDI_PI(pip)->pi_preferred == preferred) {
1735 				MDI_PI_HOLD(pip);
1736 				MDI_PI_UNLOCK(pip);
1737 				*ret_pip = pip;
1738 				return (MDI_SUCCESS);
1739 			}
1740 			path_index --;
1741 			next = (mdi_pathinfo_t *)
1742 			    MDI_PI(pip)->pi_client_link;
1743 			MDI_PI_UNLOCK(pip);
1744 			pip = next;
1745 		}
1746 		if (pip == NULL) {
1747 			MDI_DEBUG(4, (CE_NOTE, NULL,
1748 			    "!lba %p, no pip !!\n",
1749 				bp->b_blkno));
1750 		} else {
1751 			MDI_DEBUG(4, (CE_NOTE, NULL,
1752 			    "!lba %p, no pip for path_index, "
1753 			    "pip %p\n", pip));
1754 		}
1755 	}
1756 	return (MDI_FAILURE);
1757 }
1758 
1759 /*
1760  * mdi_select_path():
1761  *		select a path to access a client device.
1762  *
1763  *		mdi_select_path() function is called by the vHCI drivers to
1764  *		select a path to route the I/O request to.  The caller passes
1765  *		the block I/O data transfer structure ("buf") as one of the
1766  *		parameters.  The mpxio framework uses the buf structure
1767  *		contents to maintain per path statistics (total I/O size /
1768  *		count pending).  If more than one online path is available to
1769  *		select, the framework automatically selects a suitable path
1770  *		for routing I/O request. If a failover operation is active for
1771  *		this client device the call shall be failed with MDI_BUSY error
1772  *		code.
1773  *
1774  *		By default this function returns a suitable path in online
1775  *		state based on the current load balancing policy.  Currently
1776  *		we support LOAD_BALANCE_NONE (Previously selected online path
1777  *		will continue to be used till the path is usable),
1778  *		LOAD_BALANCE_RR (Online paths will be selected in a round
1779  *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
1780  *		selected based on the logical block).  The load balancing
1781  *		policy is set through vHCI drivers configuration file
 *		(driver.conf).
1782  *
1783  *		vHCI drivers may override this default behavior by specifying
1784  *		appropriate flags.  If start_pip is specified (non NULL) it is
1785  *		used as start point to walk and find the next appropriate path.
1786  *		The following values are currently defined:
1787  *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
1788  *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
 *		With any non-zero flags the client state checks are skipped
 *		and the walk is always done round-robin style from start_pip.
1789  *
1790  *		The non-standard behavior is used by the scsi_vhci driver,
1791  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1792  *		attach of client devices (to avoid an unnecessary failover
1793  *		when the STANDBY path comes up first), during failover
1794  *		(to activate a STANDBY path as ONLINE).
1795  *
1796  *		The selected path is returned in a held state (ref_cnt).
1797  *		Caller should release the hold by calling mdi_rele_path().
 *		*ret_pip is set to NULL on entry and stays NULL unless
 *		MDI_SUCCESS is returned.
1798  *
1799  * Return Values:
1800  *		MDI_SUCCESS	- Completed successfully
1801  *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path qualified and at least one path that
 *				  was examined is in a transient state
1802  *		MDI_NOPATH	- Client device is online, but no valid path are
1803  *				  available to access this client device
1804  *		MDI_FAILURE	- Invalid client device or state
1805  *		MDI_DEVI_ONLINING
1806  *				- Client device (struct dev_info state) is in
1807  *				  onlining state.
1808  */
1809 
1810 /*ARGSUSED*/
1811 int
1812 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1813     mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
1814 {
1815 	mdi_client_t	*ct;
1816 	mdi_pathinfo_t	*pip;
1817 	mdi_pathinfo_t	*next;
1818 	mdi_pathinfo_t	*head;
1819 	mdi_pathinfo_t	*start;
1820 	client_lb_t	lbp;	/* load balancing policy */
1821 	int		sb = 1;	/* standard behavior */
1822 	int		preferred = 1;	/* preferred path */
	/* cond: current path qualifies; cont: keep walking the list */
1823 	int		cond, cont = 1;
	/* retry: a transient path was passed over, report MDI_BUSY */
1824 	int		retry = 0;
1825 
1826 	if (flags != 0) {
1827 		/*
1828 		 * disable default behavior
1829 		 */
1830 		sb = 0;
1831 	}
1832 
1833 	*ret_pip = NULL;
1834 	ct = i_devi_get_client(cdip);
1835 	if (ct == NULL) {
1836 		/* mdi extensions are NULL, Nothing more to do */
1837 		return (MDI_FAILURE);
1838 	}
1839 
	/*
	 * The client lock is held for the whole walk; every return below
	 * drops it first.
	 */
1840 	MDI_CLIENT_LOCK(ct);
1841 
1842 	if (sb) {
1843 		if (MDI_CLIENT_IS_FAILED(ct)) {
1844 			/*
1845 			 * Client is not ready to accept any I/O requests.
1846 			 * Fail this request.
1847 			 */
1848 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1849 			    "client state offline ct = %p\n", ct));
1850 			MDI_CLIENT_UNLOCK(ct);
1851 			return (MDI_FAILURE);
1852 		}
1853 
1854 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1855 			/*
1856 			 * Check for Failover is in progress. If so tell the
1857 			 * caller that this device is busy.
1858 			 */
1859 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1860 			    "client failover in progress ct = %p\n", ct));
1861 			MDI_CLIENT_UNLOCK(ct);
1862 			return (MDI_BUSY);
1863 		}
1864 
1865 		/*
1866 		 * Check to see whether the client device is attached.
1867 		 * If not so, let the vHCI driver manually select a path
1868 		 * (standby) and let the probe/attach process to continue.
1869 		 */
1870 		if ((MDI_CLIENT_IS_DETACHED(ct)) ||
1871 		    i_ddi_node_state(cdip) < DS_READY) {
1872 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
1873 			MDI_CLIENT_UNLOCK(ct);
1874 			return (MDI_DEVI_ONLINING);
1875 		}
1876 	}
1877 
1878 	/*
1879 	 * Cache in the client list head.  If head of the list is NULL
1880 	 * return MDI_NOPATH
1881 	 */
1882 	head = ct->ct_path_head;
1883 	if (head == NULL) {
1884 		MDI_CLIENT_UNLOCK(ct);
1885 		return (MDI_NOPATH);
1886 	}
1887 
1888 	/*
1889 	 * for non default behavior, bypass current
1890 	 * load balancing policy and always use LOAD_BALANCE_RR
1891 	 * except that the start point will be adjusted based
1892 	 * on the provided start_pip
1893 	 */
1894 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
1895 
	/*
	 * There is no default case: any other policy value selects nothing
	 * and ends up at the MDI_NOPATH/MDI_BUSY return after the switch.
	 */
1896 	switch (lbp) {
1897 	case LOAD_BALANCE_NONE:
1898 		/*
1899 		 * Load balancing is None  or Alternate path mode
1900 		 * Start looking for a online mdi_pathinfo node starting from
1901 		 * last known selected path
1902 		 */
1903 		preferred = 1;
1904 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
1905 		if (pip == NULL) {
1906 			pip = head;
1907 		}
1908 		start = pip;
1909 		do {
1910 			MDI_PI_LOCK(pip);
1911 			/*
1912 			 * No need to explicitly check if the path is disabled.
1913 			 * Since we are checking for state == ONLINE and the
1914 			 * same variable is used for DISABLE/ENABLE information.
1915 			 */
1916 			if (MDI_PI(pip)->pi_state  ==
1917 				MDI_PATHINFO_STATE_ONLINE &&
1918 				preferred == MDI_PI(pip)->pi_preferred) {
1919 				/*
1920 				 * Return the path in hold state. Caller should
1921 				 * release the hold by calling mdi_rele_path()
1922 				 */
1923 				MDI_PI_HOLD(pip);
1924 				MDI_PI_UNLOCK(pip);
1925 				ct->ct_path_last = pip;
1926 				*ret_pip = pip;
1927 				MDI_CLIENT_UNLOCK(ct);
1928 				return (MDI_SUCCESS);
1929 			}
1930 
1931 			/*
1932 			 * Path is busy.
1933 			 */
1934 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
1935 			    MDI_PI_IS_TRANSIENT(pip))
1936 				retry = 1;
1937 			/*
1938 			 * Keep looking for a next available online path
1939 			 */
1940 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1941 			if (next == NULL) {
1942 				next = head;
1943 			}
1944 			MDI_PI_UNLOCK(pip);
1945 			pip = next;
			/*
			 * Back at the starting path: the first time round
			 * switch to non-preferred paths, the second time
			 * stop the walk.
			 */
1946 			if (start == pip && preferred) {
1947 				preferred = 0;
1948 			} else if (start == pip && !preferred) {
1949 				cont = 0;
1950 			}
1951 		} while (cont);
1952 		break;
1953 
1954 	case LOAD_BALANCE_LBA:
1955 		/*
1956 		 * Make sure we are looking
1957 		 * for an online path. Otherwise, if it is for a STANDBY
1958 		 * path request, it will go through and fetch an ONLINE
1959 		 * path which is not desirable.
1960 		 */
1961 		if ((ct->ct_lb_args != NULL) &&
1962 			    (ct->ct_lb_args->region_size) && bp &&
1963 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
1964 			if (i_mdi_lba_lb(ct, ret_pip, bp)
1965 				    == MDI_SUCCESS) {
1966 				MDI_CLIENT_UNLOCK(ct);
1967 				return (MDI_SUCCESS);
1968 			}
1969 		}
1970 		/*  FALLTHROUGH */
1971 	case LOAD_BALANCE_RR:
1972 		/*
1973 		 * Load balancing is Round Robin. Start looking for a online
1974 		 * mdi_pathinfo node starting from last known selected path
1975 		 * as the start point.  If override flags are specified,
1976 		 * process accordingly.
1977 		 * If the search is already in effect(start_pip not null),
1978 		 * then lets just use the same path preference to continue the
1979 		 * traversal.
1980 		 */
1981 
1982 		if (start_pip != NULL) {
1983 			preferred = MDI_PI(start_pip)->pi_preferred;
1984 		} else {
1985 			preferred = 1;
1986 		}
1987 
		/*
		 * The walk begins at the path after 'start' (the last
		 * selected path in the standard case, start_pip otherwise),
		 * or at the list head when there is no such path.
		 */
1988 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
1989 		if (start == NULL) {
1990 			pip = head;
1991 		} else {
1992 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
1993 			if (pip == NULL) {
1994 				if (!sb) {
1995 					if (preferred == 0) {
1996 						/*
1997 						 * Looks like we have completed
1998 						 * the traversal as preferred
1999 						 * value is 0. Time to bail out.
2000 						 */
2001 						*ret_pip = NULL;
2002 						MDI_CLIENT_UNLOCK(ct);
2003 						return (MDI_NOPATH);
2004 					} else {
2005 						/*
2006 						 * Looks like we reached the
2007 						 * end of the list. Lets enable
2008 						 * traversal of non preferred
2009 						 * paths.
2010 						 */
2011 						preferred = 0;
2012 					}
2013 				}
2014 				pip = head;
2015 			}
2016 		}
2017 		start = pip;
2018 		do {
2019 			MDI_PI_LOCK(pip);
			/*
			 * Work out whether this path qualifies.  The flags
			 * are compared for equality, so any value other than
			 * the three combinations below matches nothing.
			 */
2020 			if (sb) {
2021 				cond = ((MDI_PI(pip)->pi_state ==
2022 				    MDI_PATHINFO_STATE_ONLINE &&
2023 					MDI_PI(pip)->pi_preferred ==
2024 						preferred) ? 1 : 0);
2025 			} else {
2026 				if (flags == MDI_SELECT_ONLINE_PATH) {
2027 					cond = ((MDI_PI(pip)->pi_state ==
2028 					    MDI_PATHINFO_STATE_ONLINE &&
2029 						MDI_PI(pip)->pi_preferred ==
2030 						preferred) ? 1 : 0);
2031 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2032 					cond = ((MDI_PI(pip)->pi_state ==
2033 					    MDI_PATHINFO_STATE_STANDBY &&
2034 						MDI_PI(pip)->pi_preferred ==
2035 						preferred) ? 1 : 0);
2036 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2037 				    MDI_SELECT_STANDBY_PATH)) {
2038 					cond = (((MDI_PI(pip)->pi_state ==
2039 					    MDI_PATHINFO_STATE_ONLINE ||
2040 					    (MDI_PI(pip)->pi_state ==
2041 					    MDI_PATHINFO_STATE_STANDBY)) &&
2042 						MDI_PI(pip)->pi_preferred ==
2043 						preferred) ? 1 : 0);
2044 				} else {
2045 					cond = 0;
2046 				}
2047 			}
2048 			/*
2049 			 * No need to explicitly check if the path is disabled.
2050 			 * Since we are checking for state == ONLINE and the
2051 			 * same variable is used for DISABLE/ENABLE information.
2052 			 */
2053 			if (cond) {
2054 				/*
2055 				 * Return the path in hold state. Caller should
2056 				 * release the hold by calling mdi_rele_path()
2057 				 */
2058 				MDI_PI_HOLD(pip);
2059 				MDI_PI_UNLOCK(pip);
				/*
				 * Only the standard case records the path as
				 * the next round-robin starting point.
				 */
2060 				if (sb)
2061 					ct->ct_path_last = pip;
2062 				*ret_pip = pip;
2063 				MDI_CLIENT_UNLOCK(ct);
2064 				return (MDI_SUCCESS);
2065 			}
2066 			/*
2067 			 * Path is busy.
2068 			 */
2069 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2070 			    MDI_PI_IS_TRANSIENT(pip))
2071 				retry = 1;
2072 
2073 			/*
2074 			 * Keep looking for a next available online path.
			 * pip is locked whenever control reaches do_again.
2075 			 */
2076 do_again:
2077 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2078 			if (next == NULL) {
2079 				if (!sb) {
2080 					if (preferred == 1) {
2081 						/*
2082 						 * Looks like we reached the
2083 						 * end of the list. Lets enable
2084 						 * traversal of non preferred
2085 						 * paths.
2086 						 */
2087 						preferred = 0;
2088 						next = head;
2089 					} else {
2090 						/*
2091 						 * We have done both the passes
2092 						 * Preferred as well as for
2093 						 * Non-preferred. Bail out now.
2094 						 */
2095 						cont = 0;
2096 					}
2097 				} else {
2098 					/*
2099 					 * Standard behavior case.
2100 					 */
2101 					next = head;
2102 				}
2103 			}
2104 			MDI_PI_UNLOCK(pip);
2105 			if (cont == 0) {
2106 				break;
2107 			}
2108 			pip = next;
2109 
2110 			if (!sb) {
2111 				/*
2112 				 * We need to handle the selection of
2113 				 * non-preferred path in the following
2114 				 * case:
2115 				 *
2116 				 * +------+   +------+   +------+   +-----+
2117 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2118 				 * +------+   +------+   +------+   +-----+
2119 				 *
2120 				 * If we start the search with B, we need to
2121 				 * skip beyond B to pick C which is non -
2122 				 * preferred in the second pass. The following
2123 				 * test, if true, will allow us to skip over
2124 				 * the 'start'(B in the example) to select
2125 				 * other non preferred elements.
2126 				 */
2127 				if ((start_pip != NULL) && (start_pip == pip) &&
2128 				    (MDI_PI(start_pip)->pi_preferred
2129 				    != preferred)) {
2130 					/*
2131 					 * try again after going past the start
2132 					 * pip.  The lock is taken here because
					 * the code at do_again reads the link
					 * of pip and then unlocks it.
2133 					 */
2134 					MDI_PI_LOCK(pip);
2135 					goto do_again;
2136 				}
2137 			} else {
2138 				/*
2139 				 * Standard behavior case
2140 				 */
2141 				if (start == pip && preferred) {
2142 					/* look for nonpreferred paths */
2143 					preferred = 0;
2144 				} else if (start == pip && !preferred) {
2145 					/*
2146 					 * Exit condition
2147 					 */
2148 					cont = 0;
2149 				}
2150 			}
2151 		} while (cont);
2152 		break;
2153 	}
2154 
	/*
	 * No path qualified.  If one of the paths examined was in a
	 * transient state report MDI_BUSY (presumably so that the caller
	 * tries again later); otherwise there is no usable path.
	 */
2155 	MDI_CLIENT_UNLOCK(ct);
2156 	if (retry == 1) {
2157 		return (MDI_BUSY);
2158 	} else {
2159 		return (MDI_NOPATH);
2160 	}
2161 }
2162 
2163 /*
2164  * For a client, return the next available path to any phci
2165  *
2166  * Note:
2167  *		Caller should hold the branch's devinfo node to get a consistent
2168  *		snap shot of the mdi_pathinfo nodes.
2169  *
2170  *		Please note that even the list is stable the mdi_pathinfo
2171  *		node state and properties are volatile.  The caller should lock
2172  *		and unlock the nodes by calling mdi_pi_lock() and
2173  *		mdi_pi_unlock() functions to get a stable properties.
2174  *
2175  *		If there is a need to use the nodes beyond the hold of the
2176  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2177  *		need to be held against unexpected removal by calling
2178  *		mdi_hold_path() and should be released by calling
2179  *		mdi_rele_path() on completion.
2180  */
2181 mdi_pathinfo_t *
2182 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2183 {
2184 	mdi_client_t *ct;
2185 
2186 	if (!MDI_CLIENT(ct_dip))
2187 		return (NULL);
2188 
2189 	/*
2190 	 * Walk through client link
2191 	 */
2192 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2193 	ASSERT(ct != NULL);
2194 
2195 	if (pip == NULL)
2196 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2197 
2198 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2199 }
2200 
2201 /*
2202  * For a phci, return the next available path to any client
2203  * Note: ditto mdi_get_next_phci_path()
2204  */
2205 mdi_pathinfo_t *
2206 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2207 {
2208 	mdi_phci_t *ph;
2209 
2210 	if (!MDI_PHCI(ph_dip))
2211 		return (NULL);
2212 
2213 	/*
2214 	 * Walk through pHCI link
2215 	 */
2216 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2217 	ASSERT(ph != NULL);
2218 
2219 	if (pip == NULL)
2220 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2221 
2222 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2223 }
2224 
2225 /*
2226  * mdi_get_nextpath():
2227  *		mdi_pathinfo node walker function.  Get the next node from the
2228  *		client or pHCI device list.
2229  *
2230  * XXX This is wrapper function for compatibility purposes only.
2231  *
2232  *	It doesn't work under Multi-level MPxIO, where a dip
2233  *	is both client and phci (which link should next_path follow?).
2234  *	Once Leadville is modified to call mdi_get_next_phci/client_path,
2235  *	this interface should be removed.
2236  */
2237 void
2238 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip,
2239     mdi_pathinfo_t **ret_pip)
2240 {
2241 	if (MDI_CLIENT(dip)) {
2242 		*ret_pip = mdi_get_next_phci_path(dip, pip);
2243 	} else if (MDI_PHCI(dip)) {
2244 		*ret_pip = mdi_get_next_client_path(dip, pip);
2245 	} else {
2246 		*ret_pip = NULL;
2247 	}
2248 }
2249 
2250 /*
2251  * mdi_hold_path():
2252  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2253  * Return Values:
2254  *		None
2255  */
2256 void
2257 mdi_hold_path(mdi_pathinfo_t *pip)
2258 {
2259 	if (pip) {
2260 		MDI_PI_LOCK(pip);
2261 		MDI_PI_HOLD(pip);
2262 		MDI_PI_UNLOCK(pip);
2263 	}
2264 }
2265 
2266 
2267 /*
2268  * mdi_rele_path():
2269  *		Release the mdi_pathinfo node which was selected
2270  *		through mdi_select_path() mechanism or manually held by
2271  *		calling mdi_hold_path().
2272  * Return Values:
2273  *		None
2274  */
2275 void
2276 mdi_rele_path(mdi_pathinfo_t *pip)
2277 {
2278 	if (pip) {
2279 		MDI_PI_LOCK(pip);
2280 		MDI_PI_RELE(pip);
2281 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2282 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2283 		}
2284 		MDI_PI_UNLOCK(pip);
2285 	}
2286 }
2287 
2288 
2289 /*
2290  * mdi_pi_lock():
2291  * 		Lock the mdi_pathinfo node.
2292  * Note:
2293  *		The caller should release the lock by calling mdi_pi_unlock()
2294  */
2295 void
2296 mdi_pi_lock(mdi_pathinfo_t *pip)
2297 {
2298 	ASSERT(pip != NULL);
2299 	if (pip) {
2300 		MDI_PI_LOCK(pip);
2301 	}
2302 }
2303 
2304 
2305 /*
2306  * mdi_pi_unlock():
2307  * 		Unlock the mdi_pathinfo node.
2308  * Note:
2309  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2310  */
2311 void
2312 mdi_pi_unlock(mdi_pathinfo_t *pip)
2313 {
2314 	ASSERT(pip != NULL);
2315 	if (pip) {
2316 		MDI_PI_UNLOCK(pip);
2317 	}
2318 }
2319 
2320 /*
2321  * mdi_pi_find():
2322  *		Search the list of mdi_pathinfo nodes attached to the
2323  *		pHCI/Client device node whose path address matches "paddr".
2324  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2325  *		found.
2326  * Return Values:
2327  *		mdi_pathinfo node handle
2328  *		NULL
2329  * Notes:
2330  *		Caller need not hold any locks to call this function.
2331  */
2332 mdi_pathinfo_t *
2333 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2334 {
2335 	mdi_phci_t		*ph;
2336 	mdi_vhci_t		*vh;
2337 	mdi_client_t		*ct;
2338 	mdi_pathinfo_t		*pip = NULL;
2339 
2340 	if ((pdip == NULL) || (paddr == NULL)) {
2341 		return (NULL);
2342 	}
2343 	ph = i_devi_get_phci(pdip);
2344 	if (ph == NULL) {
2345 		/*
2346 		 * Invalid pHCI device, Nothing more to do.
2347 		 */
2348 		MDI_DEBUG(2, (CE_WARN, NULL,
2349 		    "!mdi_pi_find: invalid phci"));
2350 		return (NULL);
2351 	}
2352 
2353 	vh = ph->ph_vhci;
2354 	if (vh == NULL) {
2355 		/*
2356 		 * Invalid vHCI device, Nothing more to do.
2357 		 */
2358 		MDI_DEBUG(2, (CE_WARN, NULL,
2359 		    "!mdi_pi_find: invalid phci"));
2360 		return (NULL);
2361 	}
2362 
2363 	/*
2364 	 * Look for client device identified by caddr (guid)
2365 	 */
2366 	if (caddr == NULL) {
2367 		/*
2368 		 * Find a mdi_pathinfo node under pHCI list for a matching
2369 		 * unit address.
2370 		 */
2371 		mutex_enter(&ph->ph_mutex);
2372 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2373 
2374 		while (pip != NULL) {
2375 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2376 				break;
2377 			}
2378 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2379 		}
2380 		mutex_exit(&ph->ph_mutex);
2381 		return (pip);
2382 	}
2383 
2384 	/*
2385 	 * XXX - Is the rest of the code in this function really necessary?
2386 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2387 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2388 	 * whether the search is based on the pathinfo nodes attached to
2389 	 * the pHCI or the client node, the result will be the same.
2390 	 */
2391 
2392 	/*
2393 	 * Find the client device corresponding to 'caddr'
2394 	 */
2395 	mutex_enter(&mdi_mutex);
2396 
2397 	/*
2398 	 * XXX - Passing NULL to the following function works as long as the
2399 	 * the client addresses (caddr) are unique per vhci basis.
2400 	 */
2401 	ct = i_mdi_client_find(vh, NULL, caddr);
2402 	if (ct == NULL) {
2403 		/*
2404 		 * Client not found, Obviously mdi_pathinfo node has not been
2405 		 * created yet.
2406 		 */
2407 		mutex_exit(&mdi_mutex);
2408 		return (pip);
2409 	}
2410 
2411 	/*
2412 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2413 	 * pHCI and paddr
2414 	 */
2415 	MDI_CLIENT_LOCK(ct);
2416 
2417 	/*
2418 	 * Release the global mutex as it is no more needed. Note: We always
2419 	 * respect the locking order while acquiring.
2420 	 */
2421 	mutex_exit(&mdi_mutex);
2422 
2423 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2424 	while (pip != NULL) {
2425 		/*
2426 		 * Compare the unit address
2427 		 */
2428 		if ((MDI_PI(pip)->pi_phci == ph) &&
2429 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2430 			break;
2431 		}
2432 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2433 	}
2434 	MDI_CLIENT_UNLOCK(ct);
2435 	return (pip);
2436 }
2437 
2438 /*
2439  * mdi_pi_alloc():
2440  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2441  *		The mdi_pathinfo node returned by this function identifies a
2442  *		unique device path is capable of having properties attached
2443  *		and passed to mdi_pi_online() to fully attach and online the
2444  *		path and client device node.
2445  *		The mdi_pathinfo node returned by this function must be
2446  *		destroyed using mdi_pi_free() if the path is no longer
2447  *		operational or if the caller fails to attach a client device
2448  *		node when calling mdi_pi_online(). The framework will not free
2449  *		the resources allocated.
2450  *		This function can be called from both interrupt and kernel
2451  *		contexts.  DDI_NOSLEEP flag should be used while calling
2452  *		from interrupt contexts.
2453  * Return Values:
2454  *		MDI_SUCCESS
2455  *		MDI_FAILURE
2456  *		MDI_NOMEM
2457  */
2458 /*ARGSUSED*/
2459 int
2460 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2461     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2462 {
2463 	mdi_vhci_t	*vh;
2464 	mdi_phci_t	*ph;
2465 	mdi_client_t	*ct;
2466 	mdi_pathinfo_t	*pip = NULL;
2467 	dev_info_t	*cdip;
2468 	int		rv = MDI_NOMEM;
2469 	int		path_allocated = 0;
2470 
2471 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2472 	    ret_pip == NULL) {
2473 		/* Nothing more to do */
2474 		return (MDI_FAILURE);
2475 	}
2476 
2477 	*ret_pip = NULL;
2478 	ph = i_devi_get_phci(pdip);
2479 	ASSERT(ph != NULL);
2480 	if (ph == NULL) {
2481 		/* Invalid pHCI device, return failure */
2482 		MDI_DEBUG(1, (CE_WARN, NULL,
2483 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2484 		return (MDI_FAILURE);
2485 	}
2486 
2487 	MDI_PHCI_LOCK(ph);
2488 	vh = ph->ph_vhci;
2489 	if (vh == NULL) {
2490 		/* Invalid vHCI device, return failure */
2491 		MDI_DEBUG(1, (CE_WARN, NULL,
2492 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2493 		MDI_PHCI_UNLOCK(ph);
2494 		return (MDI_FAILURE);
2495 	}
2496 
2497 	if (MDI_PHCI_IS_READY(ph) == 0) {
2498 		/*
2499 		 * Do not allow new node creation when pHCI is in
2500 		 * offline/suspended states
2501 		 */
2502 		MDI_DEBUG(1, (CE_WARN, NULL,
2503 		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
2504 		MDI_PHCI_UNLOCK(ph);
2505 		return (MDI_BUSY);
2506 	}
2507 	MDI_PHCI_UNSTABLE(ph);
2508 	MDI_PHCI_UNLOCK(ph);
2509 
2510 	/* look for a matching client, create one if not found */
2511 	mutex_enter(&mdi_mutex);
2512 	ct = i_mdi_client_find(vh, cname, caddr);
2513 	if (ct == NULL) {
2514 		ct = i_mdi_client_alloc(vh, cname, caddr);
2515 		ASSERT(ct != NULL);
2516 	}
2517 
2518 	if (ct->ct_dip == NULL) {
2519 		/*
2520 		 * Allocate a devinfo node
2521 		 */
2522 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2523 		    compatible, ncompatible);
2524 		if (ct->ct_dip == NULL) {
2525 			(void) i_mdi_client_free(vh, ct);
2526 			goto fail;
2527 		}
2528 	}
2529 	cdip = ct->ct_dip;
2530 
2531 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2532 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2533 
2534 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2535 	while (pip != NULL) {
2536 		/*
2537 		 * Compare the unit address
2538 		 */
2539 		if ((MDI_PI(pip)->pi_phci == ph) &&
2540 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2541 			break;
2542 		}
2543 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2544 	}
2545 
2546 	if (pip == NULL) {
2547 		/*
2548 		 * This is a new path for this client device.  Allocate and
2549 		 * initialize a new pathinfo node
2550 		 */
2551 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2552 		ASSERT(pip != NULL);
2553 		path_allocated = 1;
2554 	}
2555 	rv = MDI_SUCCESS;
2556 
2557 fail:
2558 	/*
2559 	 * Release the global mutex.
2560 	 */
2561 	mutex_exit(&mdi_mutex);
2562 
2563 	/*
2564 	 * Mark the pHCI as stable
2565 	 */
2566 	MDI_PHCI_LOCK(ph);
2567 	MDI_PHCI_STABLE(ph);
2568 	MDI_PHCI_UNLOCK(ph);
2569 	*ret_pip = pip;
2570 
2571 	if (path_allocated)
2572 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2573 
2574 	return (rv);
2575 }
2576 
2577 /*ARGSUSED*/
2578 int
2579 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2580     int flags, mdi_pathinfo_t **ret_pip)
2581 {
2582 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2583 	    flags, ret_pip));
2584 }
2585 
2586 /*
2587  * i_mdi_pi_alloc():
2588  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2589  * Return Values:
2590  *		mdi_pathinfo
2591  */
2592 
2593 /*ARGSUSED*/
2594 static mdi_pathinfo_t *
2595 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2596 {
2597 	mdi_pathinfo_t	*pip;
2598 	int		ct_circular;
2599 	int		ph_circular;
2600 	int		se_flag;
2601 	int		kmem_flag;
2602 
2603 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2604 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2605 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2606 	    MDI_PATHINFO_STATE_TRANSIENT;
2607 
2608 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2609 		MDI_PI_SET_USER_DISABLE(pip);
2610 
2611 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2612 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2613 
2614 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2615 		MDI_PI_SET_DRV_DISABLE(pip);
2616 
2617 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2618 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2619 	MDI_PI(pip)->pi_client = ct;
2620 	MDI_PI(pip)->pi_phci = ph;
2621 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2622 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2623 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2624 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2625 	MDI_PI(pip)->pi_pprivate = NULL;
2626 	MDI_PI(pip)->pi_cprivate = NULL;
2627 	MDI_PI(pip)->pi_vprivate = NULL;
2628 	MDI_PI(pip)->pi_client_link = NULL;
2629 	MDI_PI(pip)->pi_phci_link = NULL;
2630 	MDI_PI(pip)->pi_ref_cnt = 0;
2631 	MDI_PI(pip)->pi_kstats = NULL;
2632 	MDI_PI(pip)->pi_preferred = 1;
2633 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2634 
2635 	/*
2636 	 * Lock both dev_info nodes against changes in parallel.
2637 	 */
2638 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2639 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2640 
2641 	i_mdi_phci_add_path(ph, pip);
2642 	i_mdi_client_add_path(ct, pip);
2643 
2644 	ndi_devi_exit(ph->ph_dip, ph_circular);
2645 	ndi_devi_exit(ct->ct_dip, ct_circular);
2646 
2647 	/* determine interrupt context */
2648 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2649 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2650 
2651 	i_ddi_di_cache_invalidate(kmem_flag);
2652 
2653 	return (pip);
2654 }
2655 
2656 /*
2657  * i_mdi_phci_add_path():
2658  * 		Add a mdi_pathinfo node to pHCI list.
2659  * Notes:
2660  *		Caller should per-pHCI mutex
2661  */
2662 
2663 static void
2664 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2665 {
2666 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2667 
2668 	if (ph->ph_path_head == NULL) {
2669 		ph->ph_path_head = pip;
2670 	} else {
2671 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2672 	}
2673 	ph->ph_path_tail = pip;
2674 	ph->ph_path_count++;
2675 }
2676 
2677 /*
2678  * i_mdi_client_add_path():
2679  *		Add mdi_pathinfo node to client list
2680  */
2681 
2682 static void
2683 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2684 {
2685 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2686 
2687 	if (ct->ct_path_head == NULL) {
2688 		ct->ct_path_head = pip;
2689 	} else {
2690 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2691 	}
2692 	ct->ct_path_tail = pip;
2693 	ct->ct_path_count++;
2694 }
2695 
2696 /*
2697  * mdi_pi_free():
2698  *		Free the mdi_pathinfo node and also client device node if this
2699  *		is the last path to the device
2700  * Return Values:
2701  *		MDI_SUCCESS
2702  *		MDI_FAILURE
2703  *		MDI_BUSY
2704  */
2705 
2706 /*ARGSUSED*/
2707 int
2708 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
2709 {
2710 	int		rv = MDI_SUCCESS;
2711 	mdi_vhci_t	*vh;
2712 	mdi_phci_t	*ph;
2713 	mdi_client_t	*ct;
2714 	int		(*f)();
2715 	int		client_held = 0;
2716 
2717 	MDI_PI_LOCK(pip);
2718 	ph = MDI_PI(pip)->pi_phci;
2719 	ASSERT(ph != NULL);
2720 	if (ph == NULL) {
2721 		/*
2722 		 * Invalid pHCI device, return failure
2723 		 */
2724 		MDI_DEBUG(1, (CE_WARN, NULL,
2725 		    "!mdi_pi_free: invalid pHCI"));
2726 		MDI_PI_UNLOCK(pip);
2727 		return (MDI_FAILURE);
2728 	}
2729 
2730 	vh = ph->ph_vhci;
2731 	ASSERT(vh != NULL);
2732 	if (vh == NULL) {
2733 		/* Invalid pHCI device, return failure */
2734 		MDI_DEBUG(1, (CE_WARN, NULL,
2735 		    "!mdi_pi_free: invalid vHCI"));
2736 		MDI_PI_UNLOCK(pip);
2737 		return (MDI_FAILURE);
2738 	}
2739 
2740 	ct = MDI_PI(pip)->pi_client;
2741 	ASSERT(ct != NULL);
2742 	if (ct == NULL) {
2743 		/*
2744 		 * Invalid Client device, return failure
2745 		 */
2746 		MDI_DEBUG(1, (CE_WARN, NULL,
2747 		    "!mdi_pi_free: invalid client"));
2748 		MDI_PI_UNLOCK(pip);
2749 		return (MDI_FAILURE);
2750 	}
2751 
2752 	/*
2753 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
2754 	 * if the node state is either offline or init and the reference count
2755 	 * is zero.
2756 	 */
2757 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
2758 	    MDI_PI_IS_INITING(pip))) {
2759 		/*
2760 		 * Node is busy
2761 		 */
2762 		MDI_DEBUG(1, (CE_WARN, NULL,
2763 		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
2764 		MDI_PI_UNLOCK(pip);
2765 		return (MDI_BUSY);
2766 	}
2767 
2768 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
2769 		/*
2770 		 * Give a chance for pending I/Os to complete.
2771 		 */
2772 		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!mdi_pi_free: "
2773 		    "%d cmds still pending on path: %p\n",
2774 		    MDI_PI(pip)->pi_ref_cnt, pip));
2775 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
2776 		    &MDI_PI(pip)->pi_mutex,
2777 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
2778 			/*
2779 			 * The timeout time reached without ref_cnt being zero
2780 			 * being signaled.
2781 			 */
2782 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2783 			    "!mdi_pi_free: "
2784 			    "Timeout reached on path %p without the cond\n",
2785 			    pip));
2786 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2787 			    "!mdi_pi_free: "
2788 			    "%d cmds still pending on path: %p\n",
2789 			    MDI_PI(pip)->pi_ref_cnt, pip));
2790 			MDI_PI_UNLOCK(pip);
2791 			return (MDI_BUSY);
2792 		}
2793 	}
2794 	if (MDI_PI(pip)->pi_pm_held) {
2795 		client_held = 1;
2796 	}
2797 	MDI_PI_UNLOCK(pip);
2798 
2799 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
2800 
2801 	MDI_CLIENT_LOCK(ct);
2802 
2803 	/* Prevent further failovers till mdi_mutex is held */
2804 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
2805 
2806 	/*
2807 	 * Wait till failover is complete before removing this node.
2808 	 */
2809 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
2810 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
2811 
2812 	MDI_CLIENT_UNLOCK(ct);
2813 	mutex_enter(&mdi_mutex);
2814 	MDI_CLIENT_LOCK(ct);
2815 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
2816 
2817 	if (!MDI_PI_IS_INITING(pip)) {
2818 		f = vh->vh_ops->vo_pi_uninit;
2819 		if (f != NULL) {
2820 			rv = (*f)(vh->vh_dip, pip, 0);
2821 		}
2822 	}
2823 	/*
2824 	 * If vo_pi_uninit() completed successfully.
2825 	 */
2826 	if (rv == MDI_SUCCESS) {
2827 		if (client_held) {
2828 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
2829 			    "i_mdi_pm_rele_client\n"));
2830 			i_mdi_pm_rele_client(ct, 1);
2831 		}
2832 		i_mdi_pi_free(ph, pip, ct);
2833 		if (ct->ct_path_count == 0) {
2834 			/*
2835 			 * Client lost its last path.
2836 			 * Clean up the client device
2837 			 */
2838 			MDI_CLIENT_UNLOCK(ct);
2839 			(void) i_mdi_client_free(ct->ct_vhci, ct);
2840 			mutex_exit(&mdi_mutex);
2841 			return (rv);
2842 		}
2843 	}
2844 	MDI_CLIENT_UNLOCK(ct);
2845 	mutex_exit(&mdi_mutex);
2846 
2847 	if (rv == MDI_FAILURE)
2848 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2849 
2850 	return (rv);
2851 }
2852 
2853 /*
2854  * i_mdi_pi_free():
2855  *		Free the mdi_pathinfo node
2856  */
2857 static void
2858 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
2859 {
2860 	int	ct_circular;
2861 	int	ph_circular;
2862 	int	se_flag;
2863 	int	kmem_flag;
2864 
2865 	/*
2866 	 * remove any per-path kstats
2867 	 */
2868 	i_mdi_pi_kstat_destroy(pip);
2869 
2870 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2871 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2872 
2873 	i_mdi_client_remove_path(ct, pip);
2874 	i_mdi_phci_remove_path(ph, pip);
2875 
2876 	ndi_devi_exit(ph->ph_dip, ph_circular);
2877 	ndi_devi_exit(ct->ct_dip, ct_circular);
2878 
2879 	/* determine interrupt context */
2880 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
2881 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2882 
2883 	i_ddi_di_cache_invalidate(kmem_flag);
2884 
2885 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
2886 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
2887 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
2888 	if (MDI_PI(pip)->pi_addr) {
2889 		kmem_free(MDI_PI(pip)->pi_addr,
2890 		    strlen(MDI_PI(pip)->pi_addr) + 1);
2891 		MDI_PI(pip)->pi_addr = NULL;
2892 	}
2893 
2894 	if (MDI_PI(pip)->pi_prop) {
2895 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
2896 		MDI_PI(pip)->pi_prop = NULL;
2897 	}
2898 	kmem_free(pip, sizeof (struct mdi_pathinfo));
2899 }
2900 
2901 
2902 /*
2903  * i_mdi_phci_remove_path():
2904  * 		Remove a mdi_pathinfo node from pHCI list.
2905  * Notes:
2906  *		Caller should hold per-pHCI mutex
2907  */
2908 
2909 static void
2910 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2911 {
2912 	mdi_pathinfo_t	*prev = NULL;
2913 	mdi_pathinfo_t	*path = NULL;
2914 
2915 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2916 
2917 	path = ph->ph_path_head;
2918 	while (path != NULL) {
2919 		if (path == pip) {
2920 			break;
2921 		}
2922 		prev = path;
2923 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2924 	}
2925 
2926 	if (path) {
2927 		ph->ph_path_count--;
2928 		if (prev) {
2929 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
2930 		} else {
2931 			ph->ph_path_head =
2932 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2933 		}
2934 		if (ph->ph_path_tail == path) {
2935 			ph->ph_path_tail = prev;
2936 		}
2937 	}
2938 
2939 	/*
2940 	 * Clear the pHCI link
2941 	 */
2942 	MDI_PI(pip)->pi_phci_link = NULL;
2943 	MDI_PI(pip)->pi_phci = NULL;
2944 }
2945 
2946 /*
2947  * i_mdi_client_remove_path():
2948  * 		Remove a mdi_pathinfo node from client path list.
2949  */
2950 
2951 static void
2952 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2953 {
2954 	mdi_pathinfo_t	*prev = NULL;
2955 	mdi_pathinfo_t	*path;
2956 
2957 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2958 
2959 	path = ct->ct_path_head;
2960 	while (path != NULL) {
2961 		if (path == pip) {
2962 			break;
2963 		}
2964 		prev = path;
2965 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2966 	}
2967 
2968 	if (path) {
2969 		ct->ct_path_count--;
2970 		if (prev) {
2971 			MDI_PI(prev)->pi_client_link =
2972 			    MDI_PI(path)->pi_client_link;
2973 		} else {
2974 			ct->ct_path_head =
2975 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2976 		}
2977 		if (ct->ct_path_tail == path) {
2978 			ct->ct_path_tail = prev;
2979 		}
2980 		if (ct->ct_path_last == path) {
2981 			ct->ct_path_last = ct->ct_path_head;
2982 		}
2983 	}
2984 	MDI_PI(pip)->pi_client_link = NULL;
2985 	MDI_PI(pip)->pi_client = NULL;
2986 }
2987 
/*
 * i_mdi_pi_state_change():
 *		Common worker that moves a mdi_pathinfo node to the requested
 *		state (online, standby, fault or offline).
 *
 *		In outline:
 *		  - validate the pHCI, vHCI and client of the path;
 *		  - if the path is still initializing, call the vHCI's
 *		    vo_pi_init() entry point (with the path lock dropped);
 *		  - refuse the request with MDI_BUSY if the pHCI is not
 *		    ready, otherwise mark the pHCI unstable;
 *		  - wait for any transient path state and any client
 *		    failover to finish;
 *		  - mark the path with the transient state matching "state"
 *		    and call the vHCI's vo_pi_state_change() entry point;
 *		  - on success clear the transient state, otherwise restore
 *		    the old path state;
 *		  - once the client is stable again, update the client state
 *		    and online/offline the client devinfo node as required;
 *		  - mark the pHCI stable again.
 *
 *		pip	pathinfo node to change
 *		state	target MDI_PATHINFO_STATE_* value
 *		flag	NDI flags; NDI_DEVI_REMOVE is tested here, and the
 *			value is passed on to vo_pi_state_change()
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		(or the non-success value returned by vo_pi_state_change())
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* the callback is made without holding the path lock */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				/* no locks held, pHCI not yet unstable */
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* from here on every exit goes through state_change_exit */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/* neither the pip nor the ct lock is held */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Notify the vHCI driver of the state change; the callback is made
	 * without holding the client or path locks.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			/* commit the new state */
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* roll back to the state saved before the change */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			/* rv carries NDI_* codes until converted below */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip &&
				    (i_ddi_node_state(cdip) < DS_READY) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3306 
3307 /*
3308  * mdi_pi_online():
3309  *		Place the path_info node in the online state.  The path is
3310  *		now available to be selected by mdi_select_path() for
3311  *		transporting I/O requests to client devices.
3312  * Return Values:
3313  *		MDI_SUCCESS
3314  *		MDI_FAILURE
3315  */
3316 int
3317 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3318 {
3319 	mdi_client_t *ct = MDI_PI(pip)->pi_client;
3320 	dev_info_t *cdip;
3321 	int		client_held = 0;
3322 	int rv;
3323 
3324 	ASSERT(ct != NULL);
3325 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3326 	if (rv != MDI_SUCCESS)
3327 		return (rv);
3328 
3329 	MDI_PI_LOCK(pip);
3330 	if (MDI_PI(pip)->pi_pm_held == 0) {
3331 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3332 		    "i_mdi_pm_hold_pip\n"));
3333 		i_mdi_pm_hold_pip(pip);
3334 		client_held = 1;
3335 	}
3336 	MDI_PI_UNLOCK(pip);
3337 
3338 	if (client_held) {
3339 		MDI_CLIENT_LOCK(ct);
3340 		if (ct->ct_power_cnt == 0) {
3341 			rv = i_mdi_power_all_phci(ct);
3342 		}
3343 
3344 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3345 		    "i_mdi_pm_hold_client\n"));
3346 		i_mdi_pm_hold_client(ct, 1);
3347 		MDI_CLIENT_UNLOCK(ct);
3348 	}
3349 
3350 	/*
3351 	 * Create the per-path (pathinfo) IO and error kstats which
3352 	 * are reported via iostat(1m).
3353 	 *
3354 	 * Defer creating the per-path kstats if device is not yet
3355 	 * attached;  the names of the kstats are constructed in part
3356 	 * using the devices instance number which is assigned during
3357 	 * process of attaching the client device.
3358 	 *
3359 	 * The framework post_attach handler, mdi_post_attach(), is
3360 	 * is responsible for initializing the client's pathinfo list
3361 	 * once successfully attached.
3362 	 */
3363 	cdip = ct->ct_dip;
3364 	ASSERT(cdip);
3365 	if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED))
3366 		return (rv);
3367 
3368 	MDI_CLIENT_LOCK(ct);
3369 	rv = i_mdi_pi_kstat_create(pip);
3370 	MDI_CLIENT_UNLOCK(ct);
3371 	return (rv);
3372 }
3373 
3374 /*
3375  * mdi_pi_standby():
3376  *		Place the mdi_pathinfo node in standby state
3377  *
3378  * Return Values:
3379  *		MDI_SUCCESS
3380  *		MDI_FAILURE
3381  */
3382 int
3383 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3384 {
3385 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3386 }
3387 
3388 /*
3389  * mdi_pi_fault():
3390  *		Place the mdi_pathinfo node in fault'ed state
3391  * Return Values:
3392  *		MDI_SUCCESS
3393  *		MDI_FAILURE
3394  */
3395 int
3396 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3397 {
3398 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3399 }
3400 
3401 /*
3402  * mdi_pi_offline():
3403  *		Offline a mdi_pathinfo node.
3404  * Return Values:
3405  *		MDI_SUCCESS
3406  *		MDI_FAILURE
3407  */
3408 int
3409 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3410 {
3411 	int	ret, client_held = 0;
3412 	mdi_client_t	*ct;
3413 
3414 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3415 
3416 	if (ret == MDI_SUCCESS) {
3417 		MDI_PI_LOCK(pip);
3418 		if (MDI_PI(pip)->pi_pm_held) {
3419 			client_held = 1;
3420 		}
3421 		MDI_PI_UNLOCK(pip);
3422 
3423 		if (client_held) {
3424 			ct = MDI_PI(pip)->pi_client;
3425 			MDI_CLIENT_LOCK(ct);
3426 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3427 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3428 			i_mdi_pm_rele_client(ct, 1);
3429 			MDI_CLIENT_UNLOCK(ct);
3430 		}
3431 	}
3432 
3433 	return (ret);
3434 }
3435 
3436 /*
3437  * i_mdi_pi_offline():
3438  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3439  */
3440 static int
3441 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3442 {
3443 	dev_info_t	*vdip = NULL;
3444 	mdi_vhci_t	*vh = NULL;
3445 	mdi_client_t	*ct = NULL;
3446 	int		(*f)();
3447 	int		rv;
3448 
3449 	MDI_PI_LOCK(pip);
3450 	ct = MDI_PI(pip)->pi_client;
3451 	ASSERT(ct != NULL);
3452 
3453 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3454 		/*
3455 		 * Give a chance for pending I/Os to complete.
3456 		 */
3457 		MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3458 		    "%d cmds still pending on path: %p\n",
3459 		    MDI_PI(pip)->pi_ref_cnt, pip));
3460 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3461 		    &MDI_PI(pip)->pi_mutex,
3462 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3463 			/*
3464 			 * The timeout time reached without ref_cnt being zero
3465 			 * being signaled.
3466 			 */
3467 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3468 			    "Timeout reached on path %p without the cond\n",
3469 			    pip));
3470 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3471 			    "%d cmds still pending on path: %p\n",
3472 			    MDI_PI(pip)->pi_ref_cnt, pip));
3473 		}
3474 	}
3475 	vh = ct->ct_vhci;
3476 	vdip = vh->vh_dip;
3477 
3478 	/*
3479 	 * Notify vHCI that has registered this event
3480 	 */
3481 	ASSERT(vh->vh_ops);
3482 	f = vh->vh_ops->vo_pi_state_change;
3483 
3484 	if (f != NULL) {
3485 		MDI_PI_UNLOCK(pip);
3486 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3487 		    flags)) != MDI_SUCCESS) {
3488 			MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed "
3489 			    "vdip 0x%x, pip 0x%x", vdip, pip));
3490 		}
3491 		MDI_PI_LOCK(pip);
3492 	}
3493 
3494 	/*
3495 	 * Set the mdi_pathinfo node state and clear the transient condition
3496 	 */
3497 	MDI_PI_SET_OFFLINE(pip);
3498 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3499 	MDI_PI_UNLOCK(pip);
3500 
3501 	MDI_CLIENT_LOCK(ct);
3502 	if (rv == MDI_SUCCESS) {
3503 		if (ct->ct_unstable == 0) {
3504 			dev_info_t	*cdip = ct->ct_dip;
3505 
3506 			/*
3507 			 * Onlining the mdi_pathinfo node will impact the
3508 			 * client state Update the client and dev_info node
3509 			 * state accordingly
3510 			 */
3511 			i_mdi_client_update_state(ct);
3512 			rv = NDI_SUCCESS;
3513 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3514 				if (cdip &&
3515 				    (i_ddi_node_state(cdip) >=
3516 				    DS_INITIALIZED)) {
3517 					MDI_CLIENT_UNLOCK(ct);
3518 					rv = ndi_devi_offline(cdip, 0);
3519 					MDI_CLIENT_LOCK(ct);
3520 					if (rv != NDI_SUCCESS) {
3521 						/*
3522 						 * ndi_devi_offline failed.
3523 						 * Reset client flags to
3524 						 * online.
3525 						 */
3526 						MDI_DEBUG(4, (CE_WARN, cdip,
3527 						    "!ndi_devi_offline: failed "
3528 						    " Error: %x", rv));
3529 						MDI_CLIENT_SET_ONLINE(ct);
3530 					}
3531 				}
3532 			}
3533 			/*
3534 			 * Convert to MDI error code
3535 			 */
3536 			switch (rv) {
3537 			case NDI_SUCCESS:
3538 				rv = MDI_SUCCESS;
3539 				break;
3540 			case NDI_BUSY:
3541 				rv = MDI_BUSY;
3542 				break;
3543 			default:
3544 				rv = MDI_FAILURE;
3545 				break;
3546 			}
3547 		}
3548 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3549 		i_mdi_report_path_state(ct, pip);
3550 	}
3551 
3552 	MDI_CLIENT_UNLOCK(ct);
3553 
3554 	/*
3555 	 * Change in the mdi_pathinfo node state will impact the client state
3556 	 */
3557 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3558 	    ct, pip));
3559 	return (rv);
3560 }
3561 
3562 
3563 /*
3564  * mdi_pi_get_addr():
3565  *		Get the unit address associated with a mdi_pathinfo node
3566  *
3567  * Return Values:
3568  *		char *
3569  */
3570 char *
3571 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3572 {
3573 	if (pip == NULL)
3574 		return (NULL);
3575 
3576 	return (MDI_PI(pip)->pi_addr);
3577 }
3578 
3579 /*
3580  * mdi_pi_get_client():
3581  *		Get the client devinfo associated with a mdi_pathinfo node
3582  *
3583  * Return Values:
3584  *		Handle to client device dev_info node
3585  */
3586 dev_info_t *
3587 mdi_pi_get_client(mdi_pathinfo_t *pip)
3588 {
3589 	dev_info_t	*dip = NULL;
3590 	if (pip) {
3591 		dip = MDI_PI(pip)->pi_client->ct_dip;
3592 	}
3593 	return (dip);
3594 }
3595 
3596 /*
3597  * mdi_pi_get_phci():
3598  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3599  * Return Values:
3600  *		Handle to dev_info node
3601  */
3602 dev_info_t *
3603 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3604 {
3605 	dev_info_t	*dip = NULL;
3606 	if (pip) {
3607 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3608 	}
3609 	return (dip);
3610 }
3611 
3612 /*
3613  * mdi_pi_get_client_private():
3614  *		Get the client private information associated with the
3615  *		mdi_pathinfo node
3616  */
3617 void *
3618 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3619 {
3620 	void *cprivate = NULL;
3621 	if (pip) {
3622 		cprivate = MDI_PI(pip)->pi_cprivate;
3623 	}
3624 	return (cprivate);
3625 }
3626 
3627 /*
3628  * mdi_pi_set_client_private():
3629  *		Set the client private information in the mdi_pathinfo node
3630  */
3631 void
3632 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3633 {
3634 	if (pip) {
3635 		MDI_PI(pip)->pi_cprivate = priv;
3636 	}
3637 }
3638 
3639 /*
3640  * mdi_pi_get_phci_private():
3641  *		Get the pHCI private information associated with the
3642  *		mdi_pathinfo node
3643  */
3644 caddr_t
3645 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3646 {
3647 	caddr_t	pprivate = NULL;
3648 	if (pip) {
3649 		pprivate = MDI_PI(pip)->pi_pprivate;
3650 	}
3651 	return (pprivate);
3652 }
3653 
3654 /*
3655  * mdi_pi_set_phci_private():
3656  *		Set the pHCI private information in the mdi_pathinfo node
3657  */
3658 void
3659 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3660 {
3661 	if (pip) {
3662 		MDI_PI(pip)->pi_pprivate = priv;
3663 	}
3664 }
3665 
3666 /*
3667  * mdi_pi_get_state():
3668  *		Get the mdi_pathinfo node state. Transient states are internal
3669  *		and not provided to the users
3670  */
3671 mdi_pathinfo_state_t
3672 mdi_pi_get_state(mdi_pathinfo_t *pip)
3673 {
3674 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3675 
3676 	if (pip) {
3677 		if (MDI_PI_IS_TRANSIENT(pip)) {
3678 			/*
3679 			 * mdi_pathinfo is in state transition.  Return the
3680 			 * last good state.
3681 			 */
3682 			state = MDI_PI_OLD_STATE(pip);
3683 		} else {
3684 			state = MDI_PI_STATE(pip);
3685 		}
3686 	}
3687 	return (state);
3688 }
3689 
3690 /*
3691  * Note that the following function needs to be the new interface for
3692  * mdi_pi_get_state when mpxio gets integrated to ON.
3693  */
3694 int
3695 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
3696 		uint32_t *ext_state)
3697 {
3698 	*state = MDI_PATHINFO_STATE_INIT;
3699 
3700 	if (pip) {
3701 		if (MDI_PI_IS_TRANSIENT(pip)) {
3702 			/*
3703 			 * mdi_pathinfo is in state transition.  Return the
3704 			 * last good state.
3705 			 */
3706 			*state = MDI_PI_OLD_STATE(pip);
3707 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
3708 		} else {
3709 			*state = MDI_PI_STATE(pip);
3710 			*ext_state = MDI_PI_EXT_STATE(pip);
3711 		}
3712 	}
3713 	return (MDI_SUCCESS);
3714 }
3715 
3716 /*
3717  * mdi_pi_get_preferred:
3718  *	Get the preferred path flag
3719  */
3720 int
3721 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
3722 {
3723 	if (pip) {
3724 		return (MDI_PI(pip)->pi_preferred);
3725 	}
3726 	return (0);
3727 }
3728 
3729 /*
3730  * mdi_pi_set_preferred:
3731  *	Set the preferred path flag
3732  */
3733 void
3734 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
3735 {
3736 	if (pip) {
3737 		MDI_PI(pip)->pi_preferred = preferred;
3738 	}
3739 }
3740 
3741 
3742 /*
3743  * mdi_pi_set_state():
3744  *		Set the mdi_pathinfo node state
3745  */
3746 void
3747 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
3748 {
3749 	uint32_t	ext_state;
3750 
3751 	if (pip) {
3752 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
3753 		MDI_PI(pip)->pi_state = state;
3754 		MDI_PI(pip)->pi_state |= ext_state;
3755 	}
3756 }
3757 
3758 /*
3759  * Property functions:
3760  */
3761 
3762 int
3763 i_map_nvlist_error_to_mdi(int val)
3764 {
3765 	int rv;
3766 
3767 	switch (val) {
3768 	case 0:
3769 		rv = DDI_PROP_SUCCESS;
3770 		break;
3771 	case EINVAL:
3772 	case ENOTSUP:
3773 		rv = DDI_PROP_INVAL_ARG;
3774 		break;
3775 	case ENOMEM:
3776 		rv = DDI_PROP_NO_MEMORY;
3777 		break;
3778 	default:
3779 		rv = DDI_PROP_NOT_FOUND;
3780 		break;
3781 	}
3782 	return (rv);
3783 }
3784 
3785 /*
3786  * mdi_pi_get_next_prop():
3787  * 		Property walk function.  The caller should hold mdi_pi_lock()
3788  *		and release by calling mdi_pi_unlock() at the end of walk to
3789  *		get a consistent value.
3790  */
3791 
3792 nvpair_t *
3793 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
3794 {
3795 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3796 		return (NULL);
3797 	}
3798 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3799 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
3800 }
3801 
3802 /*
3803  * mdi_prop_remove():
3804  * 		Remove the named property from the named list.
3805  */
3806 
3807 int
3808 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
3809 {
3810 	if (pip == NULL) {
3811 		return (DDI_PROP_NOT_FOUND);
3812 	}
3813 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3814 	MDI_PI_LOCK(pip);
3815 	if (MDI_PI(pip)->pi_prop == NULL) {
3816 		MDI_PI_UNLOCK(pip);
3817 		return (DDI_PROP_NOT_FOUND);
3818 	}
3819 	if (name) {
3820 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
3821 	} else {
3822 		char		nvp_name[MAXNAMELEN];
3823 		nvpair_t	*nvp;
3824 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
3825 		while (nvp) {
3826 			nvpair_t	*next;
3827 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
3828 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
3829 			    nvpair_name(nvp));
3830 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
3831 			    nvp_name);
3832 			nvp = next;
3833 		}
3834 	}
3835 	MDI_PI_UNLOCK(pip);
3836 	return (DDI_PROP_SUCCESS);
3837 }
3838 
3839 /*
3840  * mdi_prop_size():
3841  * 		Get buffer size needed to pack the property data.
3842  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
3843  *		buffer size.
3844  */
3845 
3846 int
3847 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
3848 {
3849 	int	rv;
3850 	size_t	bufsize;
3851 
3852 	*buflenp = 0;
3853 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3854 		return (DDI_PROP_NOT_FOUND);
3855 	}
3856 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3857 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
3858 	    &bufsize, NV_ENCODE_NATIVE);
3859 	*buflenp = bufsize;
3860 	return (i_map_nvlist_error_to_mdi(rv));
3861 }
3862 
3863 /*
3864  * mdi_prop_pack():
3865  * 		pack the property list.  The caller should hold the
3866  *		mdi_pathinfo_t node to get a consistent data
3867  */
3868 
3869 int
3870 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
3871 {
3872 	int	rv;
3873 	size_t	bufsize;
3874 
3875 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
3876 		return (DDI_PROP_NOT_FOUND);
3877 	}
3878 
3879 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3880 
3881 	bufsize = buflen;
3882 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
3883 	    NV_ENCODE_NATIVE, KM_SLEEP);
3884 
3885 	return (i_map_nvlist_error_to_mdi(rv));
3886 }
3887 
3888 /*
3889  * mdi_prop_update_byte():
3890  *		Create/Update a byte property
3891  */
3892 int
3893 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
3894 {
3895 	int rv;
3896 
3897 	if (pip == NULL) {
3898 		return (DDI_PROP_INVAL_ARG);
3899 	}
3900 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3901 	MDI_PI_LOCK(pip);
3902 	if (MDI_PI(pip)->pi_prop == NULL) {
3903 		MDI_PI_UNLOCK(pip);
3904 		return (DDI_PROP_NOT_FOUND);
3905 	}
3906 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
3907 	MDI_PI_UNLOCK(pip);
3908 	return (i_map_nvlist_error_to_mdi(rv));
3909 }
3910 
3911 /*
3912  * mdi_prop_update_byte_array():
3913  *		Create/Update a byte array property
3914  */
3915 int
3916 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
3917     uint_t nelements)
3918 {
3919 	int rv;
3920 
3921 	if (pip == NULL) {
3922 		return (DDI_PROP_INVAL_ARG);
3923 	}
3924 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3925 	MDI_PI_LOCK(pip);
3926 	if (MDI_PI(pip)->pi_prop == NULL) {
3927 		MDI_PI_UNLOCK(pip);
3928 		return (DDI_PROP_NOT_FOUND);
3929 	}
3930 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
3931 	MDI_PI_UNLOCK(pip);
3932 	return (i_map_nvlist_error_to_mdi(rv));
3933 }
3934 
3935 /*
3936  * mdi_prop_update_int():
3937  *		Create/Update a 32 bit integer property
3938  */
3939 int
3940 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
3941 {
3942 	int rv;
3943 
3944 	if (pip == NULL) {
3945 		return (DDI_PROP_INVAL_ARG);
3946 	}
3947 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3948 	MDI_PI_LOCK(pip);
3949 	if (MDI_PI(pip)->pi_prop == NULL) {
3950 		MDI_PI_UNLOCK(pip);
3951 		return (DDI_PROP_NOT_FOUND);
3952 	}
3953 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
3954 	MDI_PI_UNLOCK(pip);
3955 	return (i_map_nvlist_error_to_mdi(rv));
3956 }
3957 
3958 /*
3959  * mdi_prop_update_int64():
3960  *		Create/Update a 64 bit integer property
3961  */
3962 int
3963 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
3964 {
3965 	int rv;
3966 
3967 	if (pip == NULL) {
3968 		return (DDI_PROP_INVAL_ARG);
3969 	}
3970 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3971 	MDI_PI_LOCK(pip);
3972 	if (MDI_PI(pip)->pi_prop == NULL) {
3973 		MDI_PI_UNLOCK(pip);
3974 		return (DDI_PROP_NOT_FOUND);
3975 	}
3976 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
3977 	MDI_PI_UNLOCK(pip);
3978 	return (i_map_nvlist_error_to_mdi(rv));
3979 }
3980 
3981 /*
3982  * mdi_prop_update_int_array():
3983  *		Create/Update a int array property
3984  */
3985 int
3986 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
3987 	    uint_t nelements)
3988 {
3989 	int rv;
3990 
3991 	if (pip == NULL) {
3992 		return (DDI_PROP_INVAL_ARG);
3993 	}
3994 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3995 	MDI_PI_LOCK(pip);
3996 	if (MDI_PI(pip)->pi_prop == NULL) {
3997 		MDI_PI_UNLOCK(pip);
3998 		return (DDI_PROP_NOT_FOUND);
3999 	}
4000 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4001 	    nelements);
4002 	MDI_PI_UNLOCK(pip);
4003 	return (i_map_nvlist_error_to_mdi(rv));
4004 }
4005 
4006 /*
4007  * mdi_prop_update_string():
4008  *		Create/Update a string property
4009  */
4010 int
4011 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4012 {
4013 	int rv;
4014 
4015 	if (pip == NULL) {
4016 		return (DDI_PROP_INVAL_ARG);
4017 	}
4018 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4019 	MDI_PI_LOCK(pip);
4020 	if (MDI_PI(pip)->pi_prop == NULL) {
4021 		MDI_PI_UNLOCK(pip);
4022 		return (DDI_PROP_NOT_FOUND);
4023 	}
4024 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4025 	MDI_PI_UNLOCK(pip);
4026 	return (i_map_nvlist_error_to_mdi(rv));
4027 }
4028 
4029 /*
4030  * mdi_prop_update_string_array():
4031  *		Create/Update a string array property
4032  */
4033 int
4034 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4035     uint_t nelements)
4036 {
4037 	int rv;
4038 
4039 	if (pip == NULL) {
4040 		return (DDI_PROP_INVAL_ARG);
4041 	}
4042 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4043 	MDI_PI_LOCK(pip);
4044 	if (MDI_PI(pip)->pi_prop == NULL) {
4045 		MDI_PI_UNLOCK(pip);
4046 		return (DDI_PROP_NOT_FOUND);
4047 	}
4048 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4049 	    nelements);
4050 	MDI_PI_UNLOCK(pip);
4051 	return (i_map_nvlist_error_to_mdi(rv));
4052 }
4053 
4054 /*
4055  * mdi_prop_lookup_byte():
4056  * 		Look for byte property identified by name.  The data returned
4057  *		is the actual property and valid as long as mdi_pathinfo_t node
4058  *		is alive.
4059  */
4060 int
4061 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4062 {
4063 	int rv;
4064 
4065 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4066 		return (DDI_PROP_NOT_FOUND);
4067 	}
4068 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4069 	return (i_map_nvlist_error_to_mdi(rv));
4070 }
4071 
4072 
4073 /*
4074  * mdi_prop_lookup_byte_array():
4075  * 		Look for byte array property identified by name.  The data
4076  *		returned is the actual property and valid as long as
4077  *		mdi_pathinfo_t node is alive.
4078  */
4079 int
4080 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4081     uint_t *nelements)
4082 {
4083 	int rv;
4084 
4085 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4086 		return (DDI_PROP_NOT_FOUND);
4087 	}
4088 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4089 	    nelements);
4090 	return (i_map_nvlist_error_to_mdi(rv));
4091 }
4092 
4093 /*
4094  * mdi_prop_lookup_int():
4095  * 		Look for int property identified by name.  The data returned
4096  *		is the actual property and valid as long as mdi_pathinfo_t
4097  *		node is alive.
4098  */
4099 int
4100 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4101 {
4102 	int rv;
4103 
4104 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4105 		return (DDI_PROP_NOT_FOUND);
4106 	}
4107 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4108 	return (i_map_nvlist_error_to_mdi(rv));
4109 }
4110 
4111 /*
4112  * mdi_prop_lookup_int64():
4113  * 		Look for int64 property identified by name.  The data returned
4114  *		is the actual property and valid as long as mdi_pathinfo_t node
4115  *		is alive.
4116  */
4117 int
4118 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4119 {
4120 	int rv;
4121 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4122 		return (DDI_PROP_NOT_FOUND);
4123 	}
4124 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4125 	return (i_map_nvlist_error_to_mdi(rv));
4126 }
4127 
4128 /*
4129  * mdi_prop_lookup_int_array():
4130  * 		Look for int array property identified by name.  The data
4131  *		returned is the actual property and valid as long as
4132  *		mdi_pathinfo_t node is alive.
4133  */
4134 int
4135 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4136     uint_t *nelements)
4137 {
4138 	int rv;
4139 
4140 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4141 		return (DDI_PROP_NOT_FOUND);
4142 	}
4143 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4144 	    (int32_t **)data, nelements);
4145 	return (i_map_nvlist_error_to_mdi(rv));
4146 }
4147 
4148 /*
4149  * mdi_prop_lookup_string():
4150  * 		Look for string property identified by name.  The data
4151  *		returned is the actual property and valid as long as
4152  *		mdi_pathinfo_t node is alive.
4153  */
4154 int
4155 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4156 {
4157 	int rv;
4158 
4159 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4160 		return (DDI_PROP_NOT_FOUND);
4161 	}
4162 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4163 	return (i_map_nvlist_error_to_mdi(rv));
4164 }
4165 
4166 /*
4167  * mdi_prop_lookup_string_array():
4168  * 		Look for string array property identified by name.  The data
4169  *		returned is the actual property and valid as long as
4170  *		mdi_pathinfo_t node is alive.
4171  */
4172 
4173 int
4174 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4175     uint_t *nelements)
4176 {
4177 	int rv;
4178 
4179 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4180 		return (DDI_PROP_NOT_FOUND);
4181 	}
4182 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4183 	    nelements);
4184 	return (i_map_nvlist_error_to_mdi(rv));
4185 }
4186 
4187 /*
4188  * mdi_prop_free():
4189  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4190  *		functions return the pointer to actual property data and not a
4191  *		copy of it.  So the data returned is valid as long as
4192  *		mdi_pathinfo_t node is valid.
4193  */
4194 
4195 /*ARGSUSED*/
4196 int
4197 mdi_prop_free(void *data)
4198 {
4199 	return (DDI_PROP_SUCCESS);
4200 }
4201 
4202 /*ARGSUSED*/
4203 static void
4204 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4205 {
4206 	char		*phci_path, *ct_path;
4207 	char		*ct_status;
4208 	char		*status;
4209 	dev_info_t	*dip = ct->ct_dip;
4210 	char		lb_buf[64];
4211 
4212 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4213 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4214 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4215 		return;
4216 	}
4217 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4218 		ct_status = "optimal";
4219 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4220 		ct_status = "degraded";
4221 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4222 		ct_status = "failed";
4223 	} else {
4224 		ct_status = "unknown";
4225 	}
4226 
4227 	if (MDI_PI_IS_OFFLINE(pip)) {
4228 		status = "offline";
4229 	} else if (MDI_PI_IS_ONLINE(pip)) {
4230 		status = "online";
4231 	} else if (MDI_PI_IS_STANDBY(pip)) {
4232 		status = "standby";
4233 	} else if (MDI_PI_IS_FAULT(pip)) {
4234 		status = "faulted";
4235 	} else {
4236 		status = "unknown";
4237 	}
4238 
4239 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4240 		(void) snprintf(lb_buf, sizeof (lb_buf),
4241 		    "%s, region-size: %d", mdi_load_balance_lba,
4242 			ct->ct_lb_args->region_size);
4243 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4244 		(void) snprintf(lb_buf, sizeof (lb_buf),
4245 		    "%s", mdi_load_balance_none);
4246 	} else {
4247 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4248 		    mdi_load_balance_rr);
4249 	}
4250 
4251 	if (dip) {
4252 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4253 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4254 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4255 		    "path %s (%s%d) to target address: %s is %s"
4256 		    " Load balancing: %s\n",
4257 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4258 		    ddi_get_instance(dip), ct_status,
4259 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4260 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4261 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4262 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4263 		kmem_free(phci_path, MAXPATHLEN);
4264 		kmem_free(ct_path, MAXPATHLEN);
4265 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4266 	}
4267 }
4268 
#ifdef	DEBUG
/*
 * i_mdi_log():
 *		Utility function for error message management (DEBUG builds
 *		only).  Formats the message, prefixes it with "<node><inst>:"
 *		when a devinfo node is supplied and hands it to cmn_err().
 *
 *		A leading '!', '?' or '^' in the formatted message is moved
 *		to the front of the string passed to cmn_err() for the
 *		CE_CONT, CE_WARN and CE_PANIC levels.  CE_NOTE messages are
 *		emitted at CE_CONT.
 */

/*VARARGS3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		msg[MAXNAMELEN];
	char		tag[MAXNAMELEN];
	va_list		args;

	/* Build the "<node><inst>:" tag; empty without a devinfo node. */
	if (dip == NULL) {
		tag[0] = '\0';
	} else if (level == CE_PANIC || level == CE_WARN ||
	    level == CE_NOTE) {
		(void) snprintf(tag, MAXNAMELEN, "%s%d:\n",
		    ddi_node_name(dip), ddi_get_instance(dip));
	} else {
		(void) snprintf(tag, MAXNAMELEN, "%s%d:",
		    ddi_node_name(dip), ddi_get_instance(dip));
	}

	va_start(args, fmt);
	(void) vsnprintf(msg, MAXNAMELEN, fmt, args);
	va_end(args);

	if (level == CE_NOTE) {
		level = CE_CONT;
	}

	/* Levels outside CONT/WARN/PANIC get the message untouched. */
	if (level != CE_CONT && level != CE_WARN && level != CE_PANIC) {
		cmn_err(level, "%s\t%s", tag, msg);
		return;
	}

	/*
	 * The destination marker has to be the first character of the
	 * format string, so it is spelled out in each literal and skipped
	 * in the message body.
	 */
	switch (msg[0]) {
	case '?':
		cmn_err(level, "?%s\t%s", tag, &msg[1]);
		break;
	case '^':
		cmn_err(level, "^%s\t%s", tag, &msg[1]);
		break;
	case '!':
		cmn_err(level, "!%s\t%s", tag, &msg[1]);
		break;
	default:
		cmn_err(level, "%s\t%s", tag, msg);
		break;
	}
}
#endif	/* DEBUG */
4338 
4339 void
4340 i_mdi_client_online(dev_info_t *ct_dip)
4341 {
4342 	mdi_client_t	*ct;
4343 
4344 	/*
4345 	 * Client online notification. Mark client state as online
4346 	 * restore our binding with dev_info node
4347 	 */
4348 	ct = i_devi_get_client(ct_dip);
4349 	ASSERT(ct != NULL);
4350 	MDI_CLIENT_LOCK(ct);
4351 	MDI_CLIENT_SET_ONLINE(ct);
4352 	/* catch for any memory leaks */
4353 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4354 	ct->ct_dip = ct_dip;
4355 
4356 	if (ct->ct_power_cnt == 0)
4357 		(void) i_mdi_power_all_phci(ct);
4358 
4359 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4360 	    "i_mdi_pm_hold_client\n"));
4361 	i_mdi_pm_hold_client(ct, 1);
4362 
4363 	MDI_CLIENT_UNLOCK(ct);
4364 }
4365 
4366 void
4367 i_mdi_phci_online(dev_info_t *ph_dip)
4368 {
4369 	mdi_phci_t	*ph;
4370 
4371 	/* pHCI online notification. Mark state accordingly */
4372 	ph = i_devi_get_phci(ph_dip);
4373 	ASSERT(ph != NULL);
4374 	MDI_PHCI_LOCK(ph);
4375 	MDI_PHCI_SET_ONLINE(ph);
4376 	MDI_PHCI_UNLOCK(ph);
4377 }
4378 
4379 /*
4380  * mdi_devi_online():
4381  * 		Online notification from NDI framework on pHCI/client
4382  *		device online.
4383  * Return Values:
4384  *		NDI_SUCCESS
4385  *		MDI_FAILURE
4386  */
4387 
4388 /*ARGSUSED*/
4389 int
4390 mdi_devi_online(dev_info_t *dip, uint_t flags)
4391 {
4392 	if (MDI_PHCI(dip)) {
4393 		i_mdi_phci_online(dip);
4394 	}
4395 
4396 	if (MDI_CLIENT(dip)) {
4397 		i_mdi_client_online(dip);
4398 	}
4399 	return (NDI_SUCCESS);
4400 }
4401 
4402 /*
4403  * mdi_devi_offline():
4404  * 		Offline notification from NDI framework on pHCI/Client device
4405  *		offline.
4406  *
4407  * Return Values:
4408  *		NDI_SUCCESS
4409  *		NDI_FAILURE
4410  */
4411 
4412 /*ARGSUSED*/
4413 int
4414 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4415 {
4416 	int		rv = NDI_SUCCESS;
4417 
4418 	if (MDI_CLIENT(dip)) {
4419 		rv = i_mdi_client_offline(dip, flags);
4420 		if (rv != NDI_SUCCESS)
4421 			return (rv);
4422 	}
4423 
4424 	if (MDI_PHCI(dip)) {
4425 		rv = i_mdi_phci_offline(dip, flags);
4426 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4427 			/* set client back online */
4428 			i_mdi_client_online(dip);
4429 		}
4430 	}
4431 
4432 	return (rv);
4433 }
4434 
4435 /*ARGSUSED*/
4436 static int
4437 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
4438 {
4439 	int		rv = NDI_SUCCESS;
4440 	mdi_phci_t	*ph;
4441 	mdi_client_t	*ct;
4442 	mdi_pathinfo_t	*pip;
4443 	mdi_pathinfo_t	*next;
4444 	mdi_pathinfo_t	*failed_pip = NULL;
4445 	dev_info_t	*cdip;
4446 
4447 	/*
4448 	 * pHCI component offline notification
4449 	 * Make sure that this pHCI instance is free to be offlined.
4450 	 * If it is OK to proceed, Offline and remove all the child
4451 	 * mdi_pathinfo nodes.  This process automatically offlines
4452 	 * corresponding client devices, for which this pHCI provides
4453 	 * critical services.
4454 	 */
4455 	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
4456 	    dip));
4457 
4458 	ph = i_devi_get_phci(dip);
4459 	if (ph == NULL) {
4460 		return (rv);
4461 	}
4462 
4463 	MDI_PHCI_LOCK(ph);
4464 
4465 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4466 		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
4467 		MDI_PHCI_UNLOCK(ph);
4468 		return (NDI_SUCCESS);
4469 	}
4470 
4471 	/*
4472 	 * Check to see if the pHCI can be offlined
4473 	 */
4474 	if (ph->ph_unstable) {
4475 		MDI_DEBUG(1, (CE_WARN, dip,
4476 		    "!One or more target devices are in transient "
4477 		    "state. This device can not be removed at "
4478 		    "this moment. Please try again later."));
4479 		MDI_PHCI_UNLOCK(ph);
4480 		return (NDI_BUSY);
4481 	}
4482 
4483 	pip = ph->ph_path_head;
4484 	while (pip != NULL) {
4485 		MDI_PI_LOCK(pip);
4486 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4487 		/*
4488 		 * The mdi_pathinfo state is OK. Check the client state.
4489 		 * If failover in progress fail the pHCI from offlining
4490 		 */
4491 		ct = MDI_PI(pip)->pi_client;
4492 		i_mdi_client_lock(ct, pip);
4493 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
4494 		    (ct->ct_unstable)) {
4495 			/*
4496 			 * Failover is in progress, Fail the DR
4497 			 */
4498 			MDI_DEBUG(1, (CE_WARN, dip,
4499 			    "!pHCI device (%s%d) is Busy. %s",
4500 			    ddi_driver_name(dip), ddi_get_instance(dip),
4501 			    "This device can not be removed at "
4502 			    "this moment. Please try again later."));
4503 			MDI_PI_UNLOCK(pip);
4504 			MDI_CLIENT_UNLOCK(ct);
4505 			MDI_PHCI_UNLOCK(ph);
4506 			return (NDI_BUSY);
4507 		}
4508 		MDI_PI_UNLOCK(pip);
4509 
4510 		/*
4511 		 * Check to see of we are removing the last path of this
4512 		 * client device...
4513 		 */
4514 		cdip = ct->ct_dip;
4515 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4516 		    (i_mdi_client_compute_state(ct, ph) ==
4517 		    MDI_CLIENT_STATE_FAILED)) {
4518 			i_mdi_client_unlock(ct);
4519 			MDI_PHCI_UNLOCK(ph);
4520 			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
4521 				/*
4522 				 * ndi_devi_offline() failed.
4523 				 * This pHCI provides the critical path
4524 				 * to one or more client devices.
4525 				 * Return busy.
4526 				 */
4527 				MDI_PHCI_LOCK(ph);
4528 				MDI_DEBUG(1, (CE_WARN, dip,
4529 				    "!pHCI device (%s%d) is Busy. %s",
4530 				    ddi_driver_name(dip), ddi_get_instance(dip),
4531 				    "This device can not be removed at "
4532 				    "this moment. Please try again later."));
4533 				failed_pip = pip;
4534 				break;
4535 			} else {
4536 				MDI_PHCI_LOCK(ph);
4537 				pip = next;
4538 			}
4539 		} else {
4540 			i_mdi_client_unlock(ct);
4541 			pip = next;
4542 		}
4543 	}
4544 
4545 	if (failed_pip) {
4546 		pip = ph->ph_path_head;
4547 		while (pip != failed_pip) {
4548 			MDI_PI_LOCK(pip);
4549 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4550 			ct = MDI_PI(pip)->pi_client;
4551 			i_mdi_client_lock(ct, pip);
4552 			cdip = ct->ct_dip;
4553 			switch (MDI_CLIENT_STATE(ct)) {
4554 			case MDI_CLIENT_STATE_OPTIMAL:
4555 			case MDI_CLIENT_STATE_DEGRADED:
4556 				if (cdip) {
4557 					MDI_PI_UNLOCK(pip);
4558 					i_mdi_client_unlock(ct);
4559 					MDI_PHCI_UNLOCK(ph);
4560 					(void) ndi_devi_online(cdip, 0);
4561 					MDI_PHCI_LOCK(ph);
4562 					pip = next;
4563 					continue;
4564 				}
4565 				break;
4566 
4567 			case MDI_CLIENT_STATE_FAILED:
4568 				if (cdip) {
4569 					MDI_PI_UNLOCK(pip);
4570 					i_mdi_client_unlock(ct);
4571 					MDI_PHCI_UNLOCK(ph);
4572 					(void) ndi_devi_offline(cdip, 0);
4573 					MDI_PHCI_LOCK(ph);
4574 					pip = next;
4575 					continue;
4576 				}
4577 				break;
4578 			}
4579 			MDI_PI_UNLOCK(pip);
4580 			i_mdi_client_unlock(ct);
4581 			pip = next;
4582 		}
4583 		MDI_PHCI_UNLOCK(ph);
4584 		return (NDI_BUSY);
4585 	}
4586 
4587 	/*
4588 	 * Mark the pHCI as offline
4589 	 */
4590 	MDI_PHCI_SET_OFFLINE(ph);
4591 
4592 	/*
4593 	 * Mark the child mdi_pathinfo nodes as transient
4594 	 */
4595 	pip = ph->ph_path_head;
4596 	while (pip != NULL) {
4597 		MDI_PI_LOCK(pip);
4598 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4599 		MDI_PI_SET_OFFLINING(pip);
4600 		MDI_PI_UNLOCK(pip);
4601 		pip = next;
4602 	}
4603 	MDI_PHCI_UNLOCK(ph);
4604 	/*
4605 	 * Give a chance for any pending commands to execute
4606 	 */
4607 	delay(1);
4608 	MDI_PHCI_LOCK(ph);
4609 	pip = ph->ph_path_head;
4610 	while (pip != NULL) {
4611 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4612 		(void) i_mdi_pi_offline(pip, flags);
4613 		MDI_PI_LOCK(pip);
4614 		ct = MDI_PI(pip)->pi_client;
4615 		if (!MDI_PI_IS_OFFLINE(pip)) {
4616 			MDI_DEBUG(1, (CE_WARN, dip,
4617 			    "!pHCI device (%s%d) is Busy. %s",
4618 			    ddi_driver_name(dip), ddi_get_instance(dip),
4619 			    "This device can not be removed at "
4620 			    "this moment. Please try again later."));
4621 			MDI_PI_UNLOCK(pip);
4622 			MDI_PHCI_SET_ONLINE(ph);
4623 			MDI_PHCI_UNLOCK(ph);
4624 			return (NDI_BUSY);
4625 		}
4626 		MDI_PI_UNLOCK(pip);
4627 		pip = next;
4628 	}
4629 	MDI_PHCI_UNLOCK(ph);
4630 
4631 	return (rv);
4632 }
4633 
4634 /*ARGSUSED*/
4635 static int
4636 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
4637 {
4638 	int		rv = NDI_SUCCESS;
4639 	mdi_client_t	*ct;
4640 
4641 	/*
4642 	 * Client component to go offline.  Make sure that we are
4643 	 * not in failing over state and update client state
4644 	 * accordingly
4645 	 */
4646 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
4647 	    dip));
4648 	ct = i_devi_get_client(dip);
4649 	if (ct != NULL) {
4650 		MDI_CLIENT_LOCK(ct);
4651 		if (ct->ct_unstable) {
4652 			/*
4653 			 * One or more paths are in transient state,
4654 			 * Dont allow offline of a client device
4655 			 */
4656 			MDI_DEBUG(1, (CE_WARN, dip,
4657 			    "!One or more paths to this device is "
4658 			    "in transient state. This device can not "
4659 			    "be removed at this moment. "
4660 			    "Please try again later."));
4661 			MDI_CLIENT_UNLOCK(ct);
4662 			return (NDI_BUSY);
4663 		}
4664 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
4665 			/*
4666 			 * Failover is in progress, Dont allow DR of
4667 			 * a client device
4668 			 */
4669 			MDI_DEBUG(1, (CE_WARN, dip,
4670 			    "!Client device (%s%d) is Busy. %s",
4671 			    ddi_driver_name(dip), ddi_get_instance(dip),
4672 			    "This device can not be removed at "
4673 			    "this moment. Please try again later."));
4674 			MDI_CLIENT_UNLOCK(ct);
4675 			return (NDI_BUSY);
4676 		}
4677 		MDI_CLIENT_SET_OFFLINE(ct);
4678 
4679 		/*
4680 		 * Unbind our relationship with the dev_info node
4681 		 */
4682 		if (flags & NDI_DEVI_REMOVE) {
4683 			ct->ct_dip = NULL;
4684 		}
4685 		MDI_CLIENT_UNLOCK(ct);
4686 	}
4687 	return (rv);
4688 }
4689 
4690 /*
4691  * mdi_pre_attach():
4692  *		Pre attach() notification handler
4693  */
4694 
4695 /*ARGSUSED*/
4696 int
4697 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4698 {
4699 	/* don't support old DDI_PM_RESUME */
4700 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
4701 	    (cmd == DDI_PM_RESUME))
4702 		return (DDI_FAILURE);
4703 
4704 	return (DDI_SUCCESS);
4705 }
4706 
4707 /*
4708  * mdi_post_attach():
4709  *		Post attach() notification handler
4710  */
4711 
4712 /*ARGSUSED*/
4713 void
4714 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
4715 {
4716 	mdi_phci_t	*ph;
4717 	mdi_client_t	*ct;
4718 	mdi_pathinfo_t	*pip;
4719 
4720 	if (MDI_PHCI(dip)) {
4721 		ph = i_devi_get_phci(dip);
4722 		ASSERT(ph != NULL);
4723 
4724 		MDI_PHCI_LOCK(ph);
4725 		switch (cmd) {
4726 		case DDI_ATTACH:
4727 			MDI_DEBUG(2, (CE_NOTE, dip,
4728 			    "!pHCI post_attach: called %p\n", ph));
4729 			if (error == DDI_SUCCESS) {
4730 				MDI_PHCI_SET_ATTACH(ph);
4731 			} else {
4732 				MDI_DEBUG(1, (CE_NOTE, dip,
4733 				    "!pHCI post_attach: failed error=%d\n",
4734 				    error));
4735 				MDI_PHCI_SET_DETACH(ph);
4736 			}
4737 			break;
4738 
4739 		case DDI_RESUME:
4740 			MDI_DEBUG(2, (CE_NOTE, dip,
4741 			    "!pHCI post_resume: called %p\n", ph));
4742 			if (error == DDI_SUCCESS) {
4743 				MDI_PHCI_SET_RESUME(ph);
4744 			} else {
4745 				MDI_DEBUG(1, (CE_NOTE, dip,
4746 				    "!pHCI post_resume: failed error=%d\n",
4747 				    error));
4748 				MDI_PHCI_SET_SUSPEND(ph);
4749 			}
4750 			break;
4751 		}
4752 		MDI_PHCI_UNLOCK(ph);
4753 	}
4754 
4755 	if (MDI_CLIENT(dip)) {
4756 		ct = i_devi_get_client(dip);
4757 		ASSERT(ct != NULL);
4758 
4759 		MDI_CLIENT_LOCK(ct);
4760 		switch (cmd) {
4761 		case DDI_ATTACH:
4762 			MDI_DEBUG(2, (CE_NOTE, dip,
4763 			    "!Client post_attach: called %p\n", ct));
4764 			if (error != DDI_SUCCESS) {
4765 				MDI_DEBUG(1, (CE_NOTE, dip,
4766 				    "!Client post_attach: failed error=%d\n",
4767 				    error));
4768 				MDI_CLIENT_SET_DETACH(ct);
4769 				MDI_DEBUG(4, (CE_WARN, dip,
4770 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
4771 				i_mdi_pm_reset_client(ct);
4772 				break;
4773 			}
4774 
4775 			/*
4776 			 * Client device has successfully attached.
4777 			 * Create kstats for any pathinfo structures
4778 			 * initially associated with this client.
4779 			 */
4780 			for (pip = ct->ct_path_head; pip != NULL;
4781 			    pip = (mdi_pathinfo_t *)
4782 			    MDI_PI(pip)->pi_client_link) {
4783 				(void) i_mdi_pi_kstat_create(pip);
4784 				i_mdi_report_path_state(ct, pip);
4785 			}
4786 			MDI_CLIENT_SET_ATTACH(ct);
4787 			break;
4788 
4789 		case DDI_RESUME:
4790 			MDI_DEBUG(2, (CE_NOTE, dip,
4791 			    "!Client post_attach: called %p\n", ct));
4792 			if (error == DDI_SUCCESS) {
4793 				MDI_CLIENT_SET_RESUME(ct);
4794 			} else {
4795 				MDI_DEBUG(1, (CE_NOTE, dip,
4796 				    "!Client post_resume: failed error=%d\n",
4797 				    error));
4798 				MDI_CLIENT_SET_SUSPEND(ct);
4799 			}
4800 			break;
4801 		}
4802 		MDI_CLIENT_UNLOCK(ct);
4803 	}
4804 }
4805 
4806 /*
4807  * mdi_pre_detach():
4808  *		Pre detach notification handler
4809  */
4810 
4811 /*ARGSUSED*/
4812 int
4813 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4814 {
4815 	int rv = DDI_SUCCESS;
4816 
4817 	if (MDI_CLIENT(dip)) {
4818 		(void) i_mdi_client_pre_detach(dip, cmd);
4819 	}
4820 
4821 	if (MDI_PHCI(dip)) {
4822 		rv = i_mdi_phci_pre_detach(dip, cmd);
4823 	}
4824 
4825 	return (rv);
4826 }
4827 
4828 /*ARGSUSED*/
4829 static int
4830 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4831 {
4832 	int		rv = DDI_SUCCESS;
4833 	mdi_phci_t	*ph;
4834 	mdi_client_t	*ct;
4835 	mdi_pathinfo_t	*pip;
4836 	mdi_pathinfo_t	*failed_pip = NULL;
4837 	mdi_pathinfo_t	*next;
4838 
4839 	ph = i_devi_get_phci(dip);
4840 	if (ph == NULL) {
4841 		return (rv);
4842 	}
4843 
4844 	MDI_PHCI_LOCK(ph);
4845 	switch (cmd) {
4846 	case DDI_DETACH:
4847 		MDI_DEBUG(2, (CE_NOTE, dip,
4848 		    "!pHCI pre_detach: called %p\n", ph));
4849 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
4850 			/*
4851 			 * mdi_pathinfo nodes are still attached to
4852 			 * this pHCI. Fail the detach for this pHCI.
4853 			 */
4854 			MDI_DEBUG(2, (CE_WARN, dip,
4855 			    "!pHCI pre_detach: "
4856 			    "mdi_pathinfo nodes are still attached "
4857 			    "%p\n", ph));
4858 			rv = DDI_FAILURE;
4859 			break;
4860 		}
4861 		MDI_PHCI_SET_DETACH(ph);
4862 		break;
4863 
4864 	case DDI_SUSPEND:
4865 		/*
4866 		 * pHCI is getting suspended.  Since mpxio client
4867 		 * devices may not be suspended at this point, to avoid
4868 		 * a potential stack overflow, it is important to suspend
4869 		 * client devices before pHCI can be suspended.
4870 		 */
4871 
4872 		MDI_DEBUG(2, (CE_NOTE, dip,
4873 		    "!pHCI pre_suspend: called %p\n", ph));
4874 		/*
4875 		 * Suspend all the client devices accessible through this pHCI
4876 		 */
4877 		pip = ph->ph_path_head;
4878 		while (pip != NULL && rv == DDI_SUCCESS) {
4879 			dev_info_t *cdip;
4880 			MDI_PI_LOCK(pip);
4881 			next =
4882 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4883 			ct = MDI_PI(pip)->pi_client;
4884 			i_mdi_client_lock(ct, pip);
4885 			cdip = ct->ct_dip;
4886 			MDI_PI_UNLOCK(pip);
4887 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
4888 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
4889 				i_mdi_client_unlock(ct);
4890 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
4891 				    DDI_SUCCESS) {
4892 					/*
4893 					 * Suspend of one of the client
4894 					 * device has failed.
4895 					 */
4896 					MDI_DEBUG(1, (CE_WARN, dip,
4897 					    "!Suspend of device (%s%d) failed.",
4898 					    ddi_driver_name(cdip),
4899 					    ddi_get_instance(cdip)));
4900 					failed_pip = pip;
4901 					break;
4902 				}
4903 			} else {
4904 				i_mdi_client_unlock(ct);
4905 			}
4906 			pip = next;
4907 		}
4908 
4909 		if (rv == DDI_SUCCESS) {
4910 			/*
4911 			 * Suspend of client devices is complete. Proceed
4912 			 * with pHCI suspend.
4913 			 */
4914 			MDI_PHCI_SET_SUSPEND(ph);
4915 		} else {
4916 			/*
4917 			 * Revert back all the suspended client device states
4918 			 * to converse.
4919 			 */
4920 			pip = ph->ph_path_head;
4921 			while (pip != failed_pip) {
4922 				dev_info_t *cdip;
4923 				MDI_PI_LOCK(pip);
4924 				next =
4925 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4926 				ct = MDI_PI(pip)->pi_client;
4927 				i_mdi_client_lock(ct, pip);
4928 				cdip = ct->ct_dip;
4929 				MDI_PI_UNLOCK(pip);
4930 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
4931 					i_mdi_client_unlock(ct);
4932 					(void) devi_attach(cdip, DDI_RESUME);
4933 				} else {
4934 					i_mdi_client_unlock(ct);
4935 				}
4936 				pip = next;
4937 			}
4938 		}
4939 		break;
4940 
4941 	default:
4942 		rv = DDI_FAILURE;
4943 		break;
4944 	}
4945 	MDI_PHCI_UNLOCK(ph);
4946 	return (rv);
4947 }
4948 
4949 /*ARGSUSED*/
4950 static int
4951 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4952 {
4953 	int		rv = DDI_SUCCESS;
4954 	mdi_client_t	*ct;
4955 
4956 	ct = i_devi_get_client(dip);
4957 	if (ct == NULL) {
4958 		return (rv);
4959 	}
4960 
4961 	MDI_CLIENT_LOCK(ct);
4962 	switch (cmd) {
4963 	case DDI_DETACH:
4964 		MDI_DEBUG(2, (CE_NOTE, dip,
4965 		    "!Client pre_detach: called %p\n", ct));
4966 		MDI_CLIENT_SET_DETACH(ct);
4967 		break;
4968 
4969 	case DDI_SUSPEND:
4970 		MDI_DEBUG(2, (CE_NOTE, dip,
4971 		    "!Client pre_suspend: called %p\n", ct));
4972 		MDI_CLIENT_SET_SUSPEND(ct);
4973 		break;
4974 
4975 	default:
4976 		rv = DDI_FAILURE;
4977 		break;
4978 	}
4979 	MDI_CLIENT_UNLOCK(ct);
4980 	return (rv);
4981 }
4982 
4983 /*
4984  * mdi_post_detach():
4985  *		Post detach notification handler
4986  */
4987 
4988 /*ARGSUSED*/
4989 void
4990 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
4991 {
4992 	/*
4993 	 * Detach/Suspend of mpxio component failed. Update our state
4994 	 * too
4995 	 */
4996 	if (MDI_PHCI(dip))
4997 		i_mdi_phci_post_detach(dip, cmd, error);
4998 
4999 	if (MDI_CLIENT(dip))
5000 		i_mdi_client_post_detach(dip, cmd, error);
5001 }
5002 
5003 /*ARGSUSED*/
5004 static void
5005 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5006 {
5007 	mdi_phci_t	*ph;
5008 
5009 	/*
5010 	 * Detach/Suspend of phci component failed. Update our state
5011 	 * too
5012 	 */
5013 	ph = i_devi_get_phci(dip);
5014 	if (ph == NULL) {
5015 		return;
5016 	}
5017 
5018 	MDI_PHCI_LOCK(ph);
5019 	/*
5020 	 * Detach of pHCI failed. Restore back converse
5021 	 * state
5022 	 */
5023 	switch (cmd) {
5024 	case DDI_DETACH:
5025 		MDI_DEBUG(2, (CE_NOTE, dip,
5026 		    "!pHCI post_detach: called %p\n", ph));
5027 		if (error != DDI_SUCCESS)
5028 			MDI_PHCI_SET_ATTACH(ph);
5029 		break;
5030 
5031 	case DDI_SUSPEND:
5032 		MDI_DEBUG(2, (CE_NOTE, dip,
5033 		    "!pHCI post_suspend: called %p\n", ph));
5034 		if (error != DDI_SUCCESS)
5035 			MDI_PHCI_SET_RESUME(ph);
5036 		break;
5037 	}
5038 	MDI_PHCI_UNLOCK(ph);
5039 }
5040 
5041 /*ARGSUSED*/
5042 static void
5043 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5044 {
5045 	mdi_client_t	*ct;
5046 
5047 	ct = i_devi_get_client(dip);
5048 	if (ct == NULL) {
5049 		return;
5050 	}
5051 	MDI_CLIENT_LOCK(ct);
5052 	/*
5053 	 * Detach of Client failed. Restore back converse
5054 	 * state
5055 	 */
5056 	switch (cmd) {
5057 	case DDI_DETACH:
5058 		MDI_DEBUG(2, (CE_NOTE, dip,
5059 		    "!Client post_detach: called %p\n", ct));
5060 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5061 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5062 			    "i_mdi_pm_rele_client\n"));
5063 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5064 		} else {
5065 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5066 			    "i_mdi_pm_reset_client\n"));
5067 			i_mdi_pm_reset_client(ct);
5068 		}
5069 		if (error != DDI_SUCCESS)
5070 			MDI_CLIENT_SET_ATTACH(ct);
5071 		break;
5072 
5073 	case DDI_SUSPEND:
5074 		MDI_DEBUG(2, (CE_NOTE, dip,
5075 		    "!Client post_suspend: called %p\n", ct));
5076 		if (error != DDI_SUCCESS)
5077 			MDI_CLIENT_SET_RESUME(ct);
5078 		break;
5079 	}
5080 	MDI_CLIENT_UNLOCK(ct);
5081 }
5082 
5083 /*
5084  * create and install per-path (client - pHCI) statistics
5085  * I/O stats supported: nread, nwritten, reads, and writes
5086  * Error stats - hard errors, soft errors, & transport errors
5087  */
5088 static int
5089 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
5090 {
5091 
5092 	dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip;
5093 	dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip;
5094 	char ksname[KSTAT_STRLEN];
5095 	mdi_pathinfo_t *cpip;
5096 	const char *err_postfix = ",err";
5097 	kstat_t	*kiosp, *kerrsp;
5098 	struct pi_errs	*nsp;
5099 	struct mdi_pi_kstats *mdi_statp;
5100 
5101 	ASSERT(client != NULL && ppath != NULL);
5102 
5103 	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));
5104 
5105 	if (MDI_PI(pip)->pi_kstats != NULL)
5106 		return (MDI_SUCCESS);
5107 
5108 	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
5109 	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
5110 		if (cpip == pip)
5111 			continue;
5112 		/*
5113 		 * We have found a different path with same parent
5114 		 * kstats for a given client-pHCI are common
5115 		 */
5116 		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
5117 		    (MDI_PI(cpip)->pi_kstats != NULL)) {
5118 			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
5119 			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
5120 			return (MDI_SUCCESS);
5121 		}
5122 	}
5123 
5124 	/*
5125 	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
5126 	 * clamp length of name against max length of error kstat name
5127 	 */
5128 	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
5129 	    ddi_driver_name(client), ddi_get_instance(client),
5130 	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
5131 	    (KSTAT_STRLEN - strlen(err_postfix))) {
5132 		return (MDI_FAILURE);
5133 	}
5134 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5135 	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
5136 		return (MDI_FAILURE);
5137 	}
5138 
5139 	(void) strcat(ksname, err_postfix);
5140 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5141 	    KSTAT_TYPE_NAMED,
5142 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5143 
5144 	if (kerrsp == NULL) {
5145 		kstat_delete(kiosp);
5146 		return (MDI_FAILURE);
5147 	}
5148 
5149 	nsp = (struct pi_errs *)kerrsp->ks_data;
5150 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5151 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5152 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5153 	    KSTAT_DATA_UINT32);
5154 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5155 	    KSTAT_DATA_UINT32);
5156 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5157 	    KSTAT_DATA_UINT32);
5158 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5159 	    KSTAT_DATA_UINT32);
5160 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5161 	    KSTAT_DATA_UINT32);
5162 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5163 	    KSTAT_DATA_UINT32);
5164 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5165 	    KSTAT_DATA_UINT32);
5166 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5167 
5168 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5169 	mdi_statp->pi_kstat_ref = 1;
5170 	mdi_statp->pi_kstat_iostats = kiosp;
5171 	mdi_statp->pi_kstat_errstats = kerrsp;
5172 	kstat_install(kiosp);
5173 	kstat_install(kerrsp);
5174 	MDI_PI(pip)->pi_kstats = mdi_statp;
5175 	return (MDI_SUCCESS);
5176 }
5177 
5178 /*
5179  * destroy per-path properties
5180  */
5181 static void
5182 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5183 {
5184 
5185 	struct mdi_pi_kstats *mdi_statp;
5186 
5187 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5188 		return;
5189 
5190 	MDI_PI(pip)->pi_kstats = NULL;
5191 
5192 	/*
5193 	 * the kstat may be shared between multiple pathinfo nodes
5194 	 * decrement this pathinfo's usage, removing the kstats
5195 	 * themselves when the last pathinfo reference is removed.
5196 	 */
5197 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5198 	if (--mdi_statp->pi_kstat_ref != 0)
5199 		return;
5200 
5201 	kstat_delete(mdi_statp->pi_kstat_iostats);
5202 	kstat_delete(mdi_statp->pi_kstat_errstats);
5203 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5204 }
5205 
5206 /*
5207  * update I/O paths KSTATS
5208  */
5209 void
5210 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5211 {
5212 	kstat_t *iostatp;
5213 	size_t xfer_cnt;
5214 
5215 	ASSERT(pip != NULL);
5216 
5217 	/*
5218 	 * I/O can be driven across a path prior to having path
5219 	 * statistics available, i.e. probe(9e).
5220 	 */
5221 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5222 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5223 		xfer_cnt = bp->b_bcount - bp->b_resid;
5224 		if (bp->b_flags & B_READ) {
5225 			KSTAT_IO_PTR(iostatp)->reads++;
5226 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5227 		} else {
5228 			KSTAT_IO_PTR(iostatp)->writes++;
5229 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5230 		}
5231 	}
5232 }
5233 
5234 /*
5235  * disable the path to a particular pHCI (pHCI specified in the phci_path
5236  * argument) for a particular client (specified in the client_path argument).
5237  * Disabling a path means that MPxIO will not select the disabled path for
5238  * routing any new I/O requests.
5239  */
5240 int
5241 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5242 {
5243 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5244 }
5245 
5246 /*
5247  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5248  * argument) for a particular client (specified in the client_path argument).
5249  * Enabling a path means that MPxIO may select the enabled path for routing
5250  * future I/O requests, subject to other path state constraints.
5251  */
5252 
5253 int
5254 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5255 {
5256 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5257 }
5258 
5259 
5260 /*
5261  * Common routine for doing enable/disable.
5262  */
5263 int
5264 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5265 {
5266 
5267 	mdi_phci_t	*ph;
5268 	mdi_vhci_t	*vh = NULL;
5269 	mdi_client_t	*ct;
5270 	mdi_pathinfo_t	*next, *pip;
5271 	int		found_it;
5272 	int		(*f)() = NULL;
5273 	int		rv;
5274 	int		sync_flag = 0;
5275 
5276 	ph = i_devi_get_phci(pdip);
5277 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5278 		" Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip));
5279 	if (ph == NULL) {
5280 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5281 			" failed. ph = NULL operation = %d\n", op));
5282 		return (MDI_FAILURE);
5283 	}
5284 
5285 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5286 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5287 			" Invalid operation = %d\n", op));
5288 		return (MDI_FAILURE);
5289 	}
5290 
5291 	sync_flag = (flags << 8) & 0xf00;
5292 
5293 	vh = ph->ph_vhci;
5294 	f = vh->vh_ops->vo_pi_state_change;
5295 
5296 	if (cdip == NULL) {
5297 		/*
5298 		 * Need to mark the Phci as enabled/disabled.
5299 		 */
5300 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5301 		"Operation %d for the phci\n", op));
5302 		MDI_PHCI_LOCK(ph);
5303 		switch (flags) {
5304 			case USER_DISABLE:
5305 				if (op == MDI_DISABLE_OP)
5306 					MDI_PHCI_SET_USER_DISABLE(ph);
5307 				else
5308 					MDI_PHCI_SET_USER_ENABLE(ph);
5309 				break;
5310 			case DRIVER_DISABLE:
5311 				if (op == MDI_DISABLE_OP)
5312 					MDI_PHCI_SET_DRV_DISABLE(ph);
5313 				else
5314 					MDI_PHCI_SET_DRV_ENABLE(ph);
5315 				break;
5316 			case DRIVER_DISABLE_TRANSIENT:
5317 				if (op == MDI_DISABLE_OP)
5318 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5319 				else
5320 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
5321 				break;
5322 			default:
5323 				MDI_PHCI_UNLOCK(ph);
5324 				MDI_DEBUG(1, (CE_NOTE, NULL,
5325 				"!i_mdi_pi_enable_disable:"
5326 				" Invalid flag argument= %d\n", flags));
5327 		}
5328 
5329 		/*
5330 		 * Phci has been disabled. Now try to enable/disable
5331 		 * path info's to each client.
5332 		 */
5333 		pip = ph->ph_path_head;
5334 		while (pip != NULL) {
5335 			/*
5336 			 * Do a callback into the mdi consumer to let it
5337 			 * know that path is about to be enabled/disabled.
5338 			 */
5339 			if (f != NULL) {
5340 				rv = (*f)(vh->vh_dip, pip, 0,
5341 					MDI_PI_EXT_STATE(pip),
5342 					MDI_EXT_STATE_CHANGE | sync_flag |
5343 					op | MDI_BEFORE_STATE_CHANGE);
5344 				if (rv != MDI_SUCCESS) {
5345 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5346 				"!vo_pi_state_change: failed rv = %x", rv));
5347 				}
5348 			}
5349 
5350 			MDI_PI_LOCK(pip);
5351 			next =
5352 				(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5353 			switch (flags) {
5354 			case USER_DISABLE:
5355 				if (op == MDI_DISABLE_OP)
5356 					MDI_PI_SET_USER_DISABLE(pip);
5357 				else
5358 					MDI_PI_SET_USER_ENABLE(pip);
5359 				break;
5360 			case DRIVER_DISABLE:
5361 				if (op == MDI_DISABLE_OP)
5362 					MDI_PI_SET_DRV_DISABLE(pip);
5363 				else
5364 					MDI_PI_SET_DRV_ENABLE(pip);
5365 				break;
5366 			case DRIVER_DISABLE_TRANSIENT:
5367 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5368 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5369 				else
5370 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5371 				break;
5372 			}
5373 			MDI_PI_UNLOCK(pip);
5374 			/*
5375 			 * Do a callback into the mdi consumer to let it
5376 			 * know that path is now enabled/disabled.
5377 			 */
5378 			if (f != NULL) {
5379 				rv = (*f)(vh->vh_dip, pip, 0,
5380 					MDI_PI_EXT_STATE(pip),
5381 					MDI_EXT_STATE_CHANGE | sync_flag |
5382 					op | MDI_AFTER_STATE_CHANGE);
5383 				if (rv != MDI_SUCCESS) {
5384 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5385 				"!vo_pi_state_change: failed rv = %x", rv));
5386 				}
5387 			}
5388 			pip = next;
5389 		}
5390 		MDI_PHCI_UNLOCK(ph);
5391 	} else {
5392 
5393 		/*
5394 		 * Disable a specific client.
5395 		 */
5396 		ct = i_devi_get_client(cdip);
5397 		if (ct == NULL) {
5398 			MDI_DEBUG(1, (CE_NOTE, NULL,
5399 			"!i_mdi_pi_enable_disable:"
5400 			" failed. ct = NULL operation = %d\n", op));
5401 			return (MDI_FAILURE);
5402 		}
5403 
5404 		MDI_CLIENT_LOCK(ct);
5405 		pip = ct->ct_path_head;
5406 		found_it = 0;
5407 		while (pip != NULL) {
5408 			MDI_PI_LOCK(pip);
5409 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5410 			if (MDI_PI(pip)->pi_phci == ph) {
5411 				MDI_PI_UNLOCK(pip);
5412 				found_it = 1;
5413 				break;
5414 			}
5415 			MDI_PI_UNLOCK(pip);
5416 			pip = next;
5417 		}
5418 
5419 		MDI_CLIENT_UNLOCK(ct);
5420 		if (found_it == 0) {
5421 			MDI_DEBUG(1, (CE_NOTE, NULL,
5422 			"!i_mdi_pi_enable_disable:"
5423 			" failed. Could not find corresponding pip\n"));
5424 			return (MDI_FAILURE);
5425 		}
5426 		/*
5427 		 * Do a callback into the mdi consumer to let it
5428 		 * know that path is about to get enabled/disabled.
5429 		 */
5430 		if (f != NULL) {
5431 			rv = (*f)(vh->vh_dip, pip, 0,
5432 				MDI_PI_EXT_STATE(pip),
5433 				MDI_EXT_STATE_CHANGE | sync_flag |
5434 				op | MDI_BEFORE_STATE_CHANGE);
5435 			if (rv != MDI_SUCCESS) {
5436 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5437 				"!vo_pi_state_change: failed rv = %x", rv));
5438 			}
5439 		}
5440 		MDI_PI_LOCK(pip);
5441 		switch (flags) {
5442 			case USER_DISABLE:
5443 				if (op == MDI_DISABLE_OP)
5444 					MDI_PI_SET_USER_DISABLE(pip);
5445 				else
5446 					MDI_PI_SET_USER_ENABLE(pip);
5447 				break;
5448 			case DRIVER_DISABLE:
5449 				if (op == MDI_DISABLE_OP)
5450 					MDI_PI_SET_DRV_DISABLE(pip);
5451 				else
5452 					MDI_PI_SET_DRV_ENABLE(pip);
5453 				break;
5454 			case DRIVER_DISABLE_TRANSIENT:
5455 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5456 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5457 				else
5458 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5459 				break;
5460 		}
5461 		MDI_PI_UNLOCK(pip);
5462 		/*
5463 		 * Do a callback into the mdi consumer to let it
5464 		 * know that path is now enabled/disabled.
5465 		 */
5466 		if (f != NULL) {
5467 			rv = (*f)(vh->vh_dip, pip, 0,
5468 				MDI_PI_EXT_STATE(pip),
5469 				MDI_EXT_STATE_CHANGE | sync_flag |
5470 				op | MDI_AFTER_STATE_CHANGE);
5471 			if (rv != MDI_SUCCESS) {
5472 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5473 				"!vo_pi_state_change: failed rv = %x", rv));
5474 			}
5475 		}
5476 	}
5477 
5478 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5479 		" Returning success pdip = %p cdip = %p\n", op, pdip, cdip));
5480 	return (MDI_SUCCESS);
5481 }
5482 
5483 /*ARGSUSED3*/
5484 int
5485 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
5486     int flags, clock_t timeout)
5487 {
5488 	mdi_pathinfo_t *pip;
5489 	dev_info_t *dip;
5490 	clock_t interval = drv_usectohz(100000);	/* 0.1 sec */
5491 	char *paddr;
5492 
5493 	MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm));
5494 
5495 	if (!MDI_PHCI(pdip))
5496 		return (MDI_FAILURE);
5497 
5498 	paddr = strchr(devnm, '@');
5499 	if (paddr == NULL)
5500 		return (MDI_FAILURE);
5501 
5502 	paddr++;	/* skip '@' */
5503 	pip = mdi_pi_find(pdip, NULL, paddr);
5504 	while (pip == NULL && timeout > 0) {
5505 		if (interval > timeout)
5506 			interval = timeout;
5507 		if (flags & NDI_DEVI_DEBUG) {
5508 			cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n",
5509 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
5510 			    paddr, interval, timeout);
5511 		}
5512 		delay(interval);
5513 		timeout -= interval;
5514 		interval += interval;
5515 		pip = mdi_pi_find(pdip, NULL, paddr);
5516 	}
5517 
5518 	if (pip == NULL)
5519 		return (MDI_FAILURE);
5520 	dip = mdi_pi_get_client(pip);
5521 	if (ndi_devi_online(dip, flags) != NDI_SUCCESS)
5522 		return (MDI_FAILURE);
5523 	*cdipp = dip;
5524 
5525 	/* TODO: holding should happen inside search functions */
5526 	ndi_hold_devi(dip);
5527 	return (MDI_SUCCESS);
5528 }
5529 
5530 /*
5531  * Ensure phci powered up
5532  */
5533 static void
5534 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
5535 {
5536 	dev_info_t	*ph_dip;
5537 
5538 	ASSERT(pip != NULL);
5539 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5540 
5541 	if (MDI_PI(pip)->pi_pm_held) {
5542 		return;
5543 	}
5544 
5545 	ph_dip = mdi_pi_get_phci(pip);
5546 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n",
5547 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5548 	if (ph_dip == NULL) {
5549 		return;
5550 	}
5551 
5552 	MDI_PI_UNLOCK(pip);
5553 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5554 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5555 	pm_hold_power(ph_dip);
5556 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5557 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5558 	MDI_PI_LOCK(pip);
5559 
5560 	MDI_PI(pip)->pi_pm_held = 1;
5561 }
5562 
5563 /*
5564  * Allow phci powered down
5565  */
5566 static void
5567 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
5568 {
5569 	dev_info_t	*ph_dip = NULL;
5570 
5571 	ASSERT(pip != NULL);
5572 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5573 
5574 	if (MDI_PI(pip)->pi_pm_held == 0) {
5575 		return;
5576 	}
5577 
5578 	ph_dip = mdi_pi_get_phci(pip);
5579 	ASSERT(ph_dip != NULL);
5580 
5581 	MDI_PI_UNLOCK(pip);
5582 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n",
5583 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5584 
5585 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5586 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5587 	pm_rele_power(ph_dip);
5588 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5589 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5590 
5591 	MDI_PI_LOCK(pip);
5592 	MDI_PI(pip)->pi_pm_held = 0;
5593 }
5594 
5595 static void
5596 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
5597 {
5598 	ASSERT(ct);
5599 
5600 	ct->ct_power_cnt += incr;
5601 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client "
5602 	    "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr));
5603 	ASSERT(ct->ct_power_cnt >= 0);
5604 }
5605 
5606 static void
5607 i_mdi_rele_all_phci(mdi_client_t *ct)
5608 {
5609 	mdi_pathinfo_t  *pip;
5610 
5611 	ASSERT(mutex_owned(&ct->ct_mutex));
5612 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5613 	while (pip != NULL) {
5614 		mdi_hold_path(pip);
5615 		MDI_PI_LOCK(pip);
5616 		i_mdi_pm_rele_pip(pip);
5617 		MDI_PI_UNLOCK(pip);
5618 		mdi_rele_path(pip);
5619 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5620 	}
5621 }
5622 
5623 static void
5624 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
5625 {
5626 	ASSERT(ct);
5627 
5628 	if (i_ddi_node_state(ct->ct_dip) >= DS_READY) {
5629 		ct->ct_power_cnt -= decr;
5630 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
5631 		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
5632 	}
5633 
5634 	ASSERT(ct->ct_power_cnt >= 0);
5635 	if (ct->ct_power_cnt == 0) {
5636 		i_mdi_rele_all_phci(ct);
5637 		return;
5638 	}
5639 }
5640 
5641 static void
5642 i_mdi_pm_reset_client(mdi_client_t *ct)
5643 {
5644 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
5645 	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
5646 	ct->ct_power_cnt = 0;
5647 	i_mdi_rele_all_phci(ct);
5648 	ct->ct_powercnt_config = 0;
5649 	ct->ct_powercnt_unconfig = 0;
5650 	ct->ct_powercnt_reset = 1;
5651 }
5652 
5653 static void
5654 i_mdi_pm_hold_all_phci(mdi_client_t *ct)
5655 {
5656 	mdi_pathinfo_t  *pip;
5657 	ASSERT(mutex_owned(&ct->ct_mutex));
5658 
5659 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5660 	while (pip != NULL) {
5661 		mdi_hold_path(pip);
5662 		MDI_PI_LOCK(pip);
5663 		i_mdi_pm_hold_pip(pip);
5664 		MDI_PI_UNLOCK(pip);
5665 		mdi_rele_path(pip);
5666 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5667 	}
5668 }
5669 
5670 static int
5671 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
5672 {
5673 	int		ret;
5674 	dev_info_t	*ph_dip;
5675 
5676 	MDI_PI_LOCK(pip);
5677 	i_mdi_pm_hold_pip(pip);
5678 
5679 	ph_dip = mdi_pi_get_phci(pip);
5680 	MDI_PI_UNLOCK(pip);
5681 
5682 	/* bring all components of phci to full power */
5683 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5684 	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
5685 	    ddi_get_instance(ph_dip)));
5686 
5687 	ret = pm_powerup(ph_dip);
5688 
5689 	if (ret == DDI_FAILURE) {
5690 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5691 		    "pm_powerup FAILED for %s%d\n",
5692 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5693 
5694 		MDI_PI_LOCK(pip);
5695 		i_mdi_pm_rele_pip(pip);
5696 		MDI_PI_UNLOCK(pip);
5697 		return (MDI_FAILURE);
5698 	}
5699 
5700 	return (MDI_SUCCESS);
5701 }
5702 
5703 static int
5704 i_mdi_power_all_phci(mdi_client_t *ct)
5705 {
5706 	mdi_pathinfo_t  *pip;
5707 	int		succeeded = 0;
5708 
5709 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5710 	while (pip != NULL) {
5711 		mdi_hold_path(pip);
5712 		MDI_CLIENT_UNLOCK(ct);
5713 		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
5714 			succeeded = 1;
5715 
5716 		ASSERT(ct == MDI_PI(pip)->pi_client);
5717 		MDI_CLIENT_LOCK(ct);
5718 		mdi_rele_path(pip);
5719 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5720 	}
5721 
5722 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
5723 }
5724 
5725 /*
5726  * mdi_bus_power():
5727  *		1. Place the phci(s) into powered up state so that
5728  *		   client can do power management
5729  *		2. Ensure phci powered up as client power managing
5730  * Return Values:
5731  *		MDI_SUCCESS
5732  *		MDI_FAILURE
5733  */
5734 int
5735 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
5736     void *arg, void *result)
5737 {
5738 	int			ret = MDI_SUCCESS;
5739 	pm_bp_child_pwrchg_t	*bpc;
5740 	mdi_client_t		*ct;
5741 	dev_info_t		*cdip;
5742 	pm_bp_has_changed_t	*bphc;
5743 
5744 	/*
5745 	 * BUS_POWER_NOINVOL not supported
5746 	 */
5747 	if (op == BUS_POWER_NOINVOL)
5748 		return (MDI_FAILURE);
5749 
5750 	/*
5751 	 * ignore other OPs.
5752 	 * return quickly to save cou cycles on the ct processing
5753 	 */
5754 	switch (op) {
5755 	case BUS_POWER_PRE_NOTIFICATION:
5756 	case BUS_POWER_POST_NOTIFICATION:
5757 		bpc = (pm_bp_child_pwrchg_t *)arg;
5758 		cdip = bpc->bpc_dip;
5759 		break;
5760 	case BUS_POWER_HAS_CHANGED:
5761 		bphc = (pm_bp_has_changed_t *)arg;
5762 		cdip = bphc->bphc_dip;
5763 		break;
5764 	default:
5765 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
5766 	}
5767 
5768 	ASSERT(MDI_CLIENT(cdip));
5769 
5770 	ct = i_devi_get_client(cdip);
5771 	if (ct == NULL)
5772 		return (MDI_FAILURE);
5773 
5774 	/*
5775 	 * wait till the mdi_pathinfo node state change are processed
5776 	 */
5777 	MDI_CLIENT_LOCK(ct);
5778 	switch (op) {
5779 	case BUS_POWER_PRE_NOTIFICATION:
5780 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
5781 		    "BUS_POWER_PRE_NOTIFICATION:"
5782 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
5783 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
5784 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
5785 
5786 		/* serialize power level change per client */
5787 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5788 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5789 
5790 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
5791 
5792 		if (ct->ct_power_cnt == 0) {
5793 			ret = i_mdi_power_all_phci(ct);
5794 		}
5795 
5796 		/*
5797 		 * if new_level > 0:
5798 		 *	- hold phci(s)
5799 		 *	- power up phci(s) if not already
5800 		 * ignore power down
5801 		 */
5802 		if (bpc->bpc_nlevel > 0) {
5803 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
5804 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
5805 				    "mdi_bus_power i_mdi_pm_hold_client\n"));
5806 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
5807 			}
5808 		}
5809 		break;
5810 	case BUS_POWER_POST_NOTIFICATION:
5811 		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
5812 		    "BUS_POWER_POST_NOTIFICATION:"
5813 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
5814 		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
5815 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
5816 		    *(int *)result));
5817 
5818 		if (*(int *)result == DDI_SUCCESS) {
5819 			if (bpc->bpc_nlevel > 0) {
5820 				MDI_CLIENT_SET_POWER_UP(ct);
5821 			} else {
5822 				MDI_CLIENT_SET_POWER_DOWN(ct);
5823 			}
5824 		}
5825 
5826 		/* release the hold we did in pre-notification */
5827 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
5828 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
5829 			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
5830 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
5831 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5832 		}
5833 
5834 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
5835 			/* another thread might started attaching */
5836 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5837 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
5838 				    "mdi_bus_power i_mdi_pm_rele_client\n"));
5839 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
5840 			/* detaching has been taken care in pm_post_unconfig */
5841 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
5842 				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
5843 				    "mdi_bus_power i_mdi_pm_reset_client\n"));
5844 				i_mdi_pm_reset_client(ct);
5845 			}
5846 		}
5847 
5848 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
5849 		cv_broadcast(&ct->ct_powerchange_cv);
5850 
5851 		break;
5852 
5853 	/* need to do more */
5854 	case BUS_POWER_HAS_CHANGED:
5855 		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
5856 		    "BUS_POWER_HAS_CHANGED:"
5857 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
5858 		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
5859 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
5860 
5861 		if (bphc->bphc_nlevel > 0 &&
5862 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
5863 			if (ct->ct_power_cnt == 0) {
5864 				ret = i_mdi_power_all_phci(ct);
5865 			}
5866 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
5867 			    "mdi_bus_power i_mdi_pm_hold_client\n"));
5868 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
5869 		}
5870 
5871 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
5872 			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
5873 			    "mdi_bus_power i_mdi_pm_rele_client\n"));
5874 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5875 		}
5876 		break;
5877 	}
5878 
5879 	MDI_CLIENT_UNLOCK(ct);
5880 	return (ret);
5881 }
5882 
5883 static int
5884 i_mdi_pm_pre_config_one(dev_info_t *child)
5885 {
5886 	int		ret = MDI_SUCCESS;
5887 	mdi_client_t	*ct;
5888 
5889 	ct = i_devi_get_client(child);
5890 	if (ct == NULL)
5891 		return (MDI_FAILURE);
5892 
5893 	MDI_CLIENT_LOCK(ct);
5894 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5895 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5896 
5897 	if (!MDI_CLIENT_IS_FAILED(ct)) {
5898 		MDI_CLIENT_UNLOCK(ct);
5899 		MDI_DEBUG(4, (CE_NOTE, child,
5900 		    "i_mdi_pm_pre_config_one already configured\n"));
5901 		return (MDI_SUCCESS);
5902 	}
5903 
5904 	if (ct->ct_powercnt_config) {
5905 		MDI_CLIENT_UNLOCK(ct);
5906 		MDI_DEBUG(4, (CE_NOTE, child,
5907 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
5908 		return (MDI_SUCCESS);
5909 	}
5910 
5911 	if (ct->ct_power_cnt == 0) {
5912 		ret = i_mdi_power_all_phci(ct);
5913 	}
5914 	MDI_DEBUG(4, (CE_NOTE, child,
5915 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
5916 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5917 	ct->ct_powercnt_config = 1;
5918 	ct->ct_powercnt_reset = 0;
5919 	MDI_CLIENT_UNLOCK(ct);
5920 	return (ret);
5921 }
5922 
5923 static int
5924 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
5925 {
5926 	int			ret = MDI_SUCCESS;
5927 	dev_info_t		*cdip;
5928 	int			circ;
5929 
5930 	ASSERT(MDI_VHCI(parent));
5931 
5932 	/* ndi_devi_config_one */
5933 	if (child) {
5934 		return (i_mdi_pm_pre_config_one(child));
5935 	}
5936 
5937 	/* devi_config_common */
5938 	ndi_devi_enter(parent, &circ);
5939 	cdip = ddi_get_child(parent);
5940 	while (cdip) {
5941 		dev_info_t *next = ddi_get_next_sibling(cdip);
5942 
5943 		ret = i_mdi_pm_pre_config_one(cdip);
5944 		if (ret != MDI_SUCCESS)
5945 			break;
5946 		cdip = next;
5947 	}
5948 	ndi_devi_exit(parent, circ);
5949 	return (ret);
5950 }
5951 
5952 static int
5953 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
5954 {
5955 	int		ret = MDI_SUCCESS;
5956 	mdi_client_t	*ct;
5957 
5958 	ct = i_devi_get_client(child);
5959 	if (ct == NULL)
5960 		return (MDI_FAILURE);
5961 
5962 	MDI_CLIENT_LOCK(ct);
5963 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5964 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5965 
5966 	if (i_ddi_node_state(ct->ct_dip) < DS_READY) {
5967 		MDI_DEBUG(4, (CE_NOTE, child,
5968 		    "i_mdi_pm_pre_unconfig node detached already\n"));
5969 		MDI_CLIENT_UNLOCK(ct);
5970 		return (MDI_SUCCESS);
5971 	}
5972 
5973 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
5974 	    (flags & NDI_AUTODETACH)) {
5975 		MDI_DEBUG(4, (CE_NOTE, child,
5976 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
5977 		MDI_CLIENT_UNLOCK(ct);
5978 		return (MDI_FAILURE);
5979 	}
5980 
5981 	if (ct->ct_powercnt_unconfig) {
5982 		MDI_DEBUG(4, (CE_NOTE, child,
5983 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
5984 		MDI_CLIENT_UNLOCK(ct);
5985 		*held = 1;
5986 		return (MDI_SUCCESS);
5987 	}
5988 
5989 	if (ct->ct_power_cnt == 0) {
5990 		ret = i_mdi_power_all_phci(ct);
5991 	}
5992 	MDI_DEBUG(4, (CE_NOTE, child,
5993 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
5994 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5995 	ct->ct_powercnt_unconfig = 1;
5996 	ct->ct_powercnt_reset = 0;
5997 	MDI_CLIENT_UNLOCK(ct);
5998 	if (ret == MDI_SUCCESS)
5999 		*held = 1;
6000 	return (ret);
6001 }
6002 
6003 static int
6004 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
6005     int flags)
6006 {
6007 	int			ret = MDI_SUCCESS;
6008 	dev_info_t		*cdip;
6009 	int			circ;
6010 
6011 	ASSERT(MDI_VHCI(parent));
6012 	*held = 0;
6013 
6014 	/* ndi_devi_unconfig_one */
6015 	if (child) {
6016 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6017 	}
6018 
6019 	/* devi_unconfig_common */
6020 	ndi_devi_enter(parent, &circ);
6021 	cdip = ddi_get_child(parent);
6022 	while (cdip) {
6023 		dev_info_t *next = ddi_get_next_sibling(cdip);
6024 
6025 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6026 		cdip = next;
6027 	}
6028 	ndi_devi_exit(parent, circ);
6029 
6030 	if (*held)
6031 		ret = MDI_SUCCESS;
6032 
6033 	return (ret);
6034 }
6035 
6036 static void
6037 i_mdi_pm_post_config_one(dev_info_t *child)
6038 {
6039 	mdi_client_t	*ct;
6040 
6041 	ct = i_devi_get_client(child);
6042 	if (ct == NULL)
6043 		return;
6044 
6045 	MDI_CLIENT_LOCK(ct);
6046 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6047 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6048 
6049 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6050 		MDI_DEBUG(4, (CE_NOTE, child,
6051 		    "i_mdi_pm_post_config_one NOT configured\n"));
6052 		MDI_CLIENT_UNLOCK(ct);
6053 		return;
6054 	}
6055 
6056 	/* client has not been updated */
6057 	if (MDI_CLIENT_IS_FAILED(ct)) {
6058 		MDI_DEBUG(4, (CE_NOTE, child,
6059 		    "i_mdi_pm_post_config_one NOT configured\n"));
6060 		MDI_CLIENT_UNLOCK(ct);
6061 		return;
6062 	}
6063 
6064 	/* another thread might have powered it down or detached it */
6065 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6066 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6067 	    (i_ddi_node_state(ct->ct_dip) < DS_READY &&
6068 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6069 		MDI_DEBUG(4, (CE_NOTE, child,
6070 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6071 		i_mdi_pm_reset_client(ct);
6072 	} else {
6073 		mdi_pathinfo_t  *pip, *next;
6074 		int	valid_path_count = 0;
6075 
6076 		MDI_DEBUG(4, (CE_NOTE, child,
6077 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6078 		pip = ct->ct_path_head;
6079 		while (pip != NULL) {
6080 			MDI_PI_LOCK(pip);
6081 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6082 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6083 				valid_path_count ++;
6084 			MDI_PI_UNLOCK(pip);
6085 			pip = next;
6086 		}
6087 		i_mdi_pm_rele_client(ct, valid_path_count);
6088 	}
6089 	ct->ct_powercnt_config = 0;
6090 	MDI_CLIENT_UNLOCK(ct);
6091 }
6092 
6093 static void
6094 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
6095 {
6096 	int		circ;
6097 	dev_info_t	*cdip;
6098 	ASSERT(MDI_VHCI(parent));
6099 
6100 	/* ndi_devi_config_one */
6101 	if (child) {
6102 		i_mdi_pm_post_config_one(child);
6103 		return;
6104 	}
6105 
6106 	/* devi_config_common */
6107 	ndi_devi_enter(parent, &circ);
6108 	cdip = ddi_get_child(parent);
6109 	while (cdip) {
6110 		dev_info_t *next = ddi_get_next_sibling(cdip);
6111 
6112 		i_mdi_pm_post_config_one(cdip);
6113 		cdip = next;
6114 	}
6115 	ndi_devi_exit(parent, circ);
6116 }
6117 
6118 static void
6119 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6120 {
6121 	mdi_client_t	*ct;
6122 
6123 	ct = i_devi_get_client(child);
6124 	if (ct == NULL)
6125 		return;
6126 
6127 	MDI_CLIENT_LOCK(ct);
6128 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6129 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6130 
6131 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6132 		MDI_DEBUG(4, (CE_NOTE, child,
6133 		    "i_mdi_pm_post_unconfig NOT held\n"));
6134 		MDI_CLIENT_UNLOCK(ct);
6135 		return;
6136 	}
6137 
6138 	/* failure detaching or another thread just attached it */
6139 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6140 	    i_ddi_node_state(ct->ct_dip) == DS_READY) ||
6141 	    (i_ddi_node_state(ct->ct_dip) != DS_READY &&
6142 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6143 		MDI_DEBUG(4, (CE_NOTE, child,
6144 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6145 		i_mdi_pm_reset_client(ct);
6146 	} else {
6147 		mdi_pathinfo_t  *pip, *next;
6148 		int	valid_path_count = 0;
6149 
6150 		MDI_DEBUG(4, (CE_NOTE, child,
6151 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6152 		pip = ct->ct_path_head;
6153 		while (pip != NULL) {
6154 			MDI_PI_LOCK(pip);
6155 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6156 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6157 				valid_path_count ++;
6158 			MDI_PI_UNLOCK(pip);
6159 			pip = next;
6160 		}
6161 		i_mdi_pm_rele_client(ct, valid_path_count);
6162 		ct->ct_powercnt_unconfig = 0;
6163 	}
6164 
6165 	MDI_CLIENT_UNLOCK(ct);
6166 }
6167 
6168 static void
6169 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
6170 {
6171 	int			circ;
6172 	dev_info_t		*cdip;
6173 
6174 	ASSERT(MDI_VHCI(parent));
6175 
6176 	if (!held) {
6177 		MDI_DEBUG(4, (CE_NOTE, parent,
6178 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6179 		return;
6180 	}
6181 
6182 	if (child) {
6183 		i_mdi_pm_post_unconfig_one(child);
6184 		return;
6185 	}
6186 
6187 	ndi_devi_enter(parent, &circ);
6188 	cdip = ddi_get_child(parent);
6189 	while (cdip) {
6190 		dev_info_t *next = ddi_get_next_sibling(cdip);
6191 
6192 		i_mdi_pm_post_unconfig_one(cdip);
6193 		cdip = next;
6194 	}
6195 	ndi_devi_exit(parent, circ);
6196 }
6197 
6198 int
6199 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6200 {
6201 	int			circ, ret = MDI_SUCCESS;
6202 	dev_info_t		*client_dip = NULL;
6203 	mdi_client_t		*ct;
6204 
6205 	/*
6206 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6207 	 * Power up pHCI for the named client device.
6208 	 * Note: Before the client is enumerated under vhci by phci,
6209 	 * client_dip can be NULL. Then proceed to power up all the
6210 	 * pHCIs.
6211 	 */
6212 	if (devnm != NULL) {
6213 		ndi_devi_enter(vdip, &circ);
6214 		client_dip = ndi_devi_findchild(vdip, devnm);
6215 		ndi_devi_exit(vdip, circ);
6216 	}
6217 
6218 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));
6219 
6220 	switch (op) {
6221 	case MDI_PM_PRE_CONFIG:
6222 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6223 
6224 		break;
6225 	case MDI_PM_PRE_UNCONFIG:
6226 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6227 		    flags);
6228 
6229 		break;
6230 	case MDI_PM_POST_CONFIG:
6231 		i_mdi_pm_post_config(vdip, client_dip);
6232 
6233 		break;
6234 	case MDI_PM_POST_UNCONFIG:
6235 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6236 
6237 		break;
6238 	case MDI_PM_HOLD_POWER:
6239 	case MDI_PM_RELE_POWER:
6240 		ASSERT(args);
6241 
6242 		client_dip = (dev_info_t *)args;
6243 		ASSERT(MDI_CLIENT(client_dip));
6244 
6245 		ct = i_devi_get_client(client_dip);
6246 		MDI_CLIENT_LOCK(ct);
6247 
6248 		if (op == MDI_PM_HOLD_POWER) {
6249 			if (ct->ct_power_cnt == 0) {
6250 				(void) i_mdi_power_all_phci(ct);
6251 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6252 				    "mdi_power i_mdi_pm_hold_client\n"));
6253 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6254 			}
6255 		} else {
6256 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6257 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6258 				    "mdi_power i_mdi_pm_rele_client\n"));
6259 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6260 			} else {
6261 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6262 				    "mdi_power i_mdi_pm_reset_client\n"));
6263 				i_mdi_pm_reset_client(ct);
6264 			}
6265 		}
6266 
6267 		MDI_CLIENT_UNLOCK(ct);
6268 		break;
6269 	default:
6270 		break;
6271 	}
6272 
6273 	return (ret);
6274 }
6275 
6276 int
6277 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6278 {
6279 	mdi_vhci_t *vhci;
6280 
6281 	if (!MDI_VHCI(dip))
6282 		return (MDI_FAILURE);
6283 
6284 	if (mdi_class) {
6285 		vhci = DEVI(dip)->devi_mdi_xhci;
6286 		ASSERT(vhci);
6287 		*mdi_class = vhci->vh_class;
6288 	}
6289 
6290 	return (MDI_SUCCESS);
6291 }
6292 
6293 int
6294 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6295 {
6296 	mdi_phci_t *phci;
6297 
6298 	if (!MDI_PHCI(dip))
6299 		return (MDI_FAILURE);
6300 
6301 	if (mdi_class) {
6302 		phci = DEVI(dip)->devi_mdi_xhci;
6303 		ASSERT(phci);
6304 		*mdi_class = phci->ph_vhci->vh_class;
6305 	}
6306 
6307 	return (MDI_SUCCESS);
6308 }
6309 
6310 int
6311 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6312 {
6313 	mdi_client_t *client;
6314 
6315 	if (!MDI_CLIENT(dip))
6316 		return (MDI_FAILURE);
6317 
6318 	if (mdi_class) {
6319 		client = DEVI(dip)->devi_mdi_client;
6320 		ASSERT(client);
6321 		*mdi_class = client->ct_vhci->vh_class;
6322 	}
6323 
6324 	return (MDI_SUCCESS);
6325 }
6326 
6327 void *
6328 mdi_client_get_vhci_private(dev_info_t *dip)
6329 {
6330 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6331 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6332 		mdi_client_t	*ct;
6333 		ct = i_devi_get_client(dip);
6334 		return (ct->ct_vprivate);
6335 	}
6336 	return (NULL);
6337 }
6338 
6339 void
6340 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6341 {
6342 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6343 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6344 		mdi_client_t	*ct;
6345 		ct = i_devi_get_client(dip);
6346 		ct->ct_vprivate = data;
6347 	}
6348 }
6349 /*
6350  * mdi_pi_get_vhci_private():
6351  *		Get the vhci private information associated with the
6352  *		mdi_pathinfo node
6353  */
6354 void *
6355 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6356 {
6357 	caddr_t	vprivate = NULL;
6358 	if (pip) {
6359 		vprivate = MDI_PI(pip)->pi_vprivate;
6360 	}
6361 	return (vprivate);
6362 }
6363 
6364 /*
6365  * mdi_pi_set_vhci_private():
6366  *		Set the vhci private information in the mdi_pathinfo node
6367  */
6368 void
6369 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6370 {
6371 	if (pip) {
6372 		MDI_PI(pip)->pi_vprivate = priv;
6373 	}
6374 }
6375 
6376 /*
6377  * mdi_phci_get_vhci_private():
6378  *		Get the vhci private information associated with the
6379  *		mdi_phci node
6380  */
6381 void *
6382 mdi_phci_get_vhci_private(dev_info_t *dip)
6383 {
6384 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6385 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6386 		mdi_phci_t	*ph;
6387 		ph = i_devi_get_phci(dip);
6388 		return (ph->ph_vprivate);
6389 	}
6390 	return (NULL);
6391 }
6392 
6393 /*
6394  * mdi_phci_set_vhci_private():
6395  *		Set the vhci private information in the mdi_phci node
6396  */
6397 void
6398 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6399 {
6400 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6401 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6402 		mdi_phci_t	*ph;
6403 		ph = i_devi_get_phci(dip);
6404 		ph->ph_vprivate = priv;
6405 	}
6406 }
6407 
6408 /*
6409  * List of vhci class names:
6410  * A vhci class name must be in this list only if the corresponding vhci
6411  * driver intends to use the mdi provided bus config implementation
6412  * (i.e., mdi_vhci_bus_config()).
6413  */
6414 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
6415 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
6416 
6417 /*
6418  * Built-in list of phci drivers for every vhci class.
6419  * All phci drivers expect iscsi have root device support.
6420  */
6421 static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
6422 	{ "fp", 1 },
6423 	{ "iscsi", 0 },
6424 	{ "ibsrp", 1 }
6425 	};
6426 
6427 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
6428 
6429 /*
6430  * During boot time, the on-disk vhci cache for every vhci class is read
6431  * in the form of an nvlist and stored here.
6432  */
6433 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
6434 
6435 /* nvpair names in vhci cache nvlist */
6436 #define	MDI_VHCI_CACHE_VERSION	1
6437 #define	MDI_NVPNAME_VERSION	"version"
6438 #define	MDI_NVPNAME_PHCIS	"phcis"
6439 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
6440 
6441 /*
6442  * Given vhci class name, return its on-disk vhci cache filename.
6443  * Memory for the returned filename which includes the full path is allocated
6444  * by this function.
6445  */
6446 static char *
6447 vhclass2vhcache_filename(char *vhclass)
6448 {
6449 	char *filename;
6450 	int len;
6451 	static char *fmt = "/etc/devices/mdi_%s_cache";
6452 
6453 	/*
6454 	 * fmt contains the on-disk vhci cache file name format;
6455 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
6456 	 */
6457 
6458 	/* the -1 below is to account for "%s" in the format string */
6459 	len = strlen(fmt) + strlen(vhclass) - 1;
6460 	filename = kmem_alloc(len, KM_SLEEP);
6461 	(void) snprintf(filename, len, fmt, vhclass);
6462 	ASSERT(len == (strlen(filename) + 1));
6463 	return (filename);
6464 }
6465 
6466 /*
6467  * initialize the vhci cache related data structures and read the on-disk
6468  * vhci cached data into memory.
6469  */
6470 static void
6471 setup_vhci_cache(mdi_vhci_t *vh)
6472 {
6473 	mdi_vhci_config_t *vhc;
6474 	mdi_vhci_cache_t *vhcache;
6475 	int i;
6476 	nvlist_t *nvl = NULL;
6477 
6478 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
6479 	vh->vh_config = vhc;
6480 	vhcache = &vhc->vhc_vhcache;
6481 
6482 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
6483 
6484 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
6485 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
6486 
6487 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
6488 
6489 	/*
6490 	 * Create string hash; same as mod_hash_create_strhash() except that
6491 	 * we use NULL key destructor.
6492 	 */
6493 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
6494 	    mdi_bus_config_cache_hash_size,
6495 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
6496 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
6497 
6498 	setup_phci_driver_list(vh);
6499 
6500 	/*
6501 	 * The on-disk vhci cache is read during booting prior to the
6502 	 * lights-out period by mdi_read_devices_files().
6503 	 */
6504 	for (i = 0; i < N_VHCI_CLASSES; i++) {
6505 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
6506 			nvl = vhcache_nvl[i];
6507 			vhcache_nvl[i] = NULL;
6508 			break;
6509 		}
6510 	}
6511 
6512 	/*
6513 	 * this is to cover the case of some one manually causing unloading
6514 	 * (or detaching) and reloading (or attaching) of a vhci driver.
6515 	 */
6516 	if (nvl == NULL && modrootloaded)
6517 		nvl = read_on_disk_vhci_cache(vh->vh_class);
6518 
6519 	if (nvl != NULL) {
6520 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
6521 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
6522 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
6523 		else  {
6524 			cmn_err(CE_WARN,
6525 			    "%s: data file corrupted, will recreate\n",
6526 			    vhc->vhc_vhcache_filename);
6527 		}
6528 		rw_exit(&vhcache->vhcache_lock);
6529 		nvlist_free(nvl);
6530 	}
6531 
6532 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
6533 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
6534 
6535 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
6536 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
6537 }
6538 
6539 /*
6540  * free all vhci cache related resources
6541  */
6542 static int
6543 destroy_vhci_cache(mdi_vhci_t *vh)
6544 {
6545 	mdi_vhci_config_t *vhc = vh->vh_config;
6546 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
6547 	mdi_vhcache_phci_t *cphci, *cphci_next;
6548 	mdi_vhcache_client_t *cct, *cct_next;
6549 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
6550 
6551 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
6552 		return (MDI_FAILURE);
6553 
6554 	kmem_free(vhc->vhc_vhcache_filename,
6555 	    strlen(vhc->vhc_vhcache_filename) + 1);
6556 
6557 	if (vhc->vhc_phci_driver_list)
6558 		free_phci_driver_list(vhc);
6559 
6560 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
6561 
6562 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
6563 	    cphci = cphci_next) {
6564 		cphci_next = cphci->cphci_next;
6565 		free_vhcache_phci(cphci);
6566 	}
6567 
6568 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
6569 		cct_next = cct->cct_next;
6570 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
6571 			cpi_next = cpi->cpi_next;
6572 			free_vhcache_pathinfo(cpi);
6573 		}
6574 		free_vhcache_client(cct);
6575 	}
6576 
6577 	rw_destroy(&vhcache->vhcache_lock);
6578 
6579 	mutex_destroy(&vhc->vhc_lock);
6580 	cv_destroy(&vhc->vhc_cv);
6581 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
6582 	return (MDI_SUCCESS);
6583 }
6584 
6585 /*
6586  * Setup the list of phci drivers associated with the specified vhci class.
6587  * MDI uses this information to rebuild bus config cache if in case the
6588  * cache is not available or corrupted.
6589  */
6590 static void
6591 setup_phci_driver_list(mdi_vhci_t *vh)
6592 {
6593 	mdi_vhci_config_t *vhc = vh->vh_config;
6594 	mdi_phci_driver_info_t *driver_list;
6595 	char **driver_list1;
6596 	uint_t ndrivers, ndrivers1;
6597 	int i, j;
6598 
6599 	if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
6600 		driver_list = scsi_phci_driver_list;
6601 		ndrivers = sizeof (scsi_phci_driver_list) /
6602 		    sizeof (mdi_phci_driver_info_t);
6603 	} else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) {
6604 		driver_list = ib_phci_driver_list;
6605 		ndrivers = sizeof (ib_phci_driver_list) /
6606 		    sizeof (mdi_phci_driver_info_t);
6607 	} else {
6608 		driver_list = NULL;
6609 		ndrivers = 0;
6610 	}
6611 
6612 	/*
6613 	 * The driver.conf file of a vhci driver can specify additional
6614 	 * phci drivers using a project private "phci-drivers" property.
6615 	 */
6616 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip,
6617 	    DDI_PROP_DONTPASS, "phci-drivers", &driver_list1,
6618 	    &ndrivers1) != DDI_PROP_SUCCESS)
6619 		ndrivers1 = 0;
6620 
6621 	vhc->vhc_nphci_drivers = ndrivers + ndrivers1;
6622 	if (vhc->vhc_nphci_drivers == 0)
6623 		return;
6624 
6625 	vhc->vhc_phci_driver_list = kmem_alloc(
6626 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP);
6627 
6628 	for (i = 0; i < ndrivers; i++) {
6629 		vhc->vhc_phci_driver_list[i].phdriver_name =
6630 		    i_ddi_strdup(driver_list[i].phdriver_name, KM_SLEEP);
6631 		vhc->vhc_phci_driver_list[i].phdriver_root_support =
6632 		    driver_list[i].phdriver_root_support;
6633 	}
6634 
6635 	for (j = 0; j < ndrivers1; j++, i++) {
6636 		vhc->vhc_phci_driver_list[i].phdriver_name =
6637 		    i_ddi_strdup(driver_list1[j], KM_SLEEP);
6638 		vhc->vhc_phci_driver_list[i].phdriver_root_support = 1;
6639 	}
6640 
6641 	if (ndrivers1)
6642 		ddi_prop_free(driver_list1);
6643 }
6644 
6645 /*
6646  * Free the memory allocated for the phci driver list
6647  */
6648 static void
6649 free_phci_driver_list(mdi_vhci_config_t *vhc)
6650 {
6651 	int i;
6652 
6653 	if (vhc->vhc_phci_driver_list == NULL)
6654 		return;
6655 
6656 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
6657 		kmem_free(vhc->vhc_phci_driver_list[i].phdriver_name,
6658 		    strlen(vhc->vhc_phci_driver_list[i].phdriver_name) + 1);
6659 	}
6660 
6661 	kmem_free(vhc->vhc_phci_driver_list,
6662 	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers);
6663 }
6664 
6665 /*
6666  * Stop all vhci cache related async threads and free their resources.
6667  */
6668 static int
6669 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
6670 {
6671 	mdi_async_client_config_t *acc, *acc_next;
6672 
6673 	mutex_enter(&vhc->vhc_lock);
6674 	vhc->vhc_flags |= MDI_VHC_EXIT;
6675 	ASSERT(vhc->vhc_acc_thrcount >= 0);
6676 	cv_broadcast(&vhc->vhc_cv);
6677 
6678 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
6679 	    vhc->vhc_acc_thrcount != 0) {
6680 		mutex_exit(&vhc->vhc_lock);
6681 		delay(1);
6682 		mutex_enter(&vhc->vhc_lock);
6683 	}
6684 
6685 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
6686 
6687 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
6688 		acc_next = acc->acc_next;
6689 		free_async_client_config(acc);
6690 	}
6691 	vhc->vhc_acc_list_head = NULL;
6692 	vhc->vhc_acc_list_tail = NULL;
6693 	vhc->vhc_acc_count = 0;
6694 
6695 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6696 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6697 		mutex_exit(&vhc->vhc_lock);
6698 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
6699 			vhcache_dirty(vhc);
6700 			return (MDI_FAILURE);
6701 		}
6702 	} else
6703 		mutex_exit(&vhc->vhc_lock);
6704 
6705 	if (callb_delete(vhc->vhc_cbid) != 0)
6706 		return (MDI_FAILURE);
6707 
6708 	return (MDI_SUCCESS);
6709 }
6710 
6711 /*
6712  * Stop vhci cache flush thread
6713  */
6714 /* ARGSUSED */
6715 static boolean_t
6716 stop_vhcache_flush_thread(void *arg, int code)
6717 {
6718 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
6719 
6720 	mutex_enter(&vhc->vhc_lock);
6721 	vhc->vhc_flags |= MDI_VHC_EXIT;
6722 	cv_broadcast(&vhc->vhc_cv);
6723 
6724 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
6725 		mutex_exit(&vhc->vhc_lock);
6726 		delay(1);
6727 		mutex_enter(&vhc->vhc_lock);
6728 	}
6729 
6730 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
6731 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
6732 		mutex_exit(&vhc->vhc_lock);
6733 		(void) flush_vhcache(vhc, 1);
6734 	} else
6735 		mutex_exit(&vhc->vhc_lock);
6736 
6737 	return (B_TRUE);
6738 }
6739 
6740 /*
6741  * Enqueue the vhcache phci (cphci) at the tail of the list
6742  */
6743 static void
6744 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
6745 {
6746 	cphci->cphci_next = NULL;
6747 	if (vhcache->vhcache_phci_head == NULL)
6748 		vhcache->vhcache_phci_head = cphci;
6749 	else
6750 		vhcache->vhcache_phci_tail->cphci_next = cphci;
6751 	vhcache->vhcache_phci_tail = cphci;
6752 }
6753 
6754 /*
6755  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
6756  */
6757 static void
6758 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6759     mdi_vhcache_pathinfo_t *cpi)
6760 {
6761 	cpi->cpi_next = NULL;
6762 	if (cct->cct_cpi_head == NULL)
6763 		cct->cct_cpi_head = cpi;
6764 	else
6765 		cct->cct_cpi_tail->cpi_next = cpi;
6766 	cct->cct_cpi_tail = cpi;
6767 }
6768 
6769 /*
6770  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
6771  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
6772  * flag set come at the beginning of the list. All cpis which have this
6773  * flag set come at the end of the list.
6774  */
6775 static void
6776 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
6777     mdi_vhcache_pathinfo_t *newcpi)
6778 {
6779 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
6780 
6781 	if (cct->cct_cpi_head == NULL ||
6782 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
6783 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
6784 	else {
6785 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
6786 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
6787 		    prev_cpi = cpi, cpi = cpi->cpi_next)
6788 			;
6789 
6790 		if (prev_cpi == NULL)
6791 			cct->cct_cpi_head = newcpi;
6792 		else
6793 			prev_cpi->cpi_next = newcpi;
6794 
6795 		newcpi->cpi_next = cpi;
6796 
6797 		if (cpi == NULL)
6798 			cct->cct_cpi_tail = newcpi;
6799 	}
6800 }
6801 
6802 /*
6803  * Enqueue the vhcache client (cct) at the tail of the list
6804  */
6805 static void
6806 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
6807     mdi_vhcache_client_t *cct)
6808 {
6809 	cct->cct_next = NULL;
6810 	if (vhcache->vhcache_client_head == NULL)
6811 		vhcache->vhcache_client_head = cct;
6812 	else
6813 		vhcache->vhcache_client_tail->cct_next = cct;
6814 	vhcache->vhcache_client_tail = cct;
6815 }
6816 
/*
 * Free an array of nelem kmem-allocated strings together with the array
 * itself.  NULL slots are skipped and a NULL array is a no-op.  Each
 * string is freed using its current length plus the terminating NUL.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	idx = 0;
	while (idx < nelem) {
		if (str[idx] != NULL)
			kmem_free(str[idx], strlen(str[idx]) + 1);
		idx++;
	}

	kmem_free(str, nelem * sizeof (char *));
}
6830 
6831 static void
6832 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
6833 {
6834 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
6835 	kmem_free(cphci, sizeof (*cphci));
6836 }
6837 
6838 static void
6839 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
6840 {
6841 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
6842 	kmem_free(cpi, sizeof (*cpi));
6843 }
6844 
6845 static void
6846 free_vhcache_client(mdi_vhcache_client_t *cct)
6847 {
6848 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
6849 	kmem_free(cct, sizeof (*cct));
6850 }
6851 
6852 static char *
6853 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
6854 {
6855 	char *name_addr;
6856 	int len;
6857 
6858 	len = strlen(ct_name) + strlen(ct_addr) + 2;
6859 	name_addr = kmem_alloc(len, KM_SLEEP);
6860 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
6861 
6862 	if (ret_len)
6863 		*ret_len = len;
6864 	return (name_addr);
6865 }
6866 
6867 /*
6868  * Copy the contents of paddrnvl to vhci cache.
6869  * paddrnvl nvlist contains path information for a vhci client.
6870  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
6871  */
6872 static void
6873 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
6874     mdi_vhcache_client_t *cct)
6875 {
6876 	nvpair_t *nvp = NULL;
6877 	mdi_vhcache_pathinfo_t *cpi;
6878 	uint_t nelem;
6879 	uint32_t *val;
6880 
6881 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6882 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
6883 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
6884 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6885 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
6886 		ASSERT(nelem == 2);
6887 		cpi->cpi_cphci = cphci_list[val[0]];
6888 		cpi->cpi_flags = val[1];
6889 		enqueue_tail_vhcache_pathinfo(cct, cpi);
6890 	}
6891 }
6892 
6893 /*
6894  * Copy the contents of caddrmapnvl to vhci cache.
6895  * caddrmapnvl nvlist contains vhci client address to phci client address
6896  * mappings. See the comment in mainnvl_to_vhcache() for the format of
6897  * this nvlist.
6898  */
6899 static void
6900 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
6901     mdi_vhcache_phci_t *cphci_list[])
6902 {
6903 	nvpair_t *nvp = NULL;
6904 	nvlist_t *paddrnvl;
6905 	mdi_vhcache_client_t *cct;
6906 
6907 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
6908 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
6909 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
6910 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
6911 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
6912 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
6913 		/* the client must contain at least one path */
6914 		ASSERT(cct->cct_cpi_head != NULL);
6915 
6916 		enqueue_vhcache_client(vhcache, cct);
6917 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
6918 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
6919 	}
6920 }
6921 
6922 /*
6923  * Copy the contents of the main nvlist to vhci cache.
6924  *
6925  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
6926  * The nvlist contains the mappings between the vhci client addresses and
6927  * their corresponding phci client addresses.
6928  *
6929  * The structure of the nvlist is as follows:
6930  *
6931  * Main nvlist:
6932  *	NAME		TYPE		DATA
6933  *	version		int32		version number
6934  *	phcis		string array	array of phci paths
6935  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
6936  *
6937  * structure of c2paddrs_nvl:
6938  *	NAME		TYPE		DATA
6939  *	caddr1		nvlist_t	paddrs_nvl1
6940  *	caddr2		nvlist_t	paddrs_nvl2
6941  *	...
6942  * where caddr1, caddr2, ... are vhci client name and addresses in the
6943  * form of "<clientname>@<clientaddress>".
6944  * (for example: "ssd@2000002037cd9f72");
6945  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
6946  *
6947  * structure of paddrs_nvl:
6948  *	NAME		TYPE		DATA
6949  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
6950  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
6951  *	...
6952  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
6953  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
6954  * phci-ids are integers that identify PHCIs to which the
6955  * the bus specific address belongs to. These integers are used as an index
6956  * into to the phcis string array in the main nvlist to get the PHCI path.
6957  */
6958 static int
6959 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
6960 {
6961 	char **phcis, **phci_namep;
6962 	uint_t nphcis;
6963 	mdi_vhcache_phci_t *cphci, **cphci_list;
6964 	nvlist_t *caddrmapnvl;
6965 	int32_t ver;
6966 	int i;
6967 	size_t cphci_list_size;
6968 
6969 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
6970 
6971 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
6972 	    ver != MDI_VHCI_CACHE_VERSION)
6973 		return (MDI_FAILURE);
6974 
6975 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
6976 	    &nphcis) != 0)
6977 		return (MDI_SUCCESS);
6978 
6979 	ASSERT(nphcis > 0);
6980 
6981 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
6982 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
6983 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
6984 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
6985 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
6986 		enqueue_vhcache_phci(vhcache, cphci);
6987 		cphci_list[i] = cphci;
6988 	}
6989 
6990 	ASSERT(vhcache->vhcache_phci_head != NULL);
6991 
6992 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
6993 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
6994 
6995 	kmem_free(cphci_list, cphci_list_size);
6996 	return (MDI_SUCCESS);
6997 }
6998 
6999 /*
7000  * Build paddrnvl for the specified client using the information in the
7001  * vhci cache and add it to the caddrmapnnvl.
7002  * Returns 0 on success, errno on failure.
7003  */
7004 static int
7005 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7006     nvlist_t *caddrmapnvl)
7007 {
7008 	mdi_vhcache_pathinfo_t *cpi;
7009 	nvlist_t *nvl;
7010 	int err;
7011 	uint32_t val[2];
7012 
7013 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7014 
7015 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7016 		return (err);
7017 
7018 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7019 		val[0] = cpi->cpi_cphci->cphci_id;
7020 		val[1] = cpi->cpi_flags;
7021 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7022 		    != 0)
7023 			goto out;
7024 	}
7025 
7026 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7027 out:
7028 	nvlist_free(nvl);
7029 	return (err);
7030 }
7031 
7032 /*
7033  * Build caddrmapnvl using the information in the vhci cache
7034  * and add it to the mainnvl.
7035  * Returns 0 on success, errno on failure.
7036  */
7037 static int
7038 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7039 {
7040 	mdi_vhcache_client_t *cct;
7041 	nvlist_t *nvl;
7042 	int err;
7043 
7044 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7045 
7046 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7047 		return (err);
7048 
7049 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7050 	    cct = cct->cct_next) {
7051 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7052 			goto out;
7053 	}
7054 
7055 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7056 out:
7057 	nvlist_free(nvl);
7058 	return (err);
7059 }
7060 
7061 /*
7062  * Build nvlist using the information in the vhci cache.
7063  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7064  * Returns nvl on success, NULL on failure.
7065  */
7066 static nvlist_t *
7067 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7068 {
7069 	mdi_vhcache_phci_t *cphci;
7070 	uint_t phci_count;
7071 	char **phcis;
7072 	nvlist_t *nvl;
7073 	int err, i;
7074 
7075 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7076 		nvl = NULL;
7077 		goto out;
7078 	}
7079 
7080 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7081 	    MDI_VHCI_CACHE_VERSION)) != 0)
7082 		goto out;
7083 
7084 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7085 	if (vhcache->vhcache_phci_head == NULL) {
7086 		rw_exit(&vhcache->vhcache_lock);
7087 		return (nvl);
7088 	}
7089 
7090 	phci_count = 0;
7091 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7092 	    cphci = cphci->cphci_next)
7093 		cphci->cphci_id = phci_count++;
7094 
7095 	/* build phci pathname list */
7096 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7097 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7098 	    cphci = cphci->cphci_next, i++)
7099 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7100 
7101 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7102 	    phci_count);
7103 	free_string_array(phcis, phci_count);
7104 
7105 	if (err == 0 &&
7106 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7107 		rw_exit(&vhcache->vhcache_lock);
7108 		return (nvl);
7109 	}
7110 
7111 	rw_exit(&vhcache->vhcache_lock);
7112 out:
7113 	if (nvl)
7114 		nvlist_free(nvl);
7115 	return (NULL);
7116 }
7117 
7118 /*
7119  * Lookup vhcache phci structure for the specified phci path.
7120  */
7121 static mdi_vhcache_phci_t *
7122 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7123 {
7124 	mdi_vhcache_phci_t *cphci;
7125 
7126 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7127 
7128 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7129 	    cphci = cphci->cphci_next) {
7130 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7131 			return (cphci);
7132 	}
7133 
7134 	return (NULL);
7135 }
7136 
7137 /*
7138  * Lookup vhcache phci structure for the specified phci.
7139  */
7140 static mdi_vhcache_phci_t *
7141 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7142 {
7143 	mdi_vhcache_phci_t *cphci;
7144 
7145 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7146 
7147 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7148 	    cphci = cphci->cphci_next) {
7149 		if (cphci->cphci_phci == ph)
7150 			return (cphci);
7151 	}
7152 
7153 	return (NULL);
7154 }
7155 
7156 /*
7157  * Add the specified phci to the vhci cache if not already present.
7158  */
7159 static void
7160 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7161 {
7162 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7163 	mdi_vhcache_phci_t *cphci;
7164 	char *pathname;
7165 	int cache_updated;
7166 
7167 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7168 
7169 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7170 	(void) ddi_pathname(ph->ph_dip, pathname);
7171 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7172 	    != NULL) {
7173 		cphci->cphci_phci = ph;
7174 		cache_updated = 0;
7175 	} else {
7176 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7177 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7178 		cphci->cphci_phci = ph;
7179 		enqueue_vhcache_phci(vhcache, cphci);
7180 		cache_updated = 1;
7181 	}
7182 
7183 	rw_exit(&vhcache->vhcache_lock);
7184 
7185 	/*
7186 	 * Since a new phci has been added, reset
7187 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7188 	 * during next vhcache_discover_paths().
7189 	 */
7190 	mutex_enter(&vhc->vhc_lock);
7191 	vhc->vhc_path_discovery_cutoff_time = 0;
7192 	mutex_exit(&vhc->vhc_lock);
7193 
7194 	kmem_free(pathname, MAXPATHLEN);
7195 	if (cache_updated)
7196 		vhcache_dirty(vhc);
7197 }
7198 
7199 /*
7200  * Remove the reference to the specified phci from the vhci cache.
7201  */
7202 static void
7203 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7204 {
7205 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7206 	mdi_vhcache_phci_t *cphci;
7207 
7208 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7209 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7210 		/* do not remove the actual mdi_vhcache_phci structure */
7211 		cphci->cphci_phci = NULL;
7212 	}
7213 	rw_exit(&vhcache->vhcache_lock);
7214 }
7215 
7216 static void
7217 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7218     mdi_vhcache_lookup_token_t *src)
7219 {
7220 	if (src == NULL) {
7221 		dst->lt_cct = NULL;
7222 		dst->lt_cct_lookup_time = 0;
7223 	} else {
7224 		dst->lt_cct = src->lt_cct;
7225 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7226 	}
7227 }
7228 
7229 /*
7230  * Look up vhcache client for the specified client.
7231  */
7232 static mdi_vhcache_client_t *
7233 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7234     mdi_vhcache_lookup_token_t *token)
7235 {
7236 	mod_hash_val_t hv;
7237 	char *name_addr;
7238 	int len;
7239 
7240 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7241 
7242 	/*
7243 	 * If no vhcache clean occurred since the last lookup, we can
7244 	 * simply return the cct from the last lookup operation.
7245 	 * It works because ccts are never freed except during the vhcache
7246 	 * cleanup operation.
7247 	 */
7248 	if (token != NULL &&
7249 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7250 		return (token->lt_cct);
7251 
7252 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7253 	if (mod_hash_find(vhcache->vhcache_client_hash,
7254 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7255 		if (token) {
7256 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7257 			token->lt_cct_lookup_time = lbolt64;
7258 		}
7259 	} else {
7260 		if (token) {
7261 			token->lt_cct = NULL;
7262 			token->lt_cct_lookup_time = 0;
7263 		}
7264 		hv = NULL;
7265 	}
7266 	kmem_free(name_addr, len);
7267 	return ((mdi_vhcache_client_t *)hv);
7268 }
7269 
7270 /*
7271  * Add the specified path to the vhci cache if not already present.
7272  * Also add the vhcache client for the client corresponding to this path
7273  * if it doesn't already exist.
7274  */
7275 static void
7276 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7277 {
7278 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7279 	mdi_vhcache_client_t *cct;
7280 	mdi_vhcache_pathinfo_t *cpi;
7281 	mdi_phci_t *ph = pip->pi_phci;
7282 	mdi_client_t *ct = pip->pi_client;
7283 	int cache_updated = 0;
7284 
7285 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7286 
7287 	/* if vhcache client for this pip doesn't already exist, add it */
7288 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7289 	    NULL)) == NULL) {
7290 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7291 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7292 		    ct->ct_guid, NULL);
7293 		enqueue_vhcache_client(vhcache, cct);
7294 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7295 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7296 		cache_updated = 1;
7297 	}
7298 
7299 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7300 		if (cpi->cpi_cphci->cphci_phci == ph &&
7301 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7302 			cpi->cpi_pip = pip;
7303 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7304 				cpi->cpi_flags &=
7305 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7306 				sort_vhcache_paths(cct);
7307 				cache_updated = 1;
7308 			}
7309 			break;
7310 		}
7311 	}
7312 
7313 	if (cpi == NULL) {
7314 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7315 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7316 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7317 		ASSERT(cpi->cpi_cphci != NULL);
7318 		cpi->cpi_pip = pip;
7319 		enqueue_vhcache_pathinfo(cct, cpi);
7320 		cache_updated = 1;
7321 	}
7322 
7323 	rw_exit(&vhcache->vhcache_lock);
7324 
7325 	if (cache_updated)
7326 		vhcache_dirty(vhc);
7327 }
7328 
7329 /*
7330  * Remove the reference to the specified path from the vhci cache.
7331  */
7332 static void
7333 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7334 {
7335 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7336 	mdi_client_t *ct = pip->pi_client;
7337 	mdi_vhcache_client_t *cct;
7338 	mdi_vhcache_pathinfo_t *cpi;
7339 
7340 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7341 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7342 	    NULL)) != NULL) {
7343 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7344 		    cpi = cpi->cpi_next) {
7345 			if (cpi->cpi_pip == pip) {
7346 				cpi->cpi_pip = NULL;
7347 				break;
7348 			}
7349 		}
7350 	}
7351 	rw_exit(&vhcache->vhcache_lock);
7352 }
7353 
7354 /*
7355  * Flush the vhci cache to disk.
7356  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7357  */
7358 static int
7359 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7360 {
7361 	nvlist_t *nvl;
7362 	int err;
7363 	int rv;
7364 
7365 	/*
7366 	 * It is possible that the system may shutdown before
7367 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7368 	 * flushing the cache in this case do not check for
7369 	 * i_ddi_io_initialized when force flag is set.
7370 	 */
7371 	if (force_flag == 0 && !i_ddi_io_initialized())
7372 		return (MDI_FAILURE);
7373 
7374 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7375 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7376 		nvlist_free(nvl);
7377 	} else
7378 		err = EFAULT;
7379 
7380 	rv = MDI_SUCCESS;
7381 	mutex_enter(&vhc->vhc_lock);
7382 	if (err != 0) {
7383 		if (err == EROFS) {
7384 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7385 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7386 			    MDI_VHC_VHCACHE_DIRTY);
7387 		} else {
7388 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7389 				cmn_err(CE_CONT, "%s: update failed\n",
7390 				    vhc->vhc_vhcache_filename);
7391 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7392 			}
7393 			rv = MDI_FAILURE;
7394 		}
7395 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7396 		cmn_err(CE_CONT,
7397 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7398 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7399 	}
7400 	mutex_exit(&vhc->vhc_lock);
7401 
7402 	return (rv);
7403 }
7404 
7405 /*
7406  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7407  * Exits itself if left idle for the idle timeout period.
7408  */
7409 static void
7410 vhcache_flush_thread(void *arg)
7411 {
7412 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7413 	clock_t idle_time, quit_at_ticks;
7414 	callb_cpr_t cprinfo;
7415 
7416 	/* number of seconds to sleep idle before exiting */
7417 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7418 
7419 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7420 	    "mdi_vhcache_flush");
7421 	mutex_enter(&vhc->vhc_lock);
7422 	for (; ; ) {
7423 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7424 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7425 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7426 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7427 				(void) cv_timedwait(&vhc->vhc_cv,
7428 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7429 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7430 			} else {
7431 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7432 				mutex_exit(&vhc->vhc_lock);
7433 
7434 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7435 					vhcache_dirty(vhc);
7436 
7437 				mutex_enter(&vhc->vhc_lock);
7438 			}
7439 		}
7440 
7441 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7442 
7443 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7444 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7445 		    ddi_get_lbolt() < quit_at_ticks) {
7446 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7447 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7448 			    quit_at_ticks);
7449 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7450 		}
7451 
7452 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7453 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7454 			goto out;
7455 	}
7456 
7457 out:
7458 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7459 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7460 	CALLB_CPR_EXIT(&cprinfo);
7461 }
7462 
7463 /*
7464  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7465  */
7466 static void
7467 vhcache_dirty(mdi_vhci_config_t *vhc)
7468 {
7469 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7470 	int create_thread;
7471 
7472 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7473 	/* do not flush cache until the cache is fully built */
7474 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7475 		rw_exit(&vhcache->vhcache_lock);
7476 		return;
7477 	}
7478 	rw_exit(&vhcache->vhcache_lock);
7479 
7480 	mutex_enter(&vhc->vhc_lock);
7481 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7482 		mutex_exit(&vhc->vhc_lock);
7483 		return;
7484 	}
7485 
7486 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7487 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
7488 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
7489 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7490 		cv_broadcast(&vhc->vhc_cv);
7491 		create_thread = 0;
7492 	} else {
7493 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
7494 		create_thread = 1;
7495 	}
7496 	mutex_exit(&vhc->vhc_lock);
7497 
7498 	if (create_thread)
7499 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
7500 		    0, &p0, TS_RUN, minclsyspri);
7501 }
7502 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 *
 * Allocated by bus_config_all_phcis() and freed, together with the path
 * string, by bus_config_phci() once the operation has been carried out.
 */
typedef struct mdi_phci_bus_config_s {
	/* private copy of the cached phci path to be configured */
	char *phbc_phci_path;
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	/* next request on the list built by bus_config_all_phcis() */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;
7512 
/*
 * vhci bus config structure - one for each vhci bus config operation.
 * Shared by all the per-phci requests (mdi_phci_bus_config_t) that make up
 * the operation.
 */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	/* held while vhbc_thr_count is decremented and waited on */
	kmutex_t vhbc_lock;
	/* broadcast by bus_config_phci() when vhbc_thr_count reaches 0 */
	kcondvar_t vhbc_cv;
	/* number of per-phci requests that have not completed yet */
	int vhbc_thr_count;
} mdi_vhci_bus_config_t;
7522 
7523 /*
7524  * bus config the specified phci
7525  */
7526 static void
7527 bus_config_phci(void *arg)
7528 {
7529 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
7530 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
7531 	dev_info_t *ph_dip;
7532 
7533 	/*
7534 	 * first configure all path components upto phci and then configure
7535 	 * the phci children.
7536 	 */
7537 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
7538 	    != NULL) {
7539 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
7540 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
7541 			(void) ndi_devi_config_driver(ph_dip,
7542 			    vhbc->vhbc_op_flags,
7543 			    vhbc->vhbc_op_major);
7544 		} else
7545 			(void) ndi_devi_config(ph_dip,
7546 			    vhbc->vhbc_op_flags);
7547 
7548 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7549 		ndi_rele_devi(ph_dip);
7550 	}
7551 
7552 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
7553 	kmem_free(phbc, sizeof (*phbc));
7554 
7555 	mutex_enter(&vhbc->vhbc_lock);
7556 	vhbc->vhbc_thr_count--;
7557 	if (vhbc->vhbc_thr_count == 0)
7558 		cv_broadcast(&vhbc->vhbc_cv);
7559 	mutex_exit(&vhbc->vhbc_lock);
7560 }
7561 
7562 /*
7563  * Bus config all phcis associated with the vhci in parallel.
7564  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
7565  */
7566 static void
7567 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
7568     ddi_bus_config_op_t op, major_t maj)
7569 {
7570 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
7571 	mdi_vhci_bus_config_t *vhbc;
7572 	mdi_vhcache_phci_t *cphci;
7573 
7574 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7575 	if (vhcache->vhcache_phci_head == NULL) {
7576 		rw_exit(&vhcache->vhcache_lock);
7577 		return;
7578 	}
7579 
7580 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
7581 
7582 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7583 	    cphci = cphci->cphci_next) {
7584 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
7585 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
7586 		    KM_SLEEP);
7587 		phbc->phbc_vhbusconfig = vhbc;
7588 		phbc->phbc_next = phbc_head;
7589 		phbc_head = phbc;
7590 		vhbc->vhbc_thr_count++;
7591 	}
7592 	rw_exit(&vhcache->vhcache_lock);
7593 
7594 	vhbc->vhbc_op = op;
7595 	vhbc->vhbc_op_major = maj;
7596 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
7597 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
7598 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
7599 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
7600 
7601 	/* now create threads to initiate bus config on all phcis in parallel */
7602 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
7603 		phbc_next = phbc->phbc_next;
7604 		if (mdi_mtc_off)
7605 			bus_config_phci((void *)phbc);
7606 		else
7607 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
7608 			    0, &p0, TS_RUN, minclsyspri);
7609 	}
7610 
7611 	mutex_enter(&vhbc->vhbc_lock);
7612 	/* wait until all threads exit */
7613 	while (vhbc->vhbc_thr_count > 0)
7614 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
7615 	mutex_exit(&vhbc->vhbc_lock);
7616 
7617 	mutex_destroy(&vhbc->vhbc_lock);
7618 	cv_destroy(&vhbc->vhbc_cv);
7619 	kmem_free(vhbc, sizeof (*vhbc));
7620 }
7621 
7622 /*
7623  * Single threaded version of bus_config_all_phcis()
7624  */
7625 static void
7626 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
7627     ddi_bus_config_op_t op, major_t maj)
7628 {
7629 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7630 
7631 	single_threaded_vhconfig_enter(vhc);
7632 	bus_config_all_phcis(vhcache, flags, op, maj);
7633 	single_threaded_vhconfig_exit(vhc);
7634 }
7635 
7636 /*
7637  * Perform BUS_CONFIG_ONE on the specified child of the phci.
7638  * The path includes the child component in addition to the phci path.
7639  */
7640 static int
7641 bus_config_one_phci_child(char *path)
7642 {
7643 	dev_info_t *ph_dip, *child;
7644 	char *devnm;
7645 	int rv = MDI_FAILURE;
7646 
7647 	/* extract the child component of the phci */
7648 	devnm = strrchr(path, '/');
7649 	*devnm++ = '\0';
7650 
7651 	/*
7652 	 * first configure all path components upto phci and then
7653 	 * configure the phci child.
7654 	 */
7655 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
7656 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
7657 		    NDI_SUCCESS) {
7658 			/*
7659 			 * release the hold that ndi_devi_config_one() placed
7660 			 */
7661 			ndi_rele_devi(child);
7662 			rv = MDI_SUCCESS;
7663 		}
7664 
7665 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7666 		ndi_rele_devi(ph_dip);
7667 	}
7668 
7669 	devnm--;
7670 	*devnm = '/';
7671 	return (rv);
7672 }
7673 
7674 /*
7675  * Build a list of phci client paths for the specified vhci client.
7676  * The list includes only those phci client paths which aren't configured yet.
7677  */
7678 static mdi_phys_path_t *
7679 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
7680 {
7681 	mdi_vhcache_pathinfo_t *cpi;
7682 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
7683 	int config_path, len;
7684 
7685 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7686 		/*
7687 		 * include only those paths that aren't configured.
7688 		 */
7689 		config_path = 0;
7690 		if (cpi->cpi_pip == NULL)
7691 			config_path = 1;
7692 		else {
7693 			MDI_PI_LOCK(cpi->cpi_pip);
7694 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
7695 				config_path = 1;
7696 			MDI_PI_UNLOCK(cpi->cpi_pip);
7697 		}
7698 
7699 		if (config_path) {
7700 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
7701 			len = strlen(cpi->cpi_cphci->cphci_path) +
7702 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
7703 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
7704 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
7705 			    cpi->cpi_cphci->cphci_path, ct_name,
7706 			    cpi->cpi_addr);
7707 			pp->phys_path_next = NULL;
7708 
7709 			if (pp_head == NULL)
7710 				pp_head = pp;
7711 			else
7712 				pp_tail->phys_path_next = pp;
7713 			pp_tail = pp;
7714 		}
7715 	}
7716 
7717 	return (pp_head);
7718 }
7719 
7720 /*
7721  * Free the memory allocated for phci client path list.
7722  */
7723 static void
7724 free_phclient_path_list(mdi_phys_path_t *pp_head)
7725 {
7726 	mdi_phys_path_t *pp, *pp_next;
7727 
7728 	for (pp = pp_head; pp != NULL; pp = pp_next) {
7729 		pp_next = pp->phys_path_next;
7730 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
7731 		kmem_free(pp, sizeof (*pp));
7732 	}
7733 }
7734 
7735 /*
7736  * Allocated async client structure and initialize with the specified values.
7737  */
7738 static mdi_async_client_config_t *
7739 alloc_async_client_config(char *ct_name, char *ct_addr,
7740     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7741 {
7742 	mdi_async_client_config_t *acc;
7743 
7744 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
7745 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
7746 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
7747 	acc->acc_phclient_path_list_head = pp_head;
7748 	init_vhcache_lookup_token(&acc->acc_token, tok);
7749 	acc->acc_next = NULL;
7750 	return (acc);
7751 }
7752 
7753 /*
7754  * Free the memory allocated for the async client structure and their members.
7755  */
7756 static void
7757 free_async_client_config(mdi_async_client_config_t *acc)
7758 {
7759 	if (acc->acc_phclient_path_list_head)
7760 		free_phclient_path_list(acc->acc_phclient_path_list_head);
7761 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
7762 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
7763 	kmem_free(acc, sizeof (*acc));
7764 }
7765 
7766 /*
7767  * Sort vhcache pathinfos (cpis) of the specified client.
7768  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7769  * flag set come at the beginning of the list. All cpis which have this
7770  * flag set come at the end of the list.
7771  */
7772 static void
7773 sort_vhcache_paths(mdi_vhcache_client_t *cct)
7774 {
7775 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
7776 
7777 	cpi_head = cct->cct_cpi_head;
7778 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
7779 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
7780 		cpi_next = cpi->cpi_next;
7781 		enqueue_vhcache_pathinfo(cct, cpi);
7782 	}
7783 }
7784 
7785 /*
7786  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
7787  * every vhcache pathinfo of the specified client. If not adjust the flag
7788  * setting appropriately.
7789  *
7790  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
7791  * on-disk vhci cache. So every time this flag is updated the cache must be
7792  * flushed.
7793  */
7794 static void
7795 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7796     mdi_vhcache_lookup_token_t *tok)
7797 {
7798 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7799 	mdi_vhcache_client_t *cct;
7800 	mdi_vhcache_pathinfo_t *cpi;
7801 
7802 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7803 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
7804 	    == NULL) {
7805 		rw_exit(&vhcache->vhcache_lock);
7806 		return;
7807 	}
7808 
7809 	/*
7810 	 * to avoid unnecessary on-disk cache updates, first check if an
7811 	 * update is really needed. If no update is needed simply return.
7812 	 */
7813 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7814 		if ((cpi->cpi_pip != NULL &&
7815 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
7816 		    (cpi->cpi_pip == NULL &&
7817 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
7818 			break;
7819 		}
7820 	}
7821 	if (cpi == NULL) {
7822 		rw_exit(&vhcache->vhcache_lock);
7823 		return;
7824 	}
7825 
7826 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
7827 		rw_exit(&vhcache->vhcache_lock);
7828 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7829 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
7830 		    tok)) == NULL) {
7831 			rw_exit(&vhcache->vhcache_lock);
7832 			return;
7833 		}
7834 	}
7835 
7836 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7837 		if (cpi->cpi_pip != NULL)
7838 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7839 		else
7840 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7841 	}
7842 	sort_vhcache_paths(cct);
7843 
7844 	rw_exit(&vhcache->vhcache_lock);
7845 	vhcache_dirty(vhc);
7846 }
7847 
7848 /*
7849  * Configure all specified paths of the client.
7850  */
7851 static void
7852 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7853     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7854 {
7855 	mdi_phys_path_t *pp;
7856 
7857 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
7858 		(void) bus_config_one_phci_child(pp->phys_path);
7859 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
7860 }
7861 
7862 /*
7863  * Dequeue elements from vhci async client config list and bus configure
7864  * their corresponding phci clients.
7865  */
7866 static void
7867 config_client_paths_thread(void *arg)
7868 {
7869 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7870 	mdi_async_client_config_t *acc;
7871 	clock_t quit_at_ticks;
7872 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
7873 	callb_cpr_t cprinfo;
7874 
7875 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7876 	    "mdi_config_client_paths");
7877 
7878 	for (; ; ) {
7879 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7880 
7881 		mutex_enter(&vhc->vhc_lock);
7882 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7883 		    vhc->vhc_acc_list_head == NULL &&
7884 		    ddi_get_lbolt() < quit_at_ticks) {
7885 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7886 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7887 			    quit_at_ticks);
7888 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7889 		}
7890 
7891 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7892 		    vhc->vhc_acc_list_head == NULL)
7893 			goto out;
7894 
7895 		acc = vhc->vhc_acc_list_head;
7896 		vhc->vhc_acc_list_head = acc->acc_next;
7897 		if (vhc->vhc_acc_list_head == NULL)
7898 			vhc->vhc_acc_list_tail = NULL;
7899 		vhc->vhc_acc_count--;
7900 		mutex_exit(&vhc->vhc_lock);
7901 
7902 		config_client_paths_sync(vhc, acc->acc_ct_name,
7903 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
7904 		    &acc->acc_token);
7905 
7906 		free_async_client_config(acc);
7907 	}
7908 
7909 out:
7910 	vhc->vhc_acc_thrcount--;
7911 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7912 	CALLB_CPR_EXIT(&cprinfo);
7913 }
7914 
7915 /*
7916  * Arrange for all the phci client paths (pp_head) for the specified client
7917  * to be bus configured asynchronously by a thread.
7918  */
7919 static void
7920 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
7921     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
7922 {
7923 	mdi_async_client_config_t *acc, *newacc;
7924 	int create_thread;
7925 
7926 	if (pp_head == NULL)
7927 		return;
7928 
7929 	if (mdi_mtc_off) {
7930 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
7931 		free_phclient_path_list(pp_head);
7932 		return;
7933 	}
7934 
7935 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
7936 	ASSERT(newacc);
7937 
7938 	mutex_enter(&vhc->vhc_lock);
7939 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
7940 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
7941 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
7942 			free_async_client_config(newacc);
7943 			mutex_exit(&vhc->vhc_lock);
7944 			return;
7945 		}
7946 	}
7947 
7948 	if (vhc->vhc_acc_list_head == NULL)
7949 		vhc->vhc_acc_list_head = newacc;
7950 	else
7951 		vhc->vhc_acc_list_tail->acc_next = newacc;
7952 	vhc->vhc_acc_list_tail = newacc;
7953 	vhc->vhc_acc_count++;
7954 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
7955 		cv_broadcast(&vhc->vhc_cv);
7956 		create_thread = 0;
7957 	} else {
7958 		vhc->vhc_acc_thrcount++;
7959 		create_thread = 1;
7960 	}
7961 	mutex_exit(&vhc->vhc_lock);
7962 
7963 	if (create_thread)
7964 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
7965 		    0, &p0, TS_RUN, minclsyspri);
7966 }
7967 
7968 /*
7969  * Return number of online paths for the specified client.
7970  */
7971 static int
7972 nonline_paths(mdi_vhcache_client_t *cct)
7973 {
7974 	mdi_vhcache_pathinfo_t *cpi;
7975 	int online_count = 0;
7976 
7977 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7978 		if (cpi->cpi_pip != NULL) {
7979 			MDI_PI_LOCK(cpi->cpi_pip);
7980 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
7981 				online_count++;
7982 			MDI_PI_UNLOCK(cpi->cpi_pip);
7983 		}
7984 	}
7985 
7986 	return (online_count);
7987 }
7988 
7989 /*
7990  * Bus configure all paths for the specified vhci client.
7991  * If at least one path for the client is already online, the remaining paths
7992  * will be configured asynchronously. Otherwise, it synchronously configures
7993  * the paths until at least one path is online and then rest of the paths
7994  * will be configured asynchronously.
7995  */
7996 static void
7997 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
7998 {
7999 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8000 	mdi_phys_path_t *pp_head, *pp;
8001 	mdi_vhcache_client_t *cct;
8002 	mdi_vhcache_lookup_token_t tok;
8003 
8004 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8005 
8006 	init_vhcache_lookup_token(&tok, NULL);
8007 
8008 	if (ct_name == NULL || ct_addr == NULL ||
8009 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8010 	    == NULL ||
8011 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8012 		rw_exit(&vhcache->vhcache_lock);
8013 		return;
8014 	}
8015 
8016 	/* if at least one path is online, configure the rest asynchronously */
8017 	if (nonline_paths(cct) > 0) {
8018 		rw_exit(&vhcache->vhcache_lock);
8019 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8020 		return;
8021 	}
8022 
8023 	rw_exit(&vhcache->vhcache_lock);
8024 
8025 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8026 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8027 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8028 
8029 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8030 			    ct_addr, &tok)) == NULL) {
8031 				rw_exit(&vhcache->vhcache_lock);
8032 				goto out;
8033 			}
8034 
8035 			if (nonline_paths(cct) > 0 &&
8036 			    pp->phys_path_next != NULL) {
8037 				rw_exit(&vhcache->vhcache_lock);
8038 				config_client_paths_async(vhc, ct_name, ct_addr,
8039 				    pp->phys_path_next, &tok);
8040 				pp->phys_path_next = NULL;
8041 				goto out;
8042 			}
8043 
8044 			rw_exit(&vhcache->vhcache_lock);
8045 		}
8046 	}
8047 
8048 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8049 out:
8050 	free_phclient_path_list(pp_head);
8051 }
8052 
8053 static void
8054 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8055 {
8056 	mutex_enter(&vhc->vhc_lock);
8057 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8058 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8059 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8060 	mutex_exit(&vhc->vhc_lock);
8061 }
8062 
8063 static void
8064 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8065 {
8066 	mutex_enter(&vhc->vhc_lock);
8067 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8068 	cv_broadcast(&vhc->vhc_cv);
8069 	mutex_exit(&vhc->vhc_lock);
8070 }
8071 
8072 /*
8073  * Attach the phci driver instances associated with the vhci:
8074  * If root is mounted attach all phci driver instances.
8075  * If root is not mounted, attach the instances of only those phci
8076  * drivers that have the root support.
8077  */
8078 static void
8079 attach_phci_drivers(mdi_vhci_config_t *vhc)
8080 {
8081 	int  i;
8082 	major_t m;
8083 
8084 	for (i = 0; i < vhc->vhc_nphci_drivers; i++) {
8085 		if (modrootloaded == 0 &&
8086 		    vhc->vhc_phci_driver_list[i].phdriver_root_support == 0)
8087 			continue;
8088 
8089 		m = ddi_name_to_major(
8090 		    vhc->vhc_phci_driver_list[i].phdriver_name);
8091 		if (m != (major_t)-1) {
8092 			if (ddi_hold_installed_driver(m) != NULL)
8093 				ddi_rele_driver(m);
8094 		}
8095 	}
8096 }
8097 
8098 /*
8099  * Build vhci cache:
8100  *
8101  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8102  * the phci driver instances. During this process the cache gets built.
8103  *
8104  * Cache is built fully if the root is mounted.
8105  * If the root is not mounted, phci drivers that do not have root support
8106  * are not attached. As a result the cache is built partially. The entries
8107  * in the cache reflect only those phci drivers that have root support.
8108  */
8109 static int
8110 build_vhci_cache(mdi_vhci_config_t *vhc)
8111 {
8112 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8113 
8114 	single_threaded_vhconfig_enter(vhc);
8115 
8116 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8117 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8118 		rw_exit(&vhcache->vhcache_lock);
8119 		single_threaded_vhconfig_exit(vhc);
8120 		return (0);
8121 	}
8122 	rw_exit(&vhcache->vhcache_lock);
8123 
8124 	attach_phci_drivers(vhc);
8125 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8126 	    BUS_CONFIG_ALL, (major_t)-1);
8127 
8128 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8129 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8130 	rw_exit(&vhcache->vhcache_lock);
8131 
8132 	single_threaded_vhconfig_exit(vhc);
8133 	vhcache_dirty(vhc);
8134 	return (1);
8135 }
8136 
8137 /*
8138  * Determine if discovery of paths is needed.
8139  */
8140 static int
8141 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8142 {
8143 	int rv = 1;
8144 
8145 	mutex_enter(&vhc->vhc_lock);
8146 	if (i_ddi_io_initialized() == 0) {
8147 		if (vhc->vhc_path_discovery_boot > 0) {
8148 			vhc->vhc_path_discovery_boot--;
8149 			goto out;
8150 		}
8151 	} else {
8152 		if (vhc->vhc_path_discovery_postboot > 0) {
8153 			vhc->vhc_path_discovery_postboot--;
8154 			goto out;
8155 		}
8156 	}
8157 
8158 	/*
8159 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8160 	 * This is to avoid a series of full path discoveries when opening
8161 	 * stale /dev/[r]dsk links.
8162 	 */
8163 	if (mdi_path_discovery_interval != -1 &&
8164 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8165 		goto out;
8166 
8167 	rv = 0;
8168 out:
8169 	mutex_exit(&vhc->vhc_lock);
8170 	return (rv);
8171 }
8172 
8173 /*
8174  * Discover all paths:
8175  *
8176  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8177  * driver instances. During this process all paths will be discovered.
8178  */
8179 static int
8180 vhcache_discover_paths(mdi_vhci_config_t *vhc)
8181 {
8182 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8183 	int rv = 0;
8184 
8185 	single_threaded_vhconfig_enter(vhc);
8186 
8187 	if (vhcache_do_discovery(vhc)) {
8188 		attach_phci_drivers(vhc);
8189 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8190 		    NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1);
8191 
8192 		mutex_enter(&vhc->vhc_lock);
8193 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8194 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8195 		mutex_exit(&vhc->vhc_lock);
8196 		rv = 1;
8197 	}
8198 
8199 	single_threaded_vhconfig_exit(vhc);
8200 	return (rv);
8201 }
8202 
8203 /*
8204  * Generic vhci bus config implementation:
8205  *
8206  * Parameters
8207  *	vdip	vhci dip
8208  *	flags	bus config flags
8209  *	op	bus config operation
8210  *	The remaining parameters are bus config operation specific
8211  *
8212  * for BUS_CONFIG_ONE
8213  *	arg	pointer to name@addr
8214  *	child	upon successful return from this function, *child will be
8215  *		set to the configured and held devinfo child node of vdip.
8216  *	ct_addr	pointer to client address (i.e. GUID)
8217  *
8218  * for BUS_CONFIG_DRIVER
8219  *	arg	major number of the driver
8220  *	child and ct_addr parameters are ignored
8221  *
8222  * for BUS_CONFIG_ALL
8223  *	arg, child, and ct_addr parameters are ignored
8224  *
8225  * Note that for the rest of the bus config operations, this function simply
8226  * calls the framework provided default bus config routine.
8227  */
8228 int
8229 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8230     void *arg, dev_info_t **child, char *ct_addr)
8231 {
8232 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8233 	mdi_vhci_config_t *vhc = vh->vh_config;
8234 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8235 	int rv = 0;
8236 	int params_valid = 0;
8237 	char *cp;
8238 
8239 	/*
8240 	 * While bus configuring phcis, the phci driver interactions with MDI
8241 	 * cause child nodes to be enumerated under the vhci node for which
8242 	 * they need to ndi_devi_enter the vhci node.
8243 	 *
8244 	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
8245 	 * for the bus config operations on phcis to finish while holding the
8246 	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
8247 	 * phcis and call the default framework provided bus config function
8248 	 * if we are called with ndi_devi_enter lock held.
8249 	 */
8250 	if (DEVI_BUSY_OWNED(vdip)) {
8251 		MDI_DEBUG(2, (CE_NOTE, vdip,
8252 		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
8253 		goto default_bus_config;
8254 	}
8255 
8256 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8257 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8258 		rw_exit(&vhcache->vhcache_lock);
8259 		rv = build_vhci_cache(vhc);
8260 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8261 	}
8262 
8263 	switch (op) {
8264 	case BUS_CONFIG_ONE:
8265 		if (arg != NULL && ct_addr != NULL) {
8266 			/* extract node name */
8267 			cp = (char *)arg;
8268 			while (*cp != '\0' && *cp != '@')
8269 				cp++;
8270 			if (*cp == '@') {
8271 				params_valid = 1;
8272 				*cp = '\0';
8273 				config_client_paths(vhc, (char *)arg, ct_addr);
8274 				/* config_client_paths() releases cache_lock */
8275 				*cp = '@';
8276 				break;
8277 			}
8278 		}
8279 
8280 		rw_exit(&vhcache->vhcache_lock);
8281 		break;
8282 
8283 	case BUS_CONFIG_DRIVER:
8284 		rw_exit(&vhcache->vhcache_lock);
8285 		if (rv == 0)
8286 			st_bus_config_all_phcis(vhc, flags, op,
8287 			    (major_t)(uintptr_t)arg);
8288 		break;
8289 
8290 	case BUS_CONFIG_ALL:
8291 		rw_exit(&vhcache->vhcache_lock);
8292 		if (rv == 0)
8293 			st_bus_config_all_phcis(vhc, flags, op, -1);
8294 		break;
8295 
8296 	default:
8297 		rw_exit(&vhcache->vhcache_lock);
8298 		break;
8299 	}
8300 
8301 
8302 default_bus_config:
8303 	/*
8304 	 * All requested child nodes are enumerated under the vhci.
8305 	 * Now configure them.
8306 	 */
8307 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8308 	    NDI_SUCCESS) {
8309 		return (MDI_SUCCESS);
8310 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8311 		/* discover all paths and try configuring again */
8312 		if (vhcache_discover_paths(vhc) &&
8313 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8314 		    NDI_SUCCESS)
8315 			return (MDI_SUCCESS);
8316 	}
8317 
8318 	return (MDI_FAILURE);
8319 }
8320 
8321 /*
8322  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8323  */
8324 static nvlist_t *
8325 read_on_disk_vhci_cache(char *vhci_class)
8326 {
8327 	nvlist_t *nvl;
8328 	int err;
8329 	char *filename;
8330 
8331 	filename = vhclass2vhcache_filename(vhci_class);
8332 
8333 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8334 		kmem_free(filename, strlen(filename) + 1);
8335 		return (nvl);
8336 	} else if (err == EIO)
8337 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8338 	else if (err == EINVAL)
8339 		cmn_err(CE_WARN,
8340 		    "%s: data file corrupted, will recreate\n", filename);
8341 
8342 	kmem_free(filename, strlen(filename) + 1);
8343 	return (NULL);
8344 }
8345 
8346 /*
8347  * Read on-disk vhci cache into nvlists for all vhci classes.
8348  * Called during booting by i_ddi_read_devices_files().
8349  */
8350 void
8351 mdi_read_devices_files(void)
8352 {
8353 	int i;
8354 
8355 	for (i = 0; i < N_VHCI_CLASSES; i++)
8356 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
8357 }
8358 
8359 /*
8360  * Remove all stale entries from vhci cache.
8361  */
8362 static void
8363 clean_vhcache(mdi_vhci_config_t *vhc)
8364 {
8365 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8366 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
8367 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
8368 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
8369 
8370 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8371 
8372 	cct_head = vhcache->vhcache_client_head;
8373 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
8374 	for (cct = cct_head; cct != NULL; cct = cct_next) {
8375 		cct_next = cct->cct_next;
8376 
8377 		cpi_head = cct->cct_cpi_head;
8378 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8379 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8380 			cpi_next = cpi->cpi_next;
8381 			if (cpi->cpi_pip != NULL) {
8382 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
8383 				enqueue_tail_vhcache_pathinfo(cct, cpi);
8384 			} else
8385 				free_vhcache_pathinfo(cpi);
8386 		}
8387 
8388 		if (cct->cct_cpi_head != NULL)
8389 			enqueue_vhcache_client(vhcache, cct);
8390 		else {
8391 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
8392 			    (mod_hash_key_t)cct->cct_name_addr);
8393 			free_vhcache_client(cct);
8394 		}
8395 	}
8396 
8397 	cphci_head = vhcache->vhcache_phci_head;
8398 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
8399 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
8400 		cphci_next = cphci->cphci_next;
8401 		if (cphci->cphci_phci != NULL)
8402 			enqueue_vhcache_phci(vhcache, cphci);
8403 		else
8404 			free_vhcache_phci(cphci);
8405 	}
8406 
8407 	vhcache->vhcache_clean_time = lbolt64;
8408 	rw_exit(&vhcache->vhcache_lock);
8409 	vhcache_dirty(vhc);
8410 }
8411 
8412 /*
8413  * Remove all stale entries from vhci cache.
8414  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
8415  */
8416 void
8417 mdi_clean_vhcache(void)
8418 {
8419 	mdi_vhci_t *vh;
8420 
8421 	mutex_enter(&mdi_mutex);
8422 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8423 		vh->vh_refcnt++;
8424 		mutex_exit(&mdi_mutex);
8425 		clean_vhcache(vh->vh_config);
8426 		mutex_enter(&mdi_mutex);
8427 		vh->vh_refcnt--;
8428 	}
8429 	mutex_exit(&mdi_mutex);
8430 }
8431 
8432 /*
8433  * mdi_vhci_walk_clients():
8434  *		Walker routine to traverse client dev_info nodes
8435  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
8436  * below the client, including nexus devices, which we dont want.
8437  * So we just traverse the immediate siblings, starting from 1st client.
8438  */
8439 void
8440 mdi_vhci_walk_clients(dev_info_t *vdip,
8441     int (*f)(dev_info_t *, void *), void *arg)
8442 {
8443 	dev_info_t	*cdip;
8444 	mdi_client_t	*ct;
8445 
8446 	mutex_enter(&mdi_mutex);
8447 
8448 	cdip = ddi_get_child(vdip);
8449 
8450 	while (cdip) {
8451 		ct = i_devi_get_client(cdip);
8452 		MDI_CLIENT_LOCK(ct);
8453 
8454 		switch ((*f)(cdip, arg)) {
8455 		case DDI_WALK_CONTINUE:
8456 			cdip = ddi_get_next_sibling(cdip);
8457 			MDI_CLIENT_UNLOCK(ct);
8458 			break;
8459 
8460 		default:
8461 			MDI_CLIENT_UNLOCK(ct);
8462 			mutex_exit(&mdi_mutex);
8463 			return;
8464 		}
8465 	}
8466 
8467 	mutex_exit(&mdi_mutex);
8468 }
8469 
8470 /*
8471  * mdi_vhci_walk_phcis():
8472  *		Walker routine to traverse phci dev_info nodes
8473  */
8474 void
8475 mdi_vhci_walk_phcis(dev_info_t *vdip,
8476     int (*f)(dev_info_t *, void *), void *arg)
8477 {
8478 	mdi_vhci_t	*vh = NULL;
8479 	mdi_phci_t	*ph = NULL;
8480 
8481 	mutex_enter(&mdi_mutex);
8482 
8483 	vh = i_devi_get_vhci(vdip);
8484 	ph = vh->vh_phci_head;
8485 
8486 	while (ph) {
8487 		MDI_PHCI_LOCK(ph);
8488 
8489 		switch ((*f)(ph->ph_dip, arg)) {
8490 		case DDI_WALK_CONTINUE:
8491 			MDI_PHCI_UNLOCK(ph);
8492 			ph = ph->ph_next;
8493 			break;
8494 
8495 		default:
8496 			MDI_PHCI_UNLOCK(ph);
8497 			mutex_exit(&mdi_mutex);
8498 			return;
8499 		}
8500 	}
8501 
8502 	mutex_exit(&mdi_mutex);
8503 }
8504 
8505 
8506 /*
8507  * mdi_walk_vhcis():
8508  *		Walker routine to traverse vhci dev_info nodes
8509  */
8510 void
8511 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
8512 {
8513 	mdi_vhci_t	*vh = NULL;
8514 
8515 	mutex_enter(&mdi_mutex);
8516 	/*
8517 	 * Scan for already registered vhci
8518 	 */
8519 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
8520 		vh->vh_refcnt++;
8521 		mutex_exit(&mdi_mutex);
8522 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
8523 			mutex_enter(&mdi_mutex);
8524 			vh->vh_refcnt--;
8525 			break;
8526 		} else {
8527 			mutex_enter(&mdi_mutex);
8528 			vh->vh_refcnt--;
8529 		}
8530 	}
8531 
8532 	mutex_exit(&mdi_mutex);
8533 }
8534 
8535 /*
8536  * i_mdi_log_sysevent():
8537  *		Logs events for pickup by syseventd
8538  */
8539 static void
8540 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
8541 {
8542 	char		*path_name;
8543 	nvlist_t	*attr_list;
8544 
8545 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
8546 	    KM_SLEEP) != DDI_SUCCESS) {
8547 		goto alloc_failed;
8548 	}
8549 
8550 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
8551 	(void) ddi_pathname(dip, path_name);
8552 
8553 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
8554 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
8555 		goto error;
8556 	}
8557 
8558 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
8559 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
8560 		goto error;
8561 	}
8562 
8563 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
8564 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
8565 		goto error;
8566 	}
8567 
8568 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
8569 	    path_name) != DDI_SUCCESS) {
8570 		goto error;
8571 	}
8572 
8573 	if (nvlist_add_string(attr_list, DDI_CLASS,
8574 	    ph_vh_class) != DDI_SUCCESS) {
8575 		goto error;
8576 	}
8577 
8578 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
8579 	    attr_list, NULL, DDI_SLEEP);
8580 
8581 error:
8582 	kmem_free(path_name, MAXPATHLEN);
8583 	nvlist_free(attr_list);
8584 	return;
8585 
8586 alloc_failed:
8587 	MDI_DEBUG(1, (CE_WARN, dip,
8588 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
8589 }
8590