xref: /illumos-gate/usr/src/uts/common/os/sunmdi.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
30  * detailed discussion of the overall mpxio architecture.
31  *
32  * Default locking order:
33  *
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
35  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
39  */
40 
41 #include <sys/note.h>
42 #include <sys/types.h>
43 #include <sys/varargs.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
49 #include <sys/open.h>
50 #include <sys/kmem.h>
51 #include <sys/poll.h>
52 #include <sys/conf.h>
53 #include <sys/bootconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/stat.h>
56 #include <sys/ddi.h>
57 #include <sys/sunddi.h>
58 #include <sys/ddipropdefs.h>
59 #include <sys/sunndi.h>
60 #include <sys/ndi_impldefs.h>
61 #include <sys/promif.h>
62 #include <sys/sunmdi.h>
63 #include <sys/mdi_impldefs.h>
64 #include <sys/taskq.h>
65 #include <sys/epm.h>
66 #include <sys/sunpm.h>
67 
68 #ifdef	DEBUG
69 #include <sys/debug.h>
70 int	mdi_debug = 1;
71 #define	MDI_DEBUG(level, stmnt) \
72 	    if (mdi_debug >= (level)) i_mdi_log stmnt
73 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
74 #else	/* !DEBUG */
75 #define	MDI_DEBUG(level, stmnt)
76 #endif	/* DEBUG */
77 
78 extern pri_t	minclsyspri;
79 extern int	modrootloaded;
80 
81 /*
82  * Global mutex:
83  * Protects vHCI list and structure members, pHCI and Client lists.
84  */
85 kmutex_t	mdi_mutex;
86 
87 /*
88  * Registered vHCI class driver lists
89  */
90 int		mdi_vhci_count;
91 mdi_vhci_t	*mdi_vhci_head;
92 mdi_vhci_t	*mdi_vhci_tail;
93 
94 /*
95  * Client Hash Table size
96  */
97 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
98 
99 /*
100  * taskq interface definitions
101  */
102 #define	MDI_TASKQ_N_THREADS	8
103 #define	MDI_TASKQ_PRI		minclsyspri
104 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
105 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
106 
107 taskq_t				*mdi_taskq;
108 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
109 
110 static int		mdi_max_bus_config_threads = 100;
111 /*
112  * To reduce unnecessary BUS_CONFIG_ALLs, do not BUS_CONFIG_ALL phcis in the
113  * context of a BUS_CONFIG_ONE if a BUS_CONFIG_ALL has already been performed
114  * in the last mdi_bus_config_timeout seconds.
115  */
116 static int		mdi_bus_config_timeout = 60;	/* in seconds */
117 
118 /*
119  * MDI component property name/value string definitions
120  */
121 const char 		*mdi_component_prop = "mpxio-component";
122 const char		*mdi_component_prop_vhci = "vhci";
123 const char		*mdi_component_prop_phci = "phci";
124 const char		*mdi_component_prop_client = "client";
125 
126 /*
127  * MDI client global unique identifier property name
128  */
129 const char		*mdi_client_guid_prop = "client-guid";
130 
131 /*
132  * MDI client load balancing property name/value string definitions
133  */
134 const char		*mdi_load_balance = "load-balance";
135 const char		*mdi_load_balance_none = "none";
136 const char		*mdi_load_balance_rr = "round-robin";
137 const char		*mdi_load_balance_lba = "logical-block";
138 
139 /*
140  * Obsolete vHCI class definition; to be removed after Leadville update
141  */
142 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
143 
/*
 * Format string for the notice logged by mdi_vhci_register() when a second
 * vHCI driver tries to register for a class that already has one.  Takes a
 * single %s argument: the class name.  Never modified, hence const.
 */
static const char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";
147 
148 /*
149  * Static function prototypes
150  */
151 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
152 static int		i_mdi_client_offline(dev_info_t *, uint_t);
153 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
154 static void		i_mdi_phci_post_detach(dev_info_t *,
155 			    ddi_detach_cmd_t, int);
156 static int		i_mdi_client_pre_detach(dev_info_t *,
157 			    ddi_detach_cmd_t);
158 static void		i_mdi_client_post_detach(dev_info_t *,
159 			    ddi_detach_cmd_t, int);
160 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
161 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
162 static int 		i_mdi_lba_lb(mdi_client_t *ct,
163 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
164 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
165 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
166 static void		i_mdi_pm_reset_client(mdi_client_t *);
167 static void		i_mdi_pm_hold_all_phci(mdi_client_t *);
168 static int		i_mdi_power_all_phci(mdi_client_t *);
169 
170 
171 /*
172  * Internal mdi_pathinfo node functions
173  */
174 static int		i_mdi_pi_kstat_create(mdi_pathinfo_t *);
175 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
176 
177 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
178 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
179 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
180 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
181 static void		i_mdi_phci_get_client_lock(mdi_phci_t *,
182 			    mdi_client_t *);
183 static void		i_mdi_phci_unlock(mdi_phci_t *);
184 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *,
185 			    mdi_client_t *, int);
186 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
187 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
188 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
189 			    mdi_client_t *);
190 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
191 static void		i_mdi_client_remove_path(mdi_client_t *,
192 			    mdi_pathinfo_t *);
193 
194 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
195 			    mdi_pathinfo_state_t, int);
196 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
197 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
198 			    char **, int, int);
199 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
200 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
201 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
202 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *, int);
203 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
204 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
205 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *);
206 static void		i_mdi_client_update_state(mdi_client_t *);
207 static int		i_mdi_client_compute_state(mdi_client_t *,
208 			    mdi_phci_t *);
209 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
210 static void		i_mdi_client_unlock(mdi_client_t *);
211 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
212 static mdi_client_t	*i_devi_get_client(dev_info_t *);
213 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int,
214 			int);
215 /*
216  * Failover related function prototypes
217  */
218 static int		i_mdi_failover(void *);
219 
220 /*
221  * misc internal functions
222  */
223 static int		i_mdi_get_hash_key(char *);
224 static int		i_map_nvlist_error_to_mdi(int);
225 static void		i_mdi_report_path_state(mdi_client_t *,
226 			    mdi_pathinfo_t *);
227 
228 /* called once when first vhci registers with mdi */
229 static void
230 i_mdi_init()
231 {
232 	static int initialized = 0;
233 
234 	if (initialized)
235 		return;
236 	initialized = 1;
237 
238 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
239 	/*
240 	 * Create our taskq resources
241 	 */
242 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
243 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
244 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
245 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
246 }
247 
248 /*
249  * mdi_get_component_type():
250  *		Return mpxio component type
251  * Return Values:
252  *		MDI_COMPONENT_NONE
253  *		MDI_COMPONENT_VHCI
254  *		MDI_COMPONENT_PHCI
255  *		MDI_COMPONENT_CLIENT
256  * XXX This doesn't work under multi-level MPxIO and should be
257  *	removed when clients migrate mdi_is_*() interfaces.
258  */
259 int
260 mdi_get_component_type(dev_info_t *dip)
261 {
262 	return (DEVI(dip)->devi_mdi_component);
263 }
264 
265 /*
266  * mdi_vhci_register():
267  *		Register a vHCI module with the mpxio framework
268  *		mdi_vhci_register() is called by vHCI drivers to register the
269  *		'class_driver' vHCI driver and its MDI entrypoints with the
270  *		mpxio framework.  The vHCI driver must call this interface as
271  *		part of its attach(9e) handler.
272  *		Competing threads may try to attach mdi_vhci_register() as
273  *		the vHCI drivers are loaded and attached as a result of pHCI
274  *		driver instance registration (mdi_phci_register()) with the
275  *		framework.
276  * Return Values:
277  *		MDI_SUCCESS
278  *		MDI_FAILURE
279  */
280 
281 /*ARGSUSED*/
282 int
283 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
284     int flags)
285 {
286 	mdi_vhci_t		*vh = NULL;
287 
288 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
289 
290 	i_mdi_init();
291 
292 	mutex_enter(&mdi_mutex);
293 	/*
294 	 * Scan for already registered vhci
295 	 */
296 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
297 		if (strcmp(vh->vh_class, class) == 0) {
298 			/*
299 			 * vHCI has already been created.  Check for valid
300 			 * vHCI ops registration.  We only support one vHCI
301 			 * module per class
302 			 */
303 			if (vh->vh_ops != NULL) {
304 				mutex_exit(&mdi_mutex);
305 				cmn_err(CE_NOTE, vhci_greeting, class);
306 				return (MDI_FAILURE);
307 			}
308 			break;
309 		}
310 	}
311 
312 	/*
313 	 * if not yet created, create the vHCI component
314 	 */
315 	if (vh == NULL) {
316 		struct client_hash	*hash = NULL;
317 		char			*load_balance;
318 
319 		/*
320 		 * Allocate and initialize the mdi extensions
321 		 */
322 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
323 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
324 		    KM_SLEEP);
325 		vh->vh_client_table = hash;
326 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
327 		(void) strcpy(vh->vh_class, class);
328 		vh->vh_lb = LOAD_BALANCE_RR;
329 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
330 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
331 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
332 				vh->vh_lb = LOAD_BALANCE_NONE;
333 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
334 				    == 0) {
335 				vh->vh_lb = LOAD_BALANCE_LBA;
336 			}
337 			ddi_prop_free(load_balance);
338 		}
339 
340 		/*
341 		 * Store the vHCI ops vectors
342 		 */
343 		vh->vh_dip = vdip;
344 		vh->vh_ops = vops;
345 
346 		/*
347 		 * other members of vh_bus_config are initialized by
348 		 * the above kmem_zalloc of the vhci structure.
349 		 */
350 		cv_init(&vh->vh_bus_config.vhc_cv, NULL, CV_DRIVER, NULL);
351 
352 		if (mdi_vhci_head == NULL) {
353 			mdi_vhci_head = vh;
354 		}
355 		if (mdi_vhci_tail) {
356 			mdi_vhci_tail->vh_next = vh;
357 		}
358 		mdi_vhci_tail = vh;
359 		mdi_vhci_count++;
360 	}
361 
362 	/*
363 	 * Claim the devfs node as a vhci component
364 	 */
365 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
366 
367 	/*
368 	 * Initialize our back reference from dev_info node
369 	 */
370 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
371 	mutex_exit(&mdi_mutex);
372 	return (MDI_SUCCESS);
373 }
374 
375 /*
376  * mdi_vhci_unregister():
377  *		Unregister a vHCI module from mpxio framework
378  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
379  * 		of a vhci to unregister it from the framework.
380  * Return Values:
381  *		MDI_SUCCESS
382  *		MDI_FAILURE
383  */
384 
385 /*ARGSUSED*/
386 int
387 mdi_vhci_unregister(dev_info_t *vdip, int flags)
388 {
389 	mdi_vhci_t	*found, *vh, *prev = NULL;
390 	mdi_phci_config_t *phc, *next_phc;
391 
392 	/*
393 	 * Check for invalid VHCI
394 	 */
395 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
396 		return (MDI_FAILURE);
397 
398 	mutex_enter(&mdi_mutex);
399 
400 	/*
401 	 * Scan the list of registered vHCIs for a match
402 	 */
403 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
404 		if (found == vh)
405 			break;
406 		prev = found;
407 	}
408 
409 	if (found == NULL) {
410 		mutex_exit(&mdi_mutex);
411 		return (MDI_FAILURE);
412 	}
413 
414 	/*
415 	 * Check the pHCI and client count. All the pHCIs and clients
416 	 * should have been unregistered, before a vHCI can be
417 	 * unregistered.
418 	 */
419 	if (vh->vh_phci_count || vh->vh_client_count) {
420 		MDI_DEBUG(1, (CE_NOTE, NULL,
421 		    "!mdi_vhci_unregister: pHCI in registered state.\n"));
422 		mutex_exit(&mdi_mutex);
423 		return (MDI_FAILURE);
424 	}
425 
426 	/*
427 	 * Remove the vHCI from the global list
428 	 */
429 	if (vh == mdi_vhci_head) {
430 		mdi_vhci_head = vh->vh_next;
431 	} else {
432 		prev->vh_next = vh->vh_next;
433 	}
434 	if (vh == mdi_vhci_tail) {
435 		mdi_vhci_tail = prev;
436 	}
437 
438 	vh->vh_ops = NULL;
439 	mdi_vhci_count--;
440 	mutex_exit(&mdi_mutex);
441 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
442 	DEVI(vdip)->devi_mdi_xhci = NULL;
443 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
444 	kmem_free(vh->vh_client_table,
445 	    mdi_client_table_size * sizeof (struct client_hash));
446 
447 	/*
448 	 * there must be no more tasks on the bus config taskq as the vhci
449 	 * driver can not be detached while bus config is in progress.
450 	 */
451 	ASSERT(vh->vh_bus_config.vhc_start_time == 0);
452 
453 	if (vh->vh_bus_config.vhc_taskq != NULL)
454 		taskq_destroy(vh->vh_bus_config.vhc_taskq);
455 
456 	for (phc = vh->vh_bus_config.vhc_phc; phc != NULL; phc = next_phc) {
457 		next_phc = phc->phc_next;
458 		kmem_free(phc, sizeof (*phc));
459 	}
460 
461 	cv_destroy(&vh->vh_bus_config.vhc_cv);
462 
463 	kmem_free(vh, sizeof (mdi_vhci_t));
464 	return (MDI_SUCCESS);
465 }
466 
467 /*
468  * i_mdi_vhci_class2vhci():
469  *		Look for a matching vHCI module given a vHCI class name
470  * Return Values:
471  *		Handle to a vHCI component
472  *		NULL
473  */
474 static mdi_vhci_t *
475 i_mdi_vhci_class2vhci(char *class)
476 {
477 	mdi_vhci_t	*vh = NULL;
478 
479 	ASSERT(!MUTEX_HELD(&mdi_mutex));
480 
481 	mutex_enter(&mdi_mutex);
482 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
483 		if (strcmp(vh->vh_class, class) == 0) {
484 			break;
485 		}
486 	}
487 	mutex_exit(&mdi_mutex);
488 	return (vh);
489 }
490 
491 /*
492  * i_devi_get_vhci():
493  *		Utility function to get the handle to a vHCI component
494  * Return Values:
495  *		Handle to a vHCI component
496  *		NULL
497  */
498 mdi_vhci_t *
499 i_devi_get_vhci(dev_info_t *vdip)
500 {
501 	mdi_vhci_t	*vh = NULL;
502 	if (MDI_VHCI(vdip)) {
503 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
504 	}
505 	return (vh);
506 }
507 
508 /*
509  * mdi_phci_register():
510  *		Register a pHCI module with mpxio framework
511  *		mdi_phci_register() is called by pHCI drivers to register with
512  *		the mpxio framework and a specific 'class_driver' vHCI.  The
513  *		pHCI driver must call this interface as part of its attach(9e)
514  *		handler.
515  * Return Values:
516  *		MDI_SUCCESS
517  *		MDI_FAILURE
518  */
519 
520 /*ARGSUSED*/
521 int
522 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
523 {
524 	mdi_phci_t		*ph;
525 	mdi_vhci_t		*vh;
526 	char			*data;
527 	char			*pathname;
528 
529 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
530 	(void) ddi_pathname(pdip, pathname);
531 
532 	/*
533 	 * Check for mpxio-disable property. Enable mpxio if the property is
534 	 * missing or not set to "yes".
535 	 * If the property is set to "yes" then emit a brief message.
536 	 */
537 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
538 	    &data) == DDI_SUCCESS)) {
539 		if (strcmp(data, "yes") == 0) {
540 			MDI_DEBUG(1, (CE_CONT, pdip,
541 			    "?%s (%s%d) multipath capabilities "
542 			    "disabled via %s.conf.\n", pathname,
543 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
544 			    ddi_driver_name(pdip)));
545 			ddi_prop_free(data);
546 			kmem_free(pathname, MAXPATHLEN);
547 			return (MDI_FAILURE);
548 		}
549 		ddi_prop_free(data);
550 	}
551 
552 	kmem_free(pathname, MAXPATHLEN);
553 
554 	/*
555 	 * Search for a matching vHCI
556 	 */
557 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
558 	if (vh == NULL) {
559 		return (MDI_FAILURE);
560 	}
561 
562 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
563 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
564 	ph->ph_dip = pdip;
565 	ph->ph_vhci = vh;
566 	ph->ph_next = NULL;
567 	ph->ph_unstable = 0;
568 	ph->ph_vprivate = 0;
569 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
570 	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);
571 
572 	MDI_PHCI_SET_POWER_UP(ph);
573 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
574 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
575 
576 	mutex_enter(&mdi_mutex);
577 	if (vh->vh_phci_head == NULL) {
578 		vh->vh_phci_head = ph;
579 	}
580 	if (vh->vh_phci_tail) {
581 		vh->vh_phci_tail->ph_next = ph;
582 	}
583 	vh->vh_phci_tail = ph;
584 	vh->vh_phci_count++;
585 	/* to force discovery of all phci children during busconfig */
586 	vh->vh_bus_config.vhc_cutoff_time = -1;
587 	mutex_exit(&mdi_mutex);
588 	return (MDI_SUCCESS);
589 }
590 
591 /*
592  * mdi_phci_unregister():
593  *		Unregister a pHCI module from mpxio framework
594  *		mdi_phci_unregister() is called by the pHCI drivers from their
595  *		detach(9E) handler to unregister their instances from the
596  *		framework.
597  * Return Values:
598  *		MDI_SUCCESS
599  *		MDI_FAILURE
600  */
601 
602 /*ARGSUSED*/
603 int
604 mdi_phci_unregister(dev_info_t *pdip, int flags)
605 {
606 	mdi_vhci_t		*vh;
607 	mdi_phci_t		*ph;
608 	mdi_phci_t		*tmp;
609 	mdi_phci_t		*prev = NULL;
610 
611 	ph = i_devi_get_phci(pdip);
612 	if (ph == NULL) {
613 		MDI_DEBUG(1, (CE_WARN, pdip,
614 		    "!pHCI unregister: Not a valid pHCI"));
615 		return (MDI_FAILURE);
616 	}
617 
618 	vh = ph->ph_vhci;
619 	ASSERT(vh != NULL);
620 	if (vh == NULL) {
621 		MDI_DEBUG(1, (CE_WARN, pdip,
622 		    "!pHCI unregister: Not a valid vHCI"));
623 		return (MDI_FAILURE);
624 	}
625 
626 	mutex_enter(&mdi_mutex);
627 	tmp = vh->vh_phci_head;
628 	while (tmp) {
629 		if (tmp == ph) {
630 			break;
631 		}
632 		prev = tmp;
633 		tmp = tmp->ph_next;
634 	}
635 
636 	if (ph == vh->vh_phci_head) {
637 		vh->vh_phci_head = ph->ph_next;
638 	} else {
639 		prev->ph_next = ph->ph_next;
640 	}
641 
642 	if (ph == vh->vh_phci_tail) {
643 		vh->vh_phci_tail = prev;
644 	}
645 
646 	vh->vh_phci_count--;
647 
648 	/*
649 	 * If no busconfig is in progress, release the phci busconfig resources.
650 	 * We only need vh->vh_phci_count of busconfig resources.
651 	 */
652 	if (vh->vh_bus_config.vhc_start_time == 0 &&
653 	    vh->vh_bus_config.vhc_phc_cnt > vh->vh_phci_count) {
654 		int count;
655 
656 		count = vh->vh_bus_config.vhc_phc_cnt - vh->vh_phci_count;
657 		while (count--) {
658 			mdi_phci_config_t *phc;
659 
660 			phc = vh->vh_bus_config.vhc_phc;
661 			vh->vh_bus_config.vhc_phc = phc->phc_next;
662 			kmem_free(phc, sizeof (*phc));
663 		}
664 		vh->vh_bus_config.vhc_phc_cnt = vh->vh_phci_count;
665 	}
666 
667 	mutex_exit(&mdi_mutex);
668 
669 	cv_destroy(&ph->ph_unstable_cv);
670 	cv_destroy(&ph->ph_powerchange_cv);
671 	mutex_destroy(&ph->ph_mutex);
672 	kmem_free(ph, sizeof (mdi_phci_t));
673 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
674 	DEVI(pdip)->devi_mdi_xhci = NULL;
675 	return (MDI_SUCCESS);
676 }
677 
678 /*
679  * i_devi_get_phci():
680  * 		Utility function to return the phci extensions.
681  */
682 static mdi_phci_t *
683 i_devi_get_phci(dev_info_t *pdip)
684 {
685 	mdi_phci_t	*ph = NULL;
686 	if (MDI_PHCI(pdip)) {
687 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
688 	}
689 	return (ph);
690 }
691 
692 /*
693  * mdi_phci_path2devinfo():
694  * 		Utility function to search for a valid phci device given
695  *		the devfs pathname.
696  */
697 
698 dev_info_t *
699 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
700 {
701 	char		*temp_pathname;
702 	mdi_vhci_t	*vh;
703 	mdi_phci_t	*ph;
704 	dev_info_t 	*pdip = NULL;
705 
706 	vh = i_devi_get_vhci(vdip);
707 	ASSERT(vh != NULL);
708 
709 	if (vh == NULL) {
710 		/*
711 		 * Invalid vHCI component, return failure
712 		 */
713 		return (NULL);
714 	}
715 
716 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
717 	mutex_enter(&mdi_mutex);
718 	ph = vh->vh_phci_head;
719 	while (ph != NULL) {
720 		pdip = ph->ph_dip;
721 		ASSERT(pdip != NULL);
722 		*temp_pathname = '\0';
723 		(void) ddi_pathname(pdip, temp_pathname);
724 		if (strcmp(temp_pathname, pathname) == 0) {
725 			break;
726 		}
727 		ph = ph->ph_next;
728 	}
729 	if (ph == NULL) {
730 		pdip = NULL;
731 	}
732 	mutex_exit(&mdi_mutex);
733 	kmem_free(temp_pathname, MAXPATHLEN);
734 	return (pdip);
735 }
736 
737 /*
738  * mdi_phci_get_path_count():
739  * 		get number of path information nodes associated with a given
740  *		pHCI device.
741  */
742 int
743 mdi_phci_get_path_count(dev_info_t *pdip)
744 {
745 	mdi_phci_t	*ph;
746 	int		count = 0;
747 
748 	ph = i_devi_get_phci(pdip);
749 	if (ph != NULL) {
750 		count = ph->ph_path_count;
751 	}
752 	return (count);
753 }
754 
755 /*
756  * i_mdi_phci_lock():
757  *		Lock a pHCI device
758  * Return Values:
759  *		None
760  * Note:
761  *		The default locking order is:
762  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
763  *		But there are number of situations where locks need to be
764  *		grabbed in reverse order.  This routine implements try and lock
765  *		mechanism depending on the requested parameter option.
766  */
767 static void
768 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
769 {
770 	if (pip) {
771 		/* Reverse locking is requested. */
772 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
773 			/*
774 			 * tryenter failed. Try to grab again
775 			 * after a small delay
776 			 */
777 			MDI_PI_HOLD(pip);
778 			MDI_PI_UNLOCK(pip);
779 			delay(1);
780 			MDI_PI_LOCK(pip);
781 			MDI_PI_RELE(pip);
782 		}
783 	} else {
784 		MDI_PHCI_LOCK(ph);
785 	}
786 }
787 
788 /*
789  * i_mdi_phci_get_client_lock():
790  *		Lock a pHCI device
791  * Return Values:
792  *		None
793  * Note:
794  *		The default locking order is:
795  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
796  *		But there are number of situations where locks need to be
797  *		grabbed in reverse order.  This routine implements try and lock
798  *		mechanism depending on the requested parameter option.
799  */
800 static void
801 i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct)
802 {
803 	if (ct) {
804 		/* Reverse locking is requested. */
805 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
806 			/*
807 			 * tryenter failed. Try to grab again
808 			 * after a small delay
809 			 */
810 			MDI_CLIENT_UNLOCK(ct);
811 			delay(1);
812 			MDI_CLIENT_LOCK(ct);
813 		}
814 	} else {
815 		MDI_PHCI_LOCK(ph);
816 	}
817 }
818 
819 /*
820  * i_mdi_phci_unlock():
821  *		Unlock the pHCI component
822  */
823 static void
824 i_mdi_phci_unlock(mdi_phci_t *ph)
825 {
826 	MDI_PHCI_UNLOCK(ph);
827 }
828 
829 /*
830  * i_mdi_devinfo_create():
831  *		create client device's devinfo node
832  * Return Values:
833  *		dev_info
834  *		NULL
835  * Notes:
836  */
837 static dev_info_t *
838 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
839 	char **compatible, int ncompatible, int flags)
840 {
841 	dev_info_t *cdip = NULL;
842 
843 	ASSERT(MUTEX_HELD(&mdi_mutex));
844 
845 	/* Verify for duplicate entry */
846 	cdip = i_mdi_devinfo_find(vh, name, guid);
847 	ASSERT(cdip == NULL);
848 	if (cdip) {
849 		cmn_err(CE_WARN,
850 		    "i_mdi_devinfo_create: client dip %p already exists",
851 			(void *)cdip);
852 	}
853 
854 	if (flags == DDI_SLEEP) {
855 		ndi_devi_alloc_sleep(vh->vh_dip, name,
856 		    DEVI_SID_NODEID, &cdip);
857 	} else {
858 		(void) ndi_devi_alloc(vh->vh_dip, name,
859 		    DEVI_SID_NODEID, &cdip);
860 	}
861 	if (cdip == NULL)
862 		goto fail;
863 
864 	/*
865 	 * Create component type and Global unique identifier
866 	 * properties
867 	 */
868 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
869 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
870 		goto fail;
871 	}
872 
873 	/* Decorate the node with compatible property */
874 	if (compatible &&
875 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
876 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
877 		goto fail;
878 	}
879 
880 	return (cdip);
881 
882 fail:
883 	if (cdip) {
884 		(void) ndi_prop_remove_all(cdip);
885 		(void) ndi_devi_free(cdip);
886 	}
887 	return (NULL);
888 }
889 
890 /*
891  * i_mdi_devinfo_find():
892  *		Find a matching devinfo node for given client node name
893  *		and its guid.
894  * Return Values:
895  *		Handle to a dev_info node or NULL
896  */
897 
898 static dev_info_t *
899 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
900 {
901 	char			*data;
902 	dev_info_t 		*cdip = NULL;
903 	dev_info_t 		*ndip = NULL;
904 	int			circular;
905 
906 	ndi_devi_enter(vh->vh_dip, &circular);
907 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
908 	while ((cdip = ndip) != NULL) {
909 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
910 
911 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
912 			continue;
913 		}
914 
915 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
916 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
917 		    &data) != DDI_PROP_SUCCESS) {
918 			continue;
919 		}
920 
921 		if (strcmp(data, guid) != 0) {
922 			ddi_prop_free(data);
923 			continue;
924 		}
925 		ddi_prop_free(data);
926 		break;
927 	}
928 	ndi_devi_exit(vh->vh_dip, circular);
929 	return (cdip);
930 }
931 
932 /*
933  * i_mdi_devinfo_remove():
934  *		Remove a client device node
935  */
936 static int
937 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
938 {
939 	int	rv = MDI_SUCCESS;
940 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
941 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
942 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
943 		if (rv != NDI_SUCCESS) {
944 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
945 			    " failed. cdip = %p\n", cdip));
946 		}
947 		/*
948 		 * Convert to MDI error code
949 		 */
950 		switch (rv) {
951 		case NDI_SUCCESS:
952 			rv = MDI_SUCCESS;
953 			break;
954 		case NDI_BUSY:
955 			rv = MDI_BUSY;
956 			break;
957 		default:
958 			rv = MDI_FAILURE;
959 			break;
960 		}
961 	}
962 	return (rv);
963 }
964 
965 /*
966  * i_devi_get_client()
967  *		Utility function to get mpxio component extensions
968  */
969 static mdi_client_t *
970 i_devi_get_client(dev_info_t *cdip)
971 {
972 	mdi_client_t	*ct = NULL;
973 	if (MDI_CLIENT(cdip)) {
974 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
975 	}
976 	return (ct);
977 }
978 
979 /*
980  * i_mdi_is_child_present():
981  *		Search for the presence of client device dev_info node
982  */
983 
984 static int
985 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
986 {
987 	int		rv = MDI_FAILURE;
988 	struct dev_info	*dip;
989 	int		circular;
990 
991 	ndi_devi_enter(vdip, &circular);
992 	dip = DEVI(vdip)->devi_child;
993 	while (dip) {
994 		if (dip == DEVI(cdip)) {
995 			rv = MDI_SUCCESS;
996 			break;
997 		}
998 		dip = dip->devi_sibling;
999 	}
1000 	ndi_devi_exit(vdip, circular);
1001 	return (rv);
1002 }
1003 
1004 
1005 /*
1006  * i_mdi_client_lock():
1007  *		Grab client component lock
1008  * Return Values:
1009  *		None
1010  * Note:
1011  *		The default locking order is:
1012  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1013  *		But there are number of situations where locks need to be
1014  *		grabbed in reverse order.  This routine implements try and lock
1015  *		mechanism depending on the requested parameter option.
1016  */
1017 
1018 static void
1019 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1020 {
1021 	if (pip) {
1022 		/*
1023 		 * Reverse locking is requested.
1024 		 */
1025 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1026 			/*
1027 			 * tryenter failed. Try to grab again
1028 			 * after a small delay
1029 			 */
1030 			MDI_PI_HOLD(pip);
1031 			MDI_PI_UNLOCK(pip);
1032 			delay(1);
1033 			MDI_PI_LOCK(pip);
1034 			MDI_PI_RELE(pip);
1035 		}
1036 	} else {
1037 		MDI_CLIENT_LOCK(ct);
1038 	}
1039 }
1040 
1041 /*
1042  * i_mdi_client_unlock():
1043  *		Unlock a client component
1044  */
1045 
1046 static void
1047 i_mdi_client_unlock(mdi_client_t *ct)
1048 {
1049 	MDI_CLIENT_UNLOCK(ct);
1050 }
1051 
1052 /*
1053  * i_mdi_client_alloc():
1054  * 		Allocate and initialize a client structure.  Caller should
1055  *		hold the global mdi_mutex.
1056  * Return Values:
1057  *		Handle to a client component
1058  */
1059 /*ARGSUSED*/
1060 static mdi_client_t *
1061 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid, int flags)
1062 {
1063 	mdi_client_t	*ct;
1064 	char		*drvname = NULL;
1065 	char		*guid = NULL;
1066 	client_lb_args_t 	*lb_args = NULL;
1067 
1068 	ASSERT(MUTEX_HELD(&mdi_mutex));
1069 
1070 	/*
1071 	 * Allocate and initialize a component structure.
1072 	 */
1073 	ct = kmem_zalloc(sizeof (*ct),
1074 	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1075 	if (ct == NULL)
1076 		goto fail;
1077 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1078 	ct->ct_hnext = NULL;
1079 	ct->ct_hprev = NULL;
1080 	ct->ct_dip = NULL;
1081 	ct->ct_vhci = vh;
1082 	drvname = kmem_alloc(strlen(name) + 1,
1083 	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1084 	if (drvname == NULL)
1085 		goto fail;
1086 	ct->ct_drvname = drvname;
1087 	(void) strcpy(ct->ct_drvname, name);
1088 	guid = kmem_alloc(strlen(lguid) + 1,
1089 	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1090 	if (guid == NULL)
1091 		goto fail;
1092 	ct->ct_guid = guid;
1093 	(void) strcpy(ct->ct_guid, lguid);
1094 	ct->ct_cprivate = NULL;
1095 	ct->ct_vprivate = NULL;
1096 	ct->ct_flags = 0;
1097 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1098 	MDI_CLIENT_SET_OFFLINE(ct);
1099 	MDI_CLIENT_SET_DETACH(ct);
1100 	MDI_CLIENT_SET_POWER_UP(ct);
1101 	ct->ct_failover_flags = 0;
1102 	ct->ct_failover_status = 0;
1103 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1104 	ct->ct_unstable = 0;
1105 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1106 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1107 	ct->ct_lb = vh->vh_lb;
1108 	lb_args =  kmem_zalloc(sizeof (client_lb_args_t),
1109 		(flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1110 	if (lb_args == NULL)
1111 		goto fail;
1112 	ct->ct_lb_args = lb_args;
1113 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1114 	ct->ct_path_count = 0;
1115 	ct->ct_path_head = NULL;
1116 	ct->ct_path_tail = NULL;
1117 	ct->ct_path_last = NULL;
1118 
1119 
1120 	/*
1121 	 * Add this client component to our client hash queue
1122 	 */
1123 	i_mdi_client_enlist_table(vh, ct);
1124 	return (ct);
1125 
1126 fail:
1127 	if (guid)
1128 		kmem_free(guid, strlen(lguid) + 1);
1129 	if (drvname)
1130 		kmem_free(drvname, strlen(name) + 1);
1131 	if (lb_args)
1132 		kmem_free(lb_args, sizeof (client_lb_args_t));
1133 	kmem_free(ct, sizeof (*ct));
1134 	return (NULL);
1135 }
1136 
1137 /*
1138  * i_mdi_client_enlist_table():
1139  *		Attach the client device to the client hash table. Caller
1140  *		should hold the mdi_mutex
1141  */
1142 
1143 static void
1144 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1145 {
1146 	int 			index;
1147 	struct client_hash	*head;
1148 
1149 	ASSERT(MUTEX_HELD(&mdi_mutex));
1150 	index = i_mdi_get_hash_key(ct->ct_guid);
1151 	head = &vh->vh_client_table[index];
1152 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1153 	head->ct_hash_head = ct;
1154 	head->ct_hash_count++;
1155 	vh->vh_client_count++;
1156 }
1157 
1158 /*
1159  * i_mdi_client_delist_table():
1160  *		Attach the client device to the client hash table.
1161  *		Caller should hold the mdi_mutex
1162  */
1163 
1164 static void
1165 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1166 {
1167 	int			index;
1168 	char			*guid;
1169 	struct client_hash 	*head;
1170 	mdi_client_t		*next;
1171 	mdi_client_t		*last;
1172 
1173 	ASSERT(MUTEX_HELD(&mdi_mutex));
1174 	guid = ct->ct_guid;
1175 	index = i_mdi_get_hash_key(guid);
1176 	head = &vh->vh_client_table[index];
1177 
1178 	last = NULL;
1179 	next = (mdi_client_t *)head->ct_hash_head;
1180 	while (next != NULL) {
1181 		if (next == ct) {
1182 			break;
1183 		}
1184 		last = next;
1185 		next = next->ct_hnext;
1186 	}
1187 
1188 	if (next) {
1189 		head->ct_hash_count--;
1190 		if (last == NULL) {
1191 			head->ct_hash_head = ct->ct_hnext;
1192 		} else {
1193 			last->ct_hnext = ct->ct_hnext;
1194 		}
1195 		ct->ct_hnext = NULL;
1196 		vh->vh_client_count--;
1197 	}
1198 }
1199 
1200 
1201 /*
1202  * i_mdi_client_free():
1203  *		Free a client component
1204  */
1205 static int
1206 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1207 {
1208 	int		rv = MDI_SUCCESS;
1209 	int		flags = ct->ct_flags;
1210 	dev_info_t	*cdip;
1211 	dev_info_t	*vdip;
1212 
1213 	ASSERT(MUTEX_HELD(&mdi_mutex));
1214 	vdip = vh->vh_dip;
1215 	cdip = ct->ct_dip;
1216 
1217 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1218 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1219 	DEVI(cdip)->devi_mdi_client = NULL;
1220 
1221 	/*
1222 	 * Clear out back ref. to dev_info_t node
1223 	 */
1224 	ct->ct_dip = NULL;
1225 
1226 	/*
1227 	 * Remove this client from our hash queue
1228 	 */
1229 	i_mdi_client_delist_table(vh, ct);
1230 
1231 	/*
1232 	 * Uninitialize and free the component
1233 	 */
1234 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1235 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1236 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1237 	cv_destroy(&ct->ct_failover_cv);
1238 	cv_destroy(&ct->ct_unstable_cv);
1239 	cv_destroy(&ct->ct_powerchange_cv);
1240 	mutex_destroy(&ct->ct_mutex);
1241 	kmem_free(ct, sizeof (*ct));
1242 
1243 	if (cdip != NULL) {
1244 		mutex_exit(&mdi_mutex);
1245 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1246 		mutex_enter(&mdi_mutex);
1247 	}
1248 	return (rv);
1249 }
1250 
1251 /*
1252  * i_mdi_client_find():
1253  * 		Find the client structure corresponding to a given guid
1254  *		Caller should hold the mdi_mutex
1255  */
1256 static mdi_client_t *
1257 i_mdi_client_find(mdi_vhci_t *vh, char *guid)
1258 {
1259 	int			index;
1260 	struct client_hash	*head;
1261 	mdi_client_t		*ct;
1262 
1263 	ASSERT(MUTEX_HELD(&mdi_mutex));
1264 	index = i_mdi_get_hash_key(guid);
1265 	head = &vh->vh_client_table[index];
1266 
1267 	ct = head->ct_hash_head;
1268 	while (ct != NULL) {
1269 		if (strcmp(ct->ct_guid, guid) == 0) {
1270 			break;
1271 		}
1272 		ct = ct->ct_hnext;
1273 	}
1274 	return (ct);
1275 }
1276 
1277 
1278 
1279 /*
1280  * i_mdi_client_update_state():
1281  *		Compute and update client device state
1282  * Notes:
1283  *		A client device can be in any of three possible states:
1284  *
1285  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1286  *		one online/standby paths. Can tolerate failures.
1287  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1288  *		no alternate paths available as standby. A failure on the online
1289  *		would result in loss of access to device data.
1290  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1291  *		no paths available to access the device.
1292  */
1293 static void
1294 i_mdi_client_update_state(mdi_client_t *ct)
1295 {
1296 	int state;
1297 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1298 	state = i_mdi_client_compute_state(ct, NULL);
1299 	MDI_CLIENT_SET_STATE(ct, state);
1300 }
1301 
1302 /*
1303  * i_mdi_client_compute_state():
1304  *		Compute client device state
1305  *
1306  *		mdi_phci_t *	Pointer to pHCI structure which should
1307  *				while computing the new value.  Used by
1308  *				i_mdi_phci_offline() to find the new
1309  *				client state after DR of a pHCI.
1310  */
1311 static int
1312 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1313 {
1314 	int		state;
1315 	int		online_count = 0;
1316 	int		standby_count = 0;
1317 	mdi_pathinfo_t	*pip, *next;
1318 
1319 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1320 	pip = ct->ct_path_head;
1321 	while (pip != NULL) {
1322 		MDI_PI_LOCK(pip);
1323 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1324 		if (MDI_PI(pip)->pi_phci == ph) {
1325 			MDI_PI_UNLOCK(pip);
1326 			pip = next;
1327 			continue;
1328 		}
1329 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1330 				== MDI_PATHINFO_STATE_ONLINE)
1331 			online_count++;
1332 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1333 				== MDI_PATHINFO_STATE_STANDBY)
1334 			standby_count++;
1335 		MDI_PI_UNLOCK(pip);
1336 		pip = next;
1337 	}
1338 
1339 	if (online_count == 0) {
1340 		if (standby_count == 0) {
1341 			state = MDI_CLIENT_STATE_FAILED;
1342 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1343 			    " ct = %p\n", ct));
1344 		} else if (standby_count == 1) {
1345 			state = MDI_CLIENT_STATE_DEGRADED;
1346 		} else {
1347 			state = MDI_CLIENT_STATE_OPTIMAL;
1348 		}
1349 	} else if (online_count == 1) {
1350 		if (standby_count == 0) {
1351 			state = MDI_CLIENT_STATE_DEGRADED;
1352 		} else {
1353 			state = MDI_CLIENT_STATE_OPTIMAL;
1354 		}
1355 	} else {
1356 		state = MDI_CLIENT_STATE_OPTIMAL;
1357 	}
1358 	return (state);
1359 }
1360 
1361 /*
1362  * i_mdi_client2devinfo():
1363  *		Utility function
1364  */
1365 dev_info_t *
1366 i_mdi_client2devinfo(mdi_client_t *ct)
1367 {
1368 	return (ct->ct_dip);
1369 }
1370 
1371 /*
1372  * mdi_client_path2_devinfo():
1373  * 		Given the parent devinfo and child devfs pathname, search for
1374  *		a valid devfs node handle.
1375  */
1376 dev_info_t *
1377 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1378 {
1379 	dev_info_t 	*cdip = NULL;
1380 	dev_info_t 	*ndip = NULL;
1381 	char		*temp_pathname;
1382 	int		circular;
1383 
1384 	/*
1385 	 * Allocate temp buffer
1386 	 */
1387 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1388 
1389 	/*
1390 	 * Lock parent against changes
1391 	 */
1392 	ndi_devi_enter(vdip, &circular);
1393 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1394 	while ((cdip = ndip) != NULL) {
1395 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1396 
1397 		*temp_pathname = '\0';
1398 		(void) ddi_pathname(cdip, temp_pathname);
1399 		if (strcmp(temp_pathname, pathname) == 0) {
1400 			break;
1401 		}
1402 	}
1403 	/*
1404 	 * Release devinfo lock
1405 	 */
1406 	ndi_devi_exit(vdip, circular);
1407 
1408 	/*
1409 	 * Free the temp buffer
1410 	 */
1411 	kmem_free(temp_pathname, MAXPATHLEN);
1412 	return (cdip);
1413 }
1414 
1415 
1416 /*
1417  * mdi_client_get_path_count():
1418  * 		Utility function to get number of path information nodes
1419  *		associated with a given client device.
1420  */
1421 int
1422 mdi_client_get_path_count(dev_info_t *cdip)
1423 {
1424 	mdi_client_t	*ct;
1425 	int		count = 0;
1426 
1427 	ct = i_devi_get_client(cdip);
1428 	if (ct != NULL) {
1429 		count = ct->ct_path_count;
1430 	}
1431 	return (count);
1432 }
1433 
1434 
1435 /*
1436  * i_mdi_get_hash_key():
1437  * 		Create a hash using strings as keys
1438  *
1439  */
1440 static int
1441 i_mdi_get_hash_key(char *str)
1442 {
1443 	uint32_t	g, hash = 0;
1444 	char		*p;
1445 
1446 	for (p = str; *p != '\0'; p++) {
1447 		g = *p;
1448 		hash += g;
1449 	}
1450 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1451 }
1452 
1453 /*
1454  * mdi_get_lb_policy():
1455  * 		Get current load balancing policy for a given client device
1456  */
1457 client_lb_t
1458 mdi_get_lb_policy(dev_info_t *cdip)
1459 {
1460 	client_lb_t	lb = LOAD_BALANCE_NONE;
1461 	mdi_client_t	*ct;
1462 
1463 	ct = i_devi_get_client(cdip);
1464 	if (ct != NULL) {
1465 		lb = ct->ct_lb;
1466 	}
1467 	return (lb);
1468 }
1469 
1470 /*
1471  * mdi_set_lb_region_size():
1472  * 		Set current region size for the load-balance
1473  */
1474 int
1475 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1476 {
1477 	mdi_client_t	*ct;
1478 	int		rv = MDI_FAILURE;
1479 
1480 	ct = i_devi_get_client(cdip);
1481 	if (ct != NULL && ct->ct_lb_args != NULL) {
1482 		ct->ct_lb_args->region_size = region_size;
1483 		rv = MDI_SUCCESS;
1484 	}
1485 	return (rv);
1486 }
1487 
1488 /*
1489  * mdi_Set_lb_policy():
1490  * 		Set current load balancing policy for a given client device
1491  */
1492 int
1493 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1494 {
1495 	mdi_client_t	*ct;
1496 	int		rv = MDI_FAILURE;
1497 
1498 	ct = i_devi_get_client(cdip);
1499 	if (ct != NULL) {
1500 		ct->ct_lb = lb;
1501 		rv = MDI_SUCCESS;
1502 	}
1503 	return (rv);
1504 }
1505 
1506 /*
1507  * mdi_failover():
1508  *		failover function called by the vHCI drivers to initiate
1509  *		a failover operation.  This is typically due to non-availability
1510  *		of online paths to route I/O requests.  Failover can be
1511  *		triggered through user application also.
1512  *
1513  *		The vHCI driver calls mdi_failover() to initiate a failover
1514  *		operation. mdi_failover() calls back into the vHCI driver's
1515  *		vo_failover() entry point to perform the actual failover
1516  *		operation.  The reason for requiring the vHCI driver to
1517  *		initiate failover by calling mdi_failover(), instead of directly
1518  *		executing vo_failover() itself, is to ensure that the mdi
1519  *		framework can keep track of the client state properly.
1520  *		Additionally, mdi_failover() provides as a convenience the
1521  *		option of performing the failover operation synchronously or
1522  *		asynchronously
1523  *
1524  *		Upon successful completion of the failover operation, the
1525  *		paths that were previously ONLINE will be in the STANDBY state,
1526  *		and the newly activated paths will be in the ONLINE state.
1527  *
1528  *		The flags modifier determines whether the activation is done
1529  *		synchronously: MDI_FAILOVER_SYNC
1530  * Return Values:
1531  *		MDI_SUCCESS
1532  *		MDI_FAILURE
1533  *		MDI_BUSY
1534  */
1535 /*ARGSUSED*/
1536 int
1537 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1538 {
1539 	int			rv;
1540 	mdi_client_t		*ct;
1541 
1542 	ct = i_devi_get_client(cdip);
1543 	ASSERT(ct != NULL);
1544 	if (ct == NULL) {
1545 		/* cdip is not a valid client device. Nothing more to do. */
1546 		return (MDI_FAILURE);
1547 	}
1548 
1549 	MDI_CLIENT_LOCK(ct);
1550 
1551 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1552 		/* A path to the client is being freed */
1553 		MDI_CLIENT_UNLOCK(ct);
1554 		return (MDI_BUSY);
1555 	}
1556 
1557 
1558 	if (MDI_CLIENT_IS_FAILED(ct)) {
1559 		/*
1560 		 * Client is in failed state. Nothing more to do.
1561 		 */
1562 		MDI_CLIENT_UNLOCK(ct);
1563 		return (MDI_FAILURE);
1564 	}
1565 
1566 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1567 		/*
1568 		 * Failover is already in progress; return BUSY
1569 		 */
1570 		MDI_CLIENT_UNLOCK(ct);
1571 		return (MDI_BUSY);
1572 	}
1573 	/*
1574 	 * Make sure that mdi_pathinfo node state changes are processed.
1575 	 * We do not allow failovers to progress while client path state
1576 	 * changes are in progress
1577 	 */
1578 	if (ct->ct_unstable) {
1579 		if (flags == MDI_FAILOVER_ASYNC) {
1580 			MDI_CLIENT_UNLOCK(ct);
1581 			return (MDI_BUSY);
1582 		} else {
1583 			while (ct->ct_unstable)
1584 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1585 		}
1586 	}
1587 
1588 	/*
1589 	 * Client device is in stable state. Before proceeding, perform sanity
1590 	 * checks again.
1591 	 */
1592 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1593 	    (i_ddi_node_state(ct->ct_dip) < DS_READY)) {
1594 		/*
1595 		 * Client is in failed state. Nothing more to do.
1596 		 */
1597 		MDI_CLIENT_UNLOCK(ct);
1598 		return (MDI_FAILURE);
1599 	}
1600 
1601 	/*
1602 	 * Set the client state as failover in progress.
1603 	 */
1604 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1605 	ct->ct_failover_flags = flags;
1606 	MDI_CLIENT_UNLOCK(ct);
1607 
1608 	if (flags == MDI_FAILOVER_ASYNC) {
1609 		/*
1610 		 * Submit the initiate failover request via CPR safe
1611 		 * taskq threads.
1612 		 */
1613 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1614 		    ct, KM_SLEEP);
1615 		return (MDI_ACCEPT);
1616 	} else {
1617 		/*
1618 		 * Synchronous failover mode.  Typically invoked from the user
1619 		 * land.
1620 		 */
1621 		rv = i_mdi_failover(ct);
1622 	}
1623 	return (rv);
1624 }
1625 
1626 /*
1627  * i_mdi_failover():
1628  *		internal failover function. Invokes vHCI drivers failover
1629  *		callback function and process the failover status
1630  * Return Values:
1631  *		None
1632  *
1633  * Note: A client device in failover state can not be detached or freed.
1634  */
1635 static int
1636 i_mdi_failover(void *arg)
1637 {
1638 	int		rv = MDI_SUCCESS;
1639 	mdi_client_t	*ct = (mdi_client_t *)arg;
1640 	mdi_vhci_t	*vh = ct->ct_vhci;
1641 
1642 	ASSERT(!MUTEX_HELD(&ct->ct_mutex));
1643 
1644 	if (vh->vh_ops->vo_failover != NULL) {
1645 		/*
1646 		 * Call vHCI drivers callback routine
1647 		 */
1648 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1649 		    ct->ct_failover_flags);
1650 	}
1651 
1652 	MDI_CLIENT_LOCK(ct);
1653 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1654 
1655 	/*
1656 	 * Save the failover return status
1657 	 */
1658 	ct->ct_failover_status = rv;
1659 
1660 	/*
1661 	 * As a result of failover, client status would have been changed.
1662 	 * Update the client state and wake up anyone waiting on this client
1663 	 * device.
1664 	 */
1665 	i_mdi_client_update_state(ct);
1666 
1667 	cv_broadcast(&ct->ct_failover_cv);
1668 	MDI_CLIENT_UNLOCK(ct);
1669 	return (rv);
1670 }
1671 
1672 /*
1673  * Load balancing is logical block.
1674  * IOs within the range described by region_size
1675  * would go on the same path. This would improve the
1676  * performance by cache-hit on some of the RAID devices.
1677  * Search only for online paths(At some point we
1678  * may want to balance across target ports).
1679  * If no paths are found then default to round-robin.
1680  */
1681 static int
1682 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1683 {
1684 	int		path_index = -1;
1685 	int		online_path_count = 0;
1686 	int		online_nonpref_path_count = 0;
1687 	int 		region_size = ct->ct_lb_args->region_size;
1688 	mdi_pathinfo_t	*pip;
1689 	mdi_pathinfo_t	*next;
1690 	int		preferred, path_cnt;
1691 
1692 	pip = ct->ct_path_head;
1693 	while (pip) {
1694 		MDI_PI_LOCK(pip);
1695 		if (MDI_PI(pip)->pi_state ==
1696 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1697 			online_path_count++;
1698 		} else if (MDI_PI(pip)->pi_state ==
1699 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1700 			online_nonpref_path_count++;
1701 		}
1702 		next = (mdi_pathinfo_t *)
1703 		    MDI_PI(pip)->pi_client_link;
1704 		MDI_PI_UNLOCK(pip);
1705 		pip = next;
1706 	}
1707 	/* if found any online/preferred then use this type */
1708 	if (online_path_count > 0) {
1709 		path_cnt = online_path_count;
1710 		preferred = 1;
1711 	} else if (online_nonpref_path_count > 0) {
1712 		path_cnt = online_nonpref_path_count;
1713 		preferred = 0;
1714 	} else {
1715 		path_cnt = 0;
1716 	}
1717 	if (path_cnt) {
1718 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1719 		pip = ct->ct_path_head;
1720 		while (pip && path_index != -1) {
1721 			MDI_PI_LOCK(pip);
1722 			if (path_index == 0 &&
1723 			    (MDI_PI(pip)->pi_state ==
1724 			    MDI_PATHINFO_STATE_ONLINE) &&
1725 				MDI_PI(pip)->pi_preferred == preferred) {
1726 				MDI_PI_HOLD(pip);
1727 				MDI_PI_UNLOCK(pip);
1728 				*ret_pip = pip;
1729 				return (MDI_SUCCESS);
1730 			}
1731 			path_index --;
1732 			next = (mdi_pathinfo_t *)
1733 			    MDI_PI(pip)->pi_client_link;
1734 			MDI_PI_UNLOCK(pip);
1735 			pip = next;
1736 		}
1737 		if (pip == NULL) {
1738 			MDI_DEBUG(4, (CE_NOTE, NULL,
1739 			    "!lba %p, no pip !!\n",
1740 				bp->b_blkno));
1741 		} else {
1742 			MDI_DEBUG(4, (CE_NOTE, NULL,
1743 			    "!lba %p, no pip for path_index, "
1744 			    "pip %p\n", pip));
1745 		}
1746 	}
1747 	return (MDI_FAILURE);
1748 }
1749 
1750 /*
1751  * mdi_select_path():
1752  *		select a path to access a client device.
1753  *
1754  *		mdi_select_path() function is called by the vHCI drivers to
1755  *		select a path to route the I/O request to.  The caller passes
1756  *		the block I/O data transfer structure ("buf") as one of the
1757  *		parameters.  The mpxio framework uses the buf structure
1758  *		contents to maintain per path statistics (total I/O size /
1759  *		count pending).  If more than one online paths are available to
1760  *		select, the framework automatically selects a suitable path
1761  *		for routing I/O request. If a failover operation is active for
1762  *		this client device the call shall be failed with MDI_BUSY error
1763  *		code.
1764  *
1765  *		By default this function returns a suitable path in online
1766  *		state based on the current load balancing policy.  Currently
1767  *		we support LOAD_BALANCE_NONE (Previously selected online path
1768  *		will continue to be used till the path is usable) and
1769  *		LOAD_BALANCE_RR (Online paths will be selected in a round
1770  *		robin fashion), LOAD_BALANCE_LB(Online paths will be selected
1771  *		based on the logical block).  The load balancing
1772  *		through vHCI drivers configuration file (driver.conf).
1773  *
1774  *		vHCI drivers may override this default behavior by specifying
1775  *		appropriate flags.  If start_pip is specified (non NULL) is
1776  *		used as start point to walk and find the next appropriate path.
1777  *		The following values are currently defined:
1778  *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
1779  *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
1780  *
1781  *		The non-standard behavior is used by the scsi_vhci driver,
1782  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1783  *		attach of client devices (to avoid an unnecessary failover
1784  *		when the STANDBY path comes up first), during failover
1785  *		(to activate a STANDBY path as ONLINE).
1786  *
1787  *		The selected path in returned in a held state (ref_cnt).
1788  *		Caller should release the hold by calling mdi_rele_path().
1789  *
1790  * Return Values:
1791  *		MDI_SUCCESS	- Completed successfully
1792  *		MDI_BUSY 	- Client device is busy failing over
1793  *		MDI_NOPATH	- Client device is online, but no valid path are
1794  *				  available to access this client device
1795  *		MDI_FAILURE	- Invalid client device or state
1796  *		MDI_DEVI_ONLINING
1797  *				- Client device (struct dev_info state) is in
1798  *				  onlining state.
1799  */
1800 
1801 /*ARGSUSED*/
1802 int
1803 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1804     mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
1805 {
1806 	mdi_client_t	*ct;
1807 	mdi_pathinfo_t	*pip;
1808 	mdi_pathinfo_t	*next;
1809 	mdi_pathinfo_t	*head;
1810 	mdi_pathinfo_t	*start;
1811 	client_lb_t	lbp;	/* load balancing policy */
1812 	int		sb = 1;	/* standard behavior */
1813 	int		preferred = 1;	/* preferred path */
1814 	int		cond, cont = 1;
1815 	int		retry = 0;
1816 
1817 	if (flags != 0) {
1818 		/*
1819 		 * disable default behavior
1820 		 */
1821 		sb = 0;
1822 	}
1823 
1824 	*ret_pip = NULL;
1825 	ct = i_devi_get_client(cdip);
1826 	if (ct == NULL) {
1827 		/* mdi extensions are NULL, Nothing more to do */
1828 		return (MDI_FAILURE);
1829 	}
1830 
1831 	MDI_CLIENT_LOCK(ct);
1832 
1833 	if (sb) {
1834 		if (MDI_CLIENT_IS_FAILED(ct)) {
1835 			/*
1836 			 * Client is not ready to accept any I/O requests.
1837 			 * Fail this request.
1838 			 */
1839 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1840 			    "client state offline ct = %p\n", ct));
1841 			MDI_CLIENT_UNLOCK(ct);
1842 			return (MDI_FAILURE);
1843 		}
1844 
1845 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1846 			/*
1847 			 * Check for Failover is in progress. If so tell the
1848 			 * caller that this device is busy.
1849 			 */
1850 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
1851 			    "client failover in progress ct = %p\n", ct));
1852 			MDI_CLIENT_UNLOCK(ct);
1853 			return (MDI_BUSY);
1854 		}
1855 
1856 		/*
1857 		 * Check to see whether the client device is attached.
1858 		 * If not so, let the vHCI driver manually select a path
1859 		 * (standby) and let the probe/attach process to continue.
1860 		 */
1861 		if ((MDI_CLIENT_IS_DETACHED(ct)) ||
1862 		    i_ddi_node_state(cdip) < DS_READY) {
1863 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
1864 			MDI_CLIENT_UNLOCK(ct);
1865 			return (MDI_DEVI_ONLINING);
1866 		}
1867 	}
1868 
1869 	/*
1870 	 * Cache in the client list head.  If head of the list is NULL
1871 	 * return MDI_NOPATH
1872 	 */
1873 	head = ct->ct_path_head;
1874 	if (head == NULL) {
1875 		MDI_CLIENT_UNLOCK(ct);
1876 		return (MDI_NOPATH);
1877 	}
1878 
1879 	/*
1880 	 * for non default behavior, bypass current
1881 	 * load balancing policy and always use LOAD_BALANCE_RR
1882 	 * except that the start point will be adjusted based
1883 	 * on the provided start_pip
1884 	 */
1885 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
1886 
1887 	switch (lbp) {
1888 	case LOAD_BALANCE_NONE:
1889 		/*
1890 		 * Load balancing is None  or Alternate path mode
1891 		 * Start looking for a online mdi_pathinfo node starting from
1892 		 * last known selected path
1893 		 */
1894 		preferred = 1;
1895 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
1896 		if (pip == NULL) {
1897 			pip = head;
1898 		}
1899 		start = pip;
1900 		do {
1901 			MDI_PI_LOCK(pip);
1902 			/*
1903 			 * No need to explicitly check if the path is disabled.
1904 			 * Since we are checking for state == ONLINE and the
1905 			 * same veriable is used for DISABLE/ENABLE information.
1906 			 */
1907 			if (MDI_PI(pip)->pi_state  ==
1908 				MDI_PATHINFO_STATE_ONLINE &&
1909 				preferred == MDI_PI(pip)->pi_preferred) {
1910 				/*
1911 				 * Return the path in hold state. Caller should
1912 				 * release the lock by calling mdi_rele_path()
1913 				 */
1914 				MDI_PI_HOLD(pip);
1915 				MDI_PI_UNLOCK(pip);
1916 				ct->ct_path_last = pip;
1917 				*ret_pip = pip;
1918 				MDI_CLIENT_UNLOCK(ct);
1919 				return (MDI_SUCCESS);
1920 			}
1921 
1922 			/*
1923 			 * Path is busy.
1924 			 */
1925 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
1926 			    MDI_PI_IS_TRANSIENT(pip))
1927 				retry = 1;
1928 			/*
1929 			 * Keep looking for a next available online path
1930 			 */
1931 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1932 			if (next == NULL) {
1933 				next = head;
1934 			}
1935 			MDI_PI_UNLOCK(pip);
1936 			pip = next;
1937 			if (start == pip && preferred) {
1938 				preferred = 0;
1939 			} else if (start == pip && !preferred) {
1940 				cont = 0;
1941 			}
1942 		} while (cont);
1943 		break;
1944 
1945 	case LOAD_BALANCE_LBA:
1946 		/*
1947 		 * Make sure we are looking
1948 		 * for an online path. Otherwise, if it is for a STANDBY
1949 		 * path request, it will go through and fetch an ONLINE
1950 		 * path which is not desirable.
1951 		 */
1952 		if ((ct->ct_lb_args != NULL) &&
1953 			    (ct->ct_lb_args->region_size) && bp &&
1954 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
1955 			if (i_mdi_lba_lb(ct, ret_pip, bp)
1956 				    == MDI_SUCCESS) {
1957 				MDI_CLIENT_UNLOCK(ct);
1958 				return (MDI_SUCCESS);
1959 			}
1960 		}
1961 		/*  FALLTHROUGH */
1962 	case LOAD_BALANCE_RR:
1963 		/*
1964 		 * Load balancing is Round Robin. Start looking for a online
1965 		 * mdi_pathinfo node starting from last known selected path
1966 		 * as the start point.  If override flags are specified,
1967 		 * process accordingly.
1968 		 * If the search is already in effect(start_pip not null),
1969 		 * then lets just use the same path preference to continue the
1970 		 * traversal.
1971 		 */
1972 
1973 		if (start_pip != NULL) {
1974 			preferred = MDI_PI(start_pip)->pi_preferred;
1975 		} else {
1976 			preferred = 1;
1977 		}
1978 
1979 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
1980 		if (start == NULL) {
1981 			pip = head;
1982 		} else {
1983 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
1984 			if (pip == NULL) {
1985 				if (!sb) {
1986 					if (preferred == 0) {
1987 						/*
1988 						 * Looks like we have completed
1989 						 * the traversal as preferred
1990 						 * value is 0. Time to bail out.
1991 						 */
1992 						*ret_pip = NULL;
1993 						MDI_CLIENT_UNLOCK(ct);
1994 						return (MDI_NOPATH);
1995 					} else {
1996 						/*
1997 						 * Looks like we reached the
1998 						 * end of the list. Lets enable
1999 						 * traversal of non preferred
2000 						 * paths.
2001 						 */
2002 						preferred = 0;
2003 					}
2004 				}
2005 				pip = head;
2006 			}
2007 		}
2008 		start = pip;
2009 		do {
2010 			MDI_PI_LOCK(pip);
2011 			if (sb) {
2012 				cond = ((MDI_PI(pip)->pi_state ==
2013 				    MDI_PATHINFO_STATE_ONLINE &&
2014 					MDI_PI(pip)->pi_preferred ==
2015 						preferred) ? 1 : 0);
2016 			} else {
2017 				if (flags == MDI_SELECT_ONLINE_PATH) {
2018 					cond = ((MDI_PI(pip)->pi_state ==
2019 					    MDI_PATHINFO_STATE_ONLINE &&
2020 						MDI_PI(pip)->pi_preferred ==
2021 						preferred) ? 1 : 0);
2022 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2023 					cond = ((MDI_PI(pip)->pi_state ==
2024 					    MDI_PATHINFO_STATE_STANDBY &&
2025 						MDI_PI(pip)->pi_preferred ==
2026 						preferred) ? 1 : 0);
2027 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2028 				    MDI_SELECT_STANDBY_PATH)) {
2029 					cond = (((MDI_PI(pip)->pi_state ==
2030 					    MDI_PATHINFO_STATE_ONLINE ||
2031 					    (MDI_PI(pip)->pi_state ==
2032 					    MDI_PATHINFO_STATE_STANDBY)) &&
2033 						MDI_PI(pip)->pi_preferred ==
2034 						preferred) ? 1 : 0);
2035 				} else {
2036 					cond = 0;
2037 				}
2038 			}
2039 			/*
2040 			 * No need to explicitly check if the path is disabled.
2041 			 * Since we are checking for state == ONLINE and the
2042 			 * same veriable is used for DISABLE/ENABLE information.
2043 			 */
2044 			if (cond) {
2045 				/*
2046 				 * Return the path in hold state. Caller should
2047 				 * release the lock by calling mdi_rele_path()
2048 				 */
2049 				MDI_PI_HOLD(pip);
2050 				MDI_PI_UNLOCK(pip);
2051 				if (sb)
2052 					ct->ct_path_last = pip;
2053 				*ret_pip = pip;
2054 				MDI_CLIENT_UNLOCK(ct);
2055 				return (MDI_SUCCESS);
2056 			}
2057 			/*
2058 			 * Path is busy.
2059 			 */
2060 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2061 			    MDI_PI_IS_TRANSIENT(pip))
2062 				retry = 1;
2063 
2064 			/*
2065 			 * Keep looking for a next available online path
2066 			 */
2067 do_again:
2068 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2069 			if (next == NULL) {
2070 				if (!sb) {
2071 					if (preferred == 1) {
2072 						/*
2073 						 * Looks like we reached the
2074 						 * end of the list. Lets enable
2075 						 * traversal of non preferred
2076 						 * paths.
2077 						 */
2078 						preferred = 0;
2079 						next = head;
2080 					} else {
2081 						/*
2082 						 * We have done both the passes
2083 						 * Preferred as well as for
2084 						 * Non-preferred. Bail out now.
2085 						 */
2086 						cont = 0;
2087 					}
2088 				} else {
2089 					/*
2090 					 * Standard behavior case.
2091 					 */
2092 					next = head;
2093 				}
2094 			}
2095 			MDI_PI_UNLOCK(pip);
2096 			if (cont == 0) {
2097 				break;
2098 			}
2099 			pip = next;
2100 
2101 			if (!sb) {
2102 				/*
2103 				 * We need to handle the selection of
2104 				 * non-preferred path in the following
2105 				 * case:
2106 				 *
2107 				 * +------+   +------+   +------+   +-----+
2108 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2109 				 * +------+   +------+   +------+   +-----+
2110 				 *
2111 				 * If we start the search with B, we need to
2112 				 * skip beyond B to pick C which is non -
2113 				 * preferred in the second pass. The following
2114 				 * test, if true, will allow us to skip over
2115 				 * the 'start'(B in the example) to select
2116 				 * other non preferred elements.
2117 				 */
2118 				if ((start_pip != NULL) && (start_pip == pip) &&
2119 				    (MDI_PI(start_pip)->pi_preferred
2120 				    != preferred)) {
2121 					/*
2122 					 * try again after going past the start
2123 					 * pip
2124 					 */
2125 					MDI_PI_LOCK(pip);
2126 					goto do_again;
2127 				}
2128 			} else {
2129 				/*
2130 				 * Standard behavior case
2131 				 */
2132 				if (start == pip && preferred) {
2133 					/* look for nonpreferred paths */
2134 					preferred = 0;
2135 				} else if (start == pip && !preferred) {
2136 					/*
2137 					 * Exit condition
2138 					 */
2139 					cont = 0;
2140 				}
2141 			}
2142 		} while (cont);
2143 		break;
2144 	}
2145 
2146 	MDI_CLIENT_UNLOCK(ct);
2147 	if (retry == 1) {
2148 		return (MDI_BUSY);
2149 	} else {
2150 		return (MDI_NOPATH);
2151 	}
2152 }
2153 
2154 /*
2155  * For a client, return the next available path to any phci
2156  *
2157  * Note:
2158  *		Caller should hold the branch's devinfo node to get a consistent
2159  *		snap shot of the mdi_pathinfo nodes.
2160  *
2161  *		Please note that even the list is stable the mdi_pathinfo
2162  *		node state and properties are volatile.  The caller should lock
2163  *		and unlock the nodes by calling mdi_pi_lock() and
2164  *		mdi_pi_unlock() functions to get a stable properties.
2165  *
2166  *		If there is a need to use the nodes beyond the hold of the
2167  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2168  *		need to be held against unexpected removal by calling
2169  *		mdi_hold_path() and should be released by calling
2170  *		mdi_rele_path() on completion.
2171  */
2172 mdi_pathinfo_t *
2173 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2174 {
2175 	mdi_client_t *ct;
2176 
2177 	if (!MDI_CLIENT(ct_dip))
2178 		return (NULL);
2179 
2180 	/*
2181 	 * Walk through client link
2182 	 */
2183 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2184 	ASSERT(ct != NULL);
2185 
2186 	if (pip == NULL)
2187 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2188 
2189 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2190 }
2191 
2192 /*
2193  * For a phci, return the next available path to any client
2194  * Note: ditto mdi_get_next_phci_path()
2195  */
2196 mdi_pathinfo_t *
2197 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2198 {
2199 	mdi_phci_t *ph;
2200 
2201 	if (!MDI_PHCI(ph_dip))
2202 		return (NULL);
2203 
2204 	/*
2205 	 * Walk through pHCI link
2206 	 */
2207 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2208 	ASSERT(ph != NULL);
2209 
2210 	if (pip == NULL)
2211 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2212 
2213 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2214 }
2215 
2216 /*
2217  * mdi_get_nextpath():
2218  *		mdi_pathinfo node walker function.  Get the next node from the
2219  *		client or pHCI device list.
2220  *
2221  * XXX This is wrapper function for compatibility purposes only.
2222  *
2223  *	It doesn't work under Multi-level MPxIO, where a dip
2224  *	is both client and phci (which link should next_path follow?).
2225  *	Once Leadville is modified to call mdi_get_next_phci/client_path,
2226  *	this interface should be removed.
2227  */
2228 void
2229 mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip,
2230     mdi_pathinfo_t **ret_pip)
2231 {
2232 	if (MDI_CLIENT(dip)) {
2233 		*ret_pip = mdi_get_next_phci_path(dip, pip);
2234 	} else if (MDI_PHCI(dip)) {
2235 		*ret_pip = mdi_get_next_client_path(dip, pip);
2236 	} else {
2237 		*ret_pip = NULL;
2238 	}
2239 }
2240 
2241 /*
2242  * mdi_hold_path():
2243  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2244  * Return Values:
2245  *		None
2246  */
2247 void
2248 mdi_hold_path(mdi_pathinfo_t *pip)
2249 {
2250 	if (pip) {
2251 		MDI_PI_LOCK(pip);
2252 		MDI_PI_HOLD(pip);
2253 		MDI_PI_UNLOCK(pip);
2254 	}
2255 }
2256 
2257 
2258 /*
2259  * mdi_rele_path():
2260  *		Release the mdi_pathinfo node which was selected
2261  *		through mdi_select_path() mechanism or manually held by
2262  *		calling mdi_hold_path().
2263  * Return Values:
2264  *		None
2265  */
2266 void
2267 mdi_rele_path(mdi_pathinfo_t *pip)
2268 {
2269 	if (pip) {
2270 		MDI_PI_LOCK(pip);
2271 		MDI_PI_RELE(pip);
2272 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2273 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2274 		}
2275 		MDI_PI_UNLOCK(pip);
2276 	}
2277 }
2278 
2279 
2280 /*
2281  * mdi_pi_lock():
2282  * 		Lock the mdi_pathinfo node.
2283  * Note:
2284  *		The caller should release the lock by calling mdi_pi_unlock()
2285  */
2286 void
2287 mdi_pi_lock(mdi_pathinfo_t *pip)
2288 {
2289 	ASSERT(pip != NULL);
2290 	if (pip) {
2291 		MDI_PI_LOCK(pip);
2292 	}
2293 }
2294 
2295 
2296 /*
2297  * mdi_pi_unlock():
2298  * 		Unlock the mdi_pathinfo node.
2299  * Note:
2300  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2301  */
2302 void
2303 mdi_pi_unlock(mdi_pathinfo_t *pip)
2304 {
2305 	ASSERT(pip != NULL);
2306 	if (pip) {
2307 		MDI_PI_UNLOCK(pip);
2308 	}
2309 }
2310 
2311 /*
2312  * mdi_pi_find():
2313  *		Search the list of mdi_pathinfo nodes attached to the
2314  *		pHCI/Client device node whose path address matches "paddr".
2315  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2316  *		found.
2317  * Return Values:
2318  *		mdi_pathinfo node handle
2319  *		NULL
2320  * Notes:
2321  *		Caller need not hold any locks to call this function.
2322  */
2323 mdi_pathinfo_t *
2324 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2325 {
2326 	mdi_phci_t		*ph;
2327 	mdi_vhci_t		*vh;
2328 	mdi_client_t		*ct;
2329 	mdi_pathinfo_t		*pip = NULL;
2330 
2331 	if ((pdip == NULL) || (paddr == NULL)) {
2332 		return (NULL);
2333 	}
2334 	ph = i_devi_get_phci(pdip);
2335 	if (ph == NULL) {
2336 		/*
2337 		 * Invalid pHCI device, Nothing more to do.
2338 		 */
2339 		MDI_DEBUG(2, (CE_WARN, NULL,
2340 		    "!mdi_pi_find: invalid phci"));
2341 		return (NULL);
2342 	}
2343 
2344 	vh = ph->ph_vhci;
2345 	if (vh == NULL) {
2346 		/*
2347 		 * Invalid vHCI device, Nothing more to do.
2348 		 */
2349 		MDI_DEBUG(2, (CE_WARN, NULL,
2350 		    "!mdi_pi_find: invalid phci"));
2351 		return (NULL);
2352 	}
2353 
2354 	/*
2355 	 * Look for client device identified by caddr (guid)
2356 	 */
2357 	if (caddr == NULL) {
2358 		/*
2359 		 * Find a mdi_pathinfo node under pHCI list for a matching
2360 		 * unit address.
2361 		 */
2362 		mutex_enter(&ph->ph_mutex);
2363 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2364 
2365 		while (pip != NULL) {
2366 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2367 				break;
2368 			}
2369 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2370 		}
2371 		mutex_exit(&ph->ph_mutex);
2372 		return (pip);
2373 	}
2374 
2375 	/*
2376 	 * Find the client device corresponding to 'caddr'
2377 	 */
2378 	mutex_enter(&mdi_mutex);
2379 	ct = i_mdi_client_find(vh, caddr);
2380 	if (ct == NULL) {
2381 		/*
2382 		 * Client not found, Obviously mdi_pathinfo node has not been
2383 		 * created yet.
2384 		 */
2385 		mutex_exit(&mdi_mutex);
2386 		return (pip);
2387 	}
2388 
2389 	/*
2390 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2391 	 * pHCI and paddr
2392 	 */
2393 	MDI_CLIENT_LOCK(ct);
2394 
2395 	/*
2396 	 * Release the global mutex as it is no more needed. Note: We always
2397 	 * respect the locking order while acquiring.
2398 	 */
2399 	mutex_exit(&mdi_mutex);
2400 
2401 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2402 	while (pip != NULL) {
2403 		/*
2404 		 * Compare the unit address
2405 		 */
2406 		if ((MDI_PI(pip)->pi_phci == ph) &&
2407 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2408 			break;
2409 		}
2410 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2411 	}
2412 	MDI_CLIENT_UNLOCK(ct);
2413 	return (pip);
2414 }
2415 
2416 /*
2417  * mdi_pi_alloc():
2418  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2419  *		The mdi_pathinfo node returned by this function identifies a
2420  *		unique device path is capable of having properties attached
2421  *		and passed to mdi_pi_online() to fully attach and online the
2422  *		path and client device node.
2423  *		The mdi_pathinfo node returned by this function must be
2424  *		destroyed using mdi_pi_free() if the path is no longer
2425  *		operational or if the caller fails to attach a client device
2426  *		node when calling mdi_pi_online(). The framework will not free
2427  *		the resources allocated.
2428  *		This function can be called from both interrupt and kernel
2429  *		contexts.  DDI_NOSLEEP flag should be used while calling
2430  *		from interrupt contexts.
2431  * Return Values:
2432  *		MDI_SUCCESS
2433  *		MDI_FAILURE
2434  *		MDI_NOMEM
2435  */
2436 /*ARGSUSED*/
2437 int
2438 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2439     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2440 {
2441 	mdi_vhci_t	*vh;
2442 	mdi_phci_t	*ph;
2443 	mdi_client_t	*ct;
2444 	mdi_pathinfo_t	*pip = NULL;
2445 	dev_info_t	*cdip;
2446 	int		rv = MDI_NOMEM;
2447 
2448 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2449 	    ret_pip == NULL) {
2450 		/* Nothing more to do */
2451 		return (MDI_FAILURE);
2452 	}
2453 
2454 	*ret_pip = NULL;
2455 	ph = i_devi_get_phci(pdip);
2456 	ASSERT(ph != NULL);
2457 	if (ph == NULL) {
2458 		/* Invalid pHCI device, return failure */
2459 		MDI_DEBUG(1, (CE_WARN, NULL,
2460 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2461 		return (MDI_FAILURE);
2462 	}
2463 
2464 	MDI_PHCI_LOCK(ph);
2465 	vh = ph->ph_vhci;
2466 	if (vh == NULL) {
2467 		/* Invalid vHCI device, return failure */
2468 		MDI_DEBUG(1, (CE_WARN, NULL,
2469 		    "!mdi_pi_alloc: invalid pHCI=%p", pdip));
2470 		MDI_PHCI_UNLOCK(ph);
2471 		return (MDI_FAILURE);
2472 	}
2473 
2474 	if (MDI_PHCI_IS_READY(ph) == 0) {
2475 		/*
2476 		 * Do not allow new node creation when pHCI is in
2477 		 * offline/suspended states
2478 		 */
2479 		MDI_DEBUG(1, (CE_WARN, NULL,
2480 		    "mdi_pi_alloc: pHCI=%p is not ready", ph));
2481 		MDI_PHCI_UNLOCK(ph);
2482 		return (MDI_BUSY);
2483 	}
2484 	MDI_PHCI_UNSTABLE(ph);
2485 	MDI_PHCI_UNLOCK(ph);
2486 
2487 	/*
2488 	 * Look for a client device with matching guid identified by caddr,
2489 	 * If not found create one
2490 	 */
2491 	mutex_enter(&mdi_mutex);
2492 	ct = i_mdi_client_find(vh, caddr);
2493 	if (ct == NULL) {
2494 		ct = i_mdi_client_alloc(vh, cname, caddr, flags);
2495 		if (ct == NULL)
2496 			goto fail;
2497 	}
2498 
2499 	if (ct->ct_dip == NULL) {
2500 		/*
2501 		 * Allocate a devinfo node
2502 		 */
2503 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2504 		    compatible, ncompatible, flags);
2505 		if (ct->ct_dip == NULL) {
2506 			(void) i_mdi_client_free(vh, ct);
2507 			goto fail;
2508 		}
2509 	}
2510 	cdip = ct->ct_dip;
2511 
2512 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2513 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2514 
2515 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2516 	while (pip != NULL) {
2517 		/*
2518 		 * Compare the unit address
2519 		 */
2520 		if ((MDI_PI(pip)->pi_phci == ph) &&
2521 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2522 			break;
2523 		}
2524 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2525 	}
2526 
2527 	if (pip == NULL) {
2528 		/*
2529 		 * This is a new path for this client device.  Allocate and
2530 		 * initialize a new pathinfo node
2531 		 */
2532 		pip = i_mdi_pi_alloc(ph, paddr, ct, flags);
2533 		if (pip == NULL) {
2534 			(void) i_mdi_client_free(vh, ct);
2535 			goto fail;
2536 		}
2537 	}
2538 	rv = MDI_SUCCESS;
2539 
2540 fail:
2541 	/*
2542 	 * Release the global mutex.
2543 	 */
2544 	mutex_exit(&mdi_mutex);
2545 
2546 	/*
2547 	 * Mark the pHCI as stable
2548 	 */
2549 	MDI_PHCI_LOCK(ph);
2550 	MDI_PHCI_STABLE(ph);
2551 	MDI_PHCI_UNLOCK(ph);
2552 	*ret_pip = pip;
2553 	return (rv);
2554 }
2555 
2556 /*ARGSUSED*/
2557 int
2558 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2559     int flags, mdi_pathinfo_t **ret_pip)
2560 {
2561 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2562 	    flags, ret_pip));
2563 }
2564 
2565 /*
2566  * i_mdi_pi_alloc():
2567  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2568  * Return Values:
2569  *		mdi_pathinfo
2570  */
2571 
2572 /*ARGSUSED*/
2573 static mdi_pathinfo_t *
2574 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct, int flags)
2575 {
2576 	mdi_pathinfo_t	*pip = NULL;
2577 	char		*pi_addr = NULL;
2578 	nvlist_t	*pi_prop = NULL;
2579 
2580 	int		ct_circular;
2581 	int		ph_circular;
2582 
2583 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo),
2584 	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
2585 	if (pip == NULL)
2586 		goto fail;
2587 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2588 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2589 	    MDI_PATHINFO_STATE_TRANSIENT;
2590 
2591 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2592 		MDI_PI_SET_USER_DISABLE(pip);
2593 
2594 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2595 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2596 
2597 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2598 		MDI_PI_SET_DRV_DISABLE(pip);
2599 
2600 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2601 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2602 	MDI_PI(pip)->pi_client = ct;
2603 	MDI_PI(pip)->pi_phci = ph;
2604 	pi_addr =
2605 	    MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1,
2606 	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
2607 	if (pi_addr == NULL)
2608 		goto fail;
2609 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2610 	(void) nvlist_alloc(&pi_prop, NV_UNIQUE_NAME,
2611 	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
2612 	if (pi_prop == NULL)
2613 		goto fail;
2614 	MDI_PI(pip)->pi_prop = pi_prop;
2615 	MDI_PI(pip)->pi_pprivate = NULL;
2616 	MDI_PI(pip)->pi_cprivate = NULL;
2617 	MDI_PI(pip)->pi_vprivate = NULL;
2618 	MDI_PI(pip)->pi_client_link = NULL;
2619 	MDI_PI(pip)->pi_phci_link = NULL;
2620 	MDI_PI(pip)->pi_ref_cnt = 0;
2621 	MDI_PI(pip)->pi_kstats = NULL;
2622 	MDI_PI(pip)->pi_preferred = 1;
2623 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2624 
2625 	/*
2626 	 * Lock both dev_info nodes against changes in parallel.
2627 	 */
2628 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2629 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2630 
2631 	i_mdi_phci_add_path(ph, pip);
2632 	i_mdi_client_add_path(ct, pip);
2633 
2634 	ndi_devi_exit(ph->ph_dip, ph_circular);
2635 	ndi_devi_exit(ct->ct_dip, ct_circular);
2636 
2637 	return (pip);
2638 
2639 fail:
2640 	if (pi_prop)
2641 		(void) nvlist_free(pi_prop);
2642 	if (pi_addr)
2643 		kmem_free(pi_addr, strlen(paddr) + 1);
2644 	kmem_free(pip, sizeof (struct mdi_pathinfo));
2645 	return (NULL);
2646 }
2647 
2648 /*
2649  * i_mdi_phci_add_path():
2650  * 		Add a mdi_pathinfo node to pHCI list.
2651  * Notes:
2652  *		Caller should per-pHCI mutex
2653  */
2654 
2655 static void
2656 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2657 {
2658 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2659 
2660 	if (ph->ph_path_head == NULL) {
2661 		ph->ph_path_head = pip;
2662 	} else {
2663 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2664 	}
2665 	ph->ph_path_tail = pip;
2666 	ph->ph_path_count++;
2667 }
2668 
2669 /*
2670  * i_mdi_client_add_path():
2671  *		Add mdi_pathinfo node to client list
2672  */
2673 
2674 static void
2675 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2676 {
2677 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2678 
2679 	if (ct->ct_path_head == NULL) {
2680 		ct->ct_path_head = pip;
2681 	} else {
2682 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2683 	}
2684 	ct->ct_path_tail = pip;
2685 	ct->ct_path_count++;
2686 }
2687 
2688 /*
2689  * mdi_pi_free():
2690  *		Free the mdi_pathinfo node and also client device node if this
2691  *		is the last path to the device
2692  * Return Values:
2693  *		MDI_SUCCESS
2694  *		MDI_FAILURE
2695  *		MDI_BUSY
2696  */
2697 
2698 /*ARGSUSED*/
2699 int
2700 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
2701 {
2702 	int		rv = MDI_SUCCESS;
2703 	mdi_vhci_t	*vh;
2704 	mdi_phci_t	*ph;
2705 	mdi_client_t	*ct;
2706 	int		(*f)();
2707 	int		client_held = 0;
2708 
2709 	MDI_PI_LOCK(pip);
2710 	ph = MDI_PI(pip)->pi_phci;
2711 	ASSERT(ph != NULL);
2712 	if (ph == NULL) {
2713 		/*
2714 		 * Invalid pHCI device, return failure
2715 		 */
2716 		MDI_DEBUG(1, (CE_WARN, NULL,
2717 		    "!mdi_pi_free: invalid pHCI"));
2718 		MDI_PI_UNLOCK(pip);
2719 		return (MDI_FAILURE);
2720 	}
2721 
2722 	vh = ph->ph_vhci;
2723 	ASSERT(vh != NULL);
2724 	if (vh == NULL) {
2725 		/* Invalid pHCI device, return failure */
2726 		MDI_DEBUG(1, (CE_WARN, NULL,
2727 		    "!mdi_pi_free: invalid vHCI"));
2728 		MDI_PI_UNLOCK(pip);
2729 		return (MDI_FAILURE);
2730 	}
2731 
2732 	ct = MDI_PI(pip)->pi_client;
2733 	ASSERT(ct != NULL);
2734 	if (ct == NULL) {
2735 		/*
2736 		 * Invalid Client device, return failure
2737 		 */
2738 		MDI_DEBUG(1, (CE_WARN, NULL,
2739 		    "!mdi_pi_free: invalid client"));
2740 		MDI_PI_UNLOCK(pip);
2741 		return (MDI_FAILURE);
2742 	}
2743 
2744 	/*
2745 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
2746 	 * if the node state is either offline or init and the reference count
2747 	 * is zero.
2748 	 */
2749 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
2750 	    MDI_PI_IS_INITING(pip))) {
2751 		/*
2752 		 * Node is busy
2753 		 */
2754 		MDI_DEBUG(1, (CE_WARN, NULL,
2755 		    "!mdi_pi_free: pathinfo node is busy pip=%p", pip));
2756 		MDI_PI_UNLOCK(pip);
2757 		return (MDI_BUSY);
2758 	}
2759 
2760 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
2761 		/*
2762 		 * Give a chance for pending I/Os to complete.
2763 		 */
2764 		MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!i_mdi_pi_free: "
2765 		    "%d cmds still pending on path: %p\n",
2766 		    MDI_PI(pip)->pi_ref_cnt, pip));
2767 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
2768 		    &MDI_PI(pip)->pi_mutex,
2769 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
2770 			/*
2771 			 * The timeout time reached without ref_cnt being zero
2772 			 * being signaled.
2773 			 */
2774 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2775 			    "!i_mdi_pi_free: "
2776 			    "Timeout reached on path %p without the cond\n",
2777 			    pip));
2778 			MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip,
2779 			    "!i_mdi_pi_free: "
2780 			    "%d cmds still pending on path: %p\n",
2781 			    MDI_PI(pip)->pi_ref_cnt, pip));
2782 			MDI_PI_UNLOCK(pip);
2783 			return (MDI_BUSY);
2784 		}
2785 	}
2786 	if (MDI_PI(pip)->pi_pm_held) {
2787 		client_held = 1;
2788 	}
2789 	MDI_PI_UNLOCK(pip);
2790 
2791 	MDI_CLIENT_LOCK(ct);
2792 
2793 	/* Prevent further failovers till mdi_mutex is held */
2794 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
2795 
2796 	/*
2797 	 * Wait till failover is complete before removing this node.
2798 	 */
2799 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
2800 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
2801 
2802 	MDI_CLIENT_UNLOCK(ct);
2803 	mutex_enter(&mdi_mutex);
2804 	MDI_CLIENT_LOCK(ct);
2805 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
2806 
2807 	if (!MDI_PI_IS_INITING(pip)) {
2808 		f = vh->vh_ops->vo_pi_uninit;
2809 		if (f != NULL) {
2810 			rv = (*f)(vh->vh_dip, pip, 0);
2811 		}
2812 	}
2813 	/*
2814 	 * If vo_pi_uninit() completed successfully.
2815 	 */
2816 	if (rv == MDI_SUCCESS) {
2817 		if (client_held) {
2818 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
2819 			    "i_mdi_pm_rele_client\n"));
2820 			i_mdi_pm_rele_client(ct, 1);
2821 		}
2822 		i_mdi_pi_free(ph, pip, ct);
2823 		if (ct->ct_path_count == 0) {
2824 			/*
2825 			 * Client lost its last path.
2826 			 * Clean up the client device
2827 			 */
2828 			MDI_CLIENT_UNLOCK(ct);
2829 			(void) i_mdi_client_free(ct->ct_vhci, ct);
2830 			mutex_exit(&mdi_mutex);
2831 			return (rv);
2832 		}
2833 	}
2834 	MDI_CLIENT_UNLOCK(ct);
2835 	mutex_exit(&mdi_mutex);
2836 	return (rv);
2837 }
2838 
2839 /*
2840  * i_mdi_pi_free():
2841  *		Free the mdi_pathinfo node
2842  */
2843 static void
2844 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
2845 {
2846 	int	ct_circular;
2847 	int	ph_circular;
2848 
2849 	/*
2850 	 * remove any per-path kstats
2851 	 */
2852 	i_mdi_pi_kstat_destroy(pip);
2853 
2854 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2855 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2856 
2857 	i_mdi_client_remove_path(ct, pip);
2858 	i_mdi_phci_remove_path(ph, pip);
2859 
2860 	ndi_devi_exit(ph->ph_dip, ph_circular);
2861 	ndi_devi_exit(ct->ct_dip, ct_circular);
2862 
2863 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
2864 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
2865 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
2866 	if (MDI_PI(pip)->pi_addr) {
2867 		kmem_free(MDI_PI(pip)->pi_addr,
2868 		    strlen(MDI_PI(pip)->pi_addr) + 1);
2869 		MDI_PI(pip)->pi_addr = NULL;
2870 	}
2871 
2872 	if (MDI_PI(pip)->pi_prop) {
2873 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
2874 		MDI_PI(pip)->pi_prop = NULL;
2875 	}
2876 	kmem_free(pip, sizeof (struct mdi_pathinfo));
2877 }
2878 
2879 
2880 /*
2881  * i_mdi_phci_remove_path():
2882  * 		Remove a mdi_pathinfo node from pHCI list.
2883  * Notes:
2884  *		Caller should hold per-pHCI mutex
2885  */
2886 
2887 static void
2888 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2889 {
2890 	mdi_pathinfo_t	*prev = NULL;
2891 	mdi_pathinfo_t	*path = NULL;
2892 
2893 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2894 
2895 	path = ph->ph_path_head;
2896 	while (path != NULL) {
2897 		if (path == pip) {
2898 			break;
2899 		}
2900 		prev = path;
2901 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2902 	}
2903 
2904 	if (path) {
2905 		ph->ph_path_count--;
2906 		if (prev) {
2907 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
2908 		} else {
2909 			ph->ph_path_head =
2910 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
2911 		}
2912 		if (ph->ph_path_tail == path) {
2913 			ph->ph_path_tail = prev;
2914 		}
2915 	}
2916 
2917 	/*
2918 	 * Clear the pHCI link
2919 	 */
2920 	MDI_PI(pip)->pi_phci_link = NULL;
2921 	MDI_PI(pip)->pi_phci = NULL;
2922 }
2923 
2924 /*
2925  * i_mdi_client_remove_path():
2926  * 		Remove a mdi_pathinfo node from client path list.
2927  */
2928 
2929 static void
2930 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2931 {
2932 	mdi_pathinfo_t	*prev = NULL;
2933 	mdi_pathinfo_t	*path;
2934 
2935 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2936 
2937 	path = ct->ct_path_head;
2938 	while (path != NULL) {
2939 		if (path == pip) {
2940 			break;
2941 		}
2942 		prev = path;
2943 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2944 	}
2945 
2946 	if (path) {
2947 		ct->ct_path_count--;
2948 		if (prev) {
2949 			MDI_PI(prev)->pi_client_link =
2950 			    MDI_PI(path)->pi_client_link;
2951 		} else {
2952 			ct->ct_path_head =
2953 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
2954 		}
2955 		if (ct->ct_path_tail == path) {
2956 			ct->ct_path_tail = prev;
2957 		}
2958 		if (ct->ct_path_last == path) {
2959 			ct->ct_path_last = ct->ct_path_head;
2960 		}
2961 	}
2962 	MDI_PI(pip)->pi_client_link = NULL;
2963 	MDI_PI(pip)->pi_client = NULL;
2964 }
2965 
2966 /*
2967  * i_mdi_pi_state_change():
2968  *		online a mdi_pathinfo node
2969  *
2970  * Return Values:
2971  *		MDI_SUCCESS
2972  *		MDI_FAILURE
2973  */
2974 /*ARGSUSED*/
2975 static int
2976 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
2977 {
2978 	int		rv = MDI_SUCCESS;
2979 	mdi_vhci_t	*vh;
2980 	mdi_phci_t	*ph;
2981 	mdi_client_t	*ct;
2982 	int		(*f)();
2983 	dev_info_t	*cdip;
2984 
2985 	MDI_PI_LOCK(pip);
2986 
2987 	ph = MDI_PI(pip)->pi_phci;
2988 	ASSERT(ph);
2989 	if (ph == NULL) {
2990 		/*
2991 		 * Invalid pHCI device, fail the request
2992 		 */
2993 		MDI_PI_UNLOCK(pip);
2994 		MDI_DEBUG(1, (CE_WARN, NULL,
2995 		    "!mdi_pi_state_change: invalid phci"));
2996 		return (MDI_FAILURE);
2997 	}
2998 
2999 	vh = ph->ph_vhci;
3000 	ASSERT(vh);
3001 	if (vh == NULL) {
3002 		/*
3003 		 * Invalid vHCI device, fail the request
3004 		 */
3005 		MDI_PI_UNLOCK(pip);
3006 		MDI_DEBUG(1, (CE_WARN, NULL,
3007 		    "!mdi_pi_state_change: invalid vhci"));
3008 		return (MDI_FAILURE);
3009 	}
3010 
3011 	ct = MDI_PI(pip)->pi_client;
3012 	ASSERT(ct != NULL);
3013 	if (ct == NULL) {
3014 		/*
3015 		 * Invalid client device, fail the request
3016 		 */
3017 		MDI_PI_UNLOCK(pip);
3018 		MDI_DEBUG(1, (CE_WARN, NULL,
3019 		    "!mdi_pi_state_change: invalid client"));
3020 		return (MDI_FAILURE);
3021 	}
3022 
3023 	/*
3024 	 * If this path has not been initialized yet, Callback vHCI driver's
3025 	 * pathinfo node initialize entry point
3026 	 */
3027 
3028 	if (MDI_PI_IS_INITING(pip)) {
3029 		MDI_PI_UNLOCK(pip);
3030 		f = vh->vh_ops->vo_pi_init;
3031 		if (f != NULL) {
3032 			rv = (*f)(vh->vh_dip, pip, 0);
3033 			if (rv != MDI_SUCCESS) {
3034 				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
3035 				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
3036 				    vh, pip));
3037 				return (MDI_FAILURE);
3038 			}
3039 		}
3040 		MDI_PI_LOCK(pip);
3041 		MDI_PI_CLEAR_TRANSIENT(pip);
3042 	}
3043 
3044 	/*
3045 	 * Do not allow state transition when pHCI is in offline/suspended
3046 	 * states
3047 	 */
3048 	i_mdi_phci_lock(ph, pip);
3049 	if (MDI_PHCI_IS_READY(ph) == 0) {
3050 		MDI_DEBUG(1, (CE_WARN, NULL,
3051 		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
3052 		MDI_PI_UNLOCK(pip);
3053 		i_mdi_phci_unlock(ph);
3054 		return (MDI_BUSY);
3055 	}
3056 	MDI_PHCI_UNSTABLE(ph);
3057 	i_mdi_phci_unlock(ph);
3058 
3059 	/*
3060 	 * Check if mdi_pathinfo state is in transient state.
3061 	 * If yes, offlining is in progress and wait till transient state is
3062 	 * cleared.
3063 	 */
3064 	if (MDI_PI_IS_TRANSIENT(pip)) {
3065 		while (MDI_PI_IS_TRANSIENT(pip)) {
3066 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3067 			    &MDI_PI(pip)->pi_mutex);
3068 		}
3069 	}
3070 
3071 	/*
3072 	 * Grab the client lock in reverse order sequence and release the
3073 	 * mdi_pathinfo mutex.
3074 	 */
3075 	i_mdi_client_lock(ct, pip);
3076 	MDI_PI_UNLOCK(pip);
3077 
3078 	/*
3079 	 * Wait till failover state is cleared
3080 	 */
3081 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3082 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3083 
3084 	/*
3085 	 * Mark the mdi_pathinfo node state as transient
3086 	 */
3087 	MDI_PI_LOCK(pip);
3088 	switch (state) {
3089 	case MDI_PATHINFO_STATE_ONLINE:
3090 		MDI_PI_SET_ONLINING(pip);
3091 		break;
3092 
3093 	case MDI_PATHINFO_STATE_STANDBY:
3094 		MDI_PI_SET_STANDBYING(pip);
3095 		break;
3096 
3097 	case MDI_PATHINFO_STATE_FAULT:
3098 		/*
3099 		 * Mark the pathinfo state as FAULTED
3100 		 */
3101 		MDI_PI_SET_FAULTING(pip);
3102 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3103 		break;
3104 
3105 	case MDI_PATHINFO_STATE_OFFLINE:
3106 		/*
3107 		 * ndi_devi_offline() cannot hold pip or ct locks.
3108 		 */
3109 		MDI_PI_UNLOCK(pip);
3110 		/*
3111 		 * Do not offline if path will become last path and path
3112 		 * is busy for user initiated events.
3113 		 */
3114 		cdip = ct->ct_dip;
3115 		if ((flag & NDI_DEVI_REMOVE) &&
3116 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3117 			i_mdi_client_unlock(ct);
3118 			rv = ndi_devi_offline(cdip, 0);
3119 			if (rv != NDI_SUCCESS) {
3120 				/*
3121 				 * Convert to MDI error code
3122 				 */
3123 				switch (rv) {
3124 				case NDI_BUSY:
3125 					rv = MDI_BUSY;
3126 					break;
3127 				default:
3128 					rv = MDI_FAILURE;
3129 					break;
3130 				}
3131 				goto state_change_exit;
3132 			} else {
3133 				i_mdi_client_lock(ct, NULL);
3134 			}
3135 		}
3136 		/*
3137 		 * Mark the mdi_pathinfo node state as transient
3138 		 */
3139 		MDI_PI_LOCK(pip);
3140 		MDI_PI_SET_OFFLINING(pip);
3141 		break;
3142 	}
3143 	MDI_PI_UNLOCK(pip);
3144 	MDI_CLIENT_UNSTABLE(ct);
3145 	i_mdi_client_unlock(ct);
3146 
3147 	f = vh->vh_ops->vo_pi_state_change;
3148 	if (f != NULL) {
3149 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3150 		if (rv == MDI_NOT_SUPPORTED) {
3151 			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3152 		}
3153 		if (rv != MDI_SUCCESS) {
3154 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
3155 			    "!vo_pi_state_change: failed rv = %x", rv));
3156 		}
3157 	}
3158 	MDI_CLIENT_LOCK(ct);
3159 	MDI_PI_LOCK(pip);
3160 	if (MDI_PI_IS_TRANSIENT(pip)) {
3161 		if (rv == MDI_SUCCESS) {
3162 			MDI_PI_CLEAR_TRANSIENT(pip);
3163 		} else {
3164 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3165 		}
3166 	}
3167 
3168 	/*
3169 	 * Wake anyone waiting for this mdi_pathinfo node
3170 	 */
3171 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3172 	MDI_PI_UNLOCK(pip);
3173 
3174 	/*
3175 	 * Mark the client device as stable
3176 	 */
3177 	MDI_CLIENT_STABLE(ct);
3178 	if (rv == MDI_SUCCESS) {
3179 		if (ct->ct_unstable == 0) {
3180 			cdip = ct->ct_dip;
3181 
3182 			/*
3183 			 * Onlining the mdi_pathinfo node will impact the
3184 			 * client state Update the client and dev_info node
3185 			 * state accordingly
3186 			 */
3187 			rv = NDI_SUCCESS;
3188 			i_mdi_client_update_state(ct);
3189 			switch (MDI_CLIENT_STATE(ct)) {
3190 			case MDI_CLIENT_STATE_OPTIMAL:
3191 			case MDI_CLIENT_STATE_DEGRADED:
3192 				if (cdip &&
3193 				    (i_ddi_node_state(cdip) < DS_READY) &&
3194 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3195 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3196 
3197 					i_mdi_client_unlock(ct);
3198 					/*
3199 					 * Must do ndi_devi_online() through
3200 					 * hotplug thread for deferred
3201 					 * attach mechanism to work
3202 					 */
3203 					rv = ndi_devi_online(cdip, 0);
3204 					i_mdi_client_lock(ct, NULL);
3205 					if ((rv != NDI_SUCCESS) &&
3206 					    (MDI_CLIENT_STATE(ct) ==
3207 					    MDI_CLIENT_STATE_DEGRADED)) {
3208 						/*
3209 						 * ndi_devi_online failed.
3210 						 * Reset client flags to
3211 						 * offline.
3212 						 */
3213 						MDI_DEBUG(1, (CE_WARN, cdip,
3214 						    "!ndi_devi_online: failed "
3215 						    " Error: %x", rv));
3216 						MDI_CLIENT_SET_OFFLINE(ct);
3217 					}
3218 					if (rv != NDI_SUCCESS) {
3219 						/* Reset the path state */
3220 						MDI_PI_LOCK(pip);
3221 						MDI_PI(pip)->pi_state =
3222 						    MDI_PI_OLD_STATE(pip);
3223 						MDI_PI_UNLOCK(pip);
3224 					}
3225 				}
3226 				break;
3227 
3228 			case MDI_CLIENT_STATE_FAILED:
3229 				/*
3230 				 * This is the last path case for
3231 				 * non-user initiated events.
3232 				 */
3233 				if (((flag & NDI_DEVI_REMOVE) == 0) &&
3234 				    cdip && (i_ddi_node_state(cdip) >=
3235 				    DS_INITIALIZED)) {
3236 					i_mdi_client_unlock(ct);
3237 					rv = ndi_devi_offline(cdip, 0);
3238 					i_mdi_client_lock(ct, NULL);
3239 
3240 					if (rv != NDI_SUCCESS) {
3241 						/*
3242 						 * ndi_devi_offline failed.
3243 						 * Reset client flags to
3244 						 * online as the path could not
3245 						 * be offlined.
3246 						 */
3247 						MDI_DEBUG(1, (CE_WARN, cdip,
3248 						    "!ndi_devi_offline: failed "
3249 						    " Error: %x", rv));
3250 						MDI_CLIENT_SET_ONLINE(ct);
3251 					}
3252 				}
3253 				break;
3254 			}
3255 			/*
3256 			 * Convert to MDI error code
3257 			 */
3258 			switch (rv) {
3259 			case NDI_SUCCESS:
3260 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3261 				i_mdi_report_path_state(ct, pip);
3262 				rv = MDI_SUCCESS;
3263 				break;
3264 			case NDI_BUSY:
3265 				rv = MDI_BUSY;
3266 				break;
3267 			default:
3268 				rv = MDI_FAILURE;
3269 				break;
3270 			}
3271 		}
3272 	}
3273 	MDI_CLIENT_UNLOCK(ct);
3274 
3275 state_change_exit:
3276 	/*
3277 	 * Mark the pHCI as stable again.
3278 	 */
3279 	MDI_PHCI_LOCK(ph);
3280 	MDI_PHCI_STABLE(ph);
3281 	MDI_PHCI_UNLOCK(ph);
3282 	return (rv);
3283 }
3284 
3285 /*
3286  * mdi_pi_online():
3287  *		Place the path_info node in the online state.  The path is
3288  *		now available to be selected by mdi_select_path() for
3289  *		transporting I/O requests to client devices.
3290  * Return Values:
3291  *		MDI_SUCCESS
3292  *		MDI_FAILURE
3293  */
3294 int
3295 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3296 {
3297 	mdi_client_t *ct = MDI_PI(pip)->pi_client;
3298 	dev_info_t *cdip;
3299 	int		client_held = 0;
3300 	int rv;
3301 
3302 	ASSERT(ct != NULL);
3303 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3304 	if (rv != MDI_SUCCESS)
3305 		return (rv);
3306 
3307 	MDI_PI_LOCK(pip);
3308 	if (MDI_PI(pip)->pi_pm_held == 0) {
3309 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3310 		    "i_mdi_pm_hold_pip\n"));
3311 		i_mdi_pm_hold_pip(pip);
3312 		client_held = 1;
3313 	}
3314 	MDI_PI_UNLOCK(pip);
3315 
3316 	if (client_held) {
3317 		MDI_CLIENT_LOCK(ct);
3318 		if (ct->ct_power_cnt == 0) {
3319 			rv = i_mdi_power_all_phci(ct);
3320 		}
3321 
3322 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3323 		    "i_mdi_pm_hold_client\n"));
3324 		i_mdi_pm_hold_client(ct, 1);
3325 		MDI_CLIENT_UNLOCK(ct);
3326 	}
3327 
3328 	/*
3329 	 * Create the per-path (pathinfo) IO and error kstats which
3330 	 * are reported via iostat(1m).
3331 	 *
3332 	 * Defer creating the per-path kstats if device is not yet
3333 	 * attached;  the names of the kstats are constructed in part
3334 	 * using the devices instance number which is assigned during
3335 	 * process of attaching the client device.
3336 	 *
3337 	 * The framework post_attach handler, mdi_post_attach(), is
3338 	 * is responsible for initializing the client's pathinfo list
3339 	 * once successfully attached.
3340 	 */
3341 	cdip = ct->ct_dip;
3342 	ASSERT(cdip);
3343 	if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED))
3344 		return (rv);
3345 
3346 	MDI_CLIENT_LOCK(ct);
3347 	rv = i_mdi_pi_kstat_create(pip);
3348 	MDI_CLIENT_UNLOCK(ct);
3349 	return (rv);
3350 }
3351 
3352 /*
3353  * mdi_pi_standby():
3354  *		Place the mdi_pathinfo node in standby state
3355  *
3356  * Return Values:
3357  *		MDI_SUCCESS
3358  *		MDI_FAILURE
3359  */
3360 int
3361 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3362 {
3363 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3364 }
3365 
3366 /*
3367  * mdi_pi_fault():
3368  *		Place the mdi_pathinfo node in fault'ed state
3369  * Return Values:
3370  *		MDI_SUCCESS
3371  *		MDI_FAILURE
3372  */
3373 int
3374 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3375 {
3376 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3377 }
3378 
3379 /*
3380  * mdi_pi_offline():
3381  *		Offline a mdi_pathinfo node.
3382  * Return Values:
3383  *		MDI_SUCCESS
3384  *		MDI_FAILURE
3385  */
3386 int
3387 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3388 {
3389 	int	ret, client_held = 0;
3390 	mdi_client_t	*ct;
3391 
3392 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3393 
3394 	if (ret == MDI_SUCCESS) {
3395 		MDI_PI_LOCK(pip);
3396 		if (MDI_PI(pip)->pi_pm_held) {
3397 			client_held = 1;
3398 		}
3399 		MDI_PI_UNLOCK(pip);
3400 
3401 		if (client_held) {
3402 			ct = MDI_PI(pip)->pi_client;
3403 			MDI_CLIENT_LOCK(ct);
3404 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3405 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3406 			i_mdi_pm_rele_client(ct, 1);
3407 			MDI_CLIENT_UNLOCK(ct);
3408 		}
3409 	}
3410 
3411 	return (ret);
3412 }
3413 
3414 /*
3415  * i_mdi_pi_offline():
3416  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3417  */
3418 static int
3419 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3420 {
3421 	dev_info_t	*vdip = NULL;
3422 	mdi_vhci_t	*vh = NULL;
3423 	mdi_client_t	*ct = NULL;
3424 	int		(*f)();
3425 	int		rv;
3426 
3427 	MDI_PI_LOCK(pip);
3428 	ct = MDI_PI(pip)->pi_client;
3429 	ASSERT(ct != NULL);
3430 
3431 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3432 		/*
3433 		 * Give a chance for pending I/Os to complete.
3434 		 */
3435 		MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3436 		    "%d cmds still pending on path: %p\n",
3437 		    MDI_PI(pip)->pi_ref_cnt, pip));
3438 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3439 		    &MDI_PI(pip)->pi_mutex,
3440 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3441 			/*
3442 			 * The timeout time reached without ref_cnt being zero
3443 			 * being signaled.
3444 			 */
3445 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3446 			    "Timeout reached on path %p without the cond\n",
3447 			    pip));
3448 			MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: "
3449 			    "%d cmds still pending on path: %p\n",
3450 			    MDI_PI(pip)->pi_ref_cnt, pip));
3451 		}
3452 	}
3453 	vh = ct->ct_vhci;
3454 	vdip = vh->vh_dip;
3455 
3456 	/*
3457 	 * Notify vHCI that has registered this event
3458 	 */
3459 	ASSERT(vh->vh_ops);
3460 	f = vh->vh_ops->vo_pi_state_change;
3461 
3462 	if (f != NULL) {
3463 		MDI_PI_UNLOCK(pip);
3464 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3465 		    flags)) != MDI_SUCCESS) {
3466 			MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed "
3467 			    "vdip 0x%x, pip 0x%x", vdip, pip));
3468 		}
3469 		MDI_PI_LOCK(pip);
3470 	}
3471 
3472 	/*
3473 	 * Set the mdi_pathinfo node state and clear the transient condition
3474 	 */
3475 	MDI_PI_SET_OFFLINE(pip);
3476 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3477 	MDI_PI_UNLOCK(pip);
3478 
3479 	MDI_CLIENT_LOCK(ct);
3480 	if (rv == MDI_SUCCESS) {
3481 		if (ct->ct_unstable == 0) {
3482 			dev_info_t	*cdip = ct->ct_dip;
3483 
3484 			/*
3485 			 * Onlining the mdi_pathinfo node will impact the
3486 			 * client state Update the client and dev_info node
3487 			 * state accordingly
3488 			 */
3489 			i_mdi_client_update_state(ct);
3490 			rv = NDI_SUCCESS;
3491 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3492 				if (cdip &&
3493 				    (i_ddi_node_state(cdip) >=
3494 				    DS_INITIALIZED)) {
3495 					MDI_CLIENT_UNLOCK(ct);
3496 					rv = ndi_devi_offline(cdip, 0);
3497 					MDI_CLIENT_LOCK(ct);
3498 					if (rv != NDI_SUCCESS) {
3499 						/*
3500 						 * ndi_devi_offline failed.
3501 						 * Reset client flags to
3502 						 * online.
3503 						 */
3504 						MDI_DEBUG(4, (CE_WARN, cdip,
3505 						    "!ndi_devi_offline: failed "
3506 						    " Error: %x", rv));
3507 						MDI_CLIENT_SET_ONLINE(ct);
3508 					}
3509 				}
3510 			}
3511 			/*
3512 			 * Convert to MDI error code
3513 			 */
3514 			switch (rv) {
3515 			case NDI_SUCCESS:
3516 				rv = MDI_SUCCESS;
3517 				break;
3518 			case NDI_BUSY:
3519 				rv = MDI_BUSY;
3520 				break;
3521 			default:
3522 				rv = MDI_FAILURE;
3523 				break;
3524 			}
3525 		}
3526 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3527 		i_mdi_report_path_state(ct, pip);
3528 	}
3529 
3530 	MDI_CLIENT_UNLOCK(ct);
3531 
3532 	/*
3533 	 * Change in the mdi_pathinfo node state will impact the client state
3534 	 */
3535 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3536 	    ct, pip));
3537 	return (rv);
3538 }
3539 
3540 
3541 /*
3542  * mdi_pi_get_addr():
3543  *		Get the unit address associated with a mdi_pathinfo node
3544  *
3545  * Return Values:
3546  *		char *
3547  */
3548 char *
3549 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3550 {
3551 	char *addr;
3552 
3553 	if (pip == NULL)
3554 		return (NULL);
3555 
3556 	addr = MDI_PI(pip)->pi_addr;
3557 
3558 	/*
3559 	 * XXX To be removed when libg_fc is updated to
3560 	 * skip leading 'w' in NWS consolidation.
3561 	 */
3562 	if (*addr == 'w')
3563 		addr += 1;
3564 
3565 	return (addr);
3566 }
3567 
3568 /*
3569  * mdi_pi_get_client():
3570  *		Get the client devinfo associated with a mdi_pathinfo node
3571  *
3572  * Return Values:
3573  *		Handle to client device dev_info node
3574  */
3575 dev_info_t *
3576 mdi_pi_get_client(mdi_pathinfo_t *pip)
3577 {
3578 	dev_info_t	*dip = NULL;
3579 	if (pip) {
3580 		dip = MDI_PI(pip)->pi_client->ct_dip;
3581 	}
3582 	return (dip);
3583 }
3584 
3585 /*
3586  * mdi_pi_get_phci():
3587  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3588  * Return Values:
3589  *		Handle to dev_info node
3590  */
3591 dev_info_t *
3592 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3593 {
3594 	dev_info_t	*dip = NULL;
3595 	if (pip) {
3596 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3597 	}
3598 	return (dip);
3599 }
3600 
3601 /*
3602  * mdi_pi_get_client_private():
3603  *		Get the client private information associated with the
3604  *		mdi_pathinfo node
3605  */
3606 void *
3607 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3608 {
3609 	void *cprivate = NULL;
3610 	if (pip) {
3611 		cprivate = MDI_PI(pip)->pi_cprivate;
3612 	}
3613 	return (cprivate);
3614 }
3615 
3616 /*
3617  * mdi_pi_set_client_private():
3618  *		Set the client private information in the mdi_pathinfo node
3619  */
3620 void
3621 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3622 {
3623 	if (pip) {
3624 		MDI_PI(pip)->pi_cprivate = priv;
3625 	}
3626 }
3627 
3628 /*
3629  * mdi_pi_get_phci_private():
3630  *		Get the pHCI private information associated with the
3631  *		mdi_pathinfo node
3632  */
3633 caddr_t
3634 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3635 {
3636 	caddr_t	pprivate = NULL;
3637 	if (pip) {
3638 		pprivate = MDI_PI(pip)->pi_pprivate;
3639 	}
3640 	return (pprivate);
3641 }
3642 
3643 /*
3644  * mdi_pi_set_phci_private():
3645  *		Set the pHCI private information in the mdi_pathinfo node
3646  */
3647 void
3648 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3649 {
3650 	if (pip) {
3651 		MDI_PI(pip)->pi_pprivate = priv;
3652 	}
3653 }
3654 
3655 /*
3656  * mdi_pi_get_state():
3657  *		Get the mdi_pathinfo node state. Transient states are internal
3658  *		and not provided to the users
3659  */
3660 mdi_pathinfo_state_t
3661 mdi_pi_get_state(mdi_pathinfo_t *pip)
3662 {
3663 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3664 
3665 	if (pip) {
3666 		if (MDI_PI_IS_TRANSIENT(pip)) {
3667 			/*
3668 			 * mdi_pathinfo is in state transition.  Return the
3669 			 * last good state.
3670 			 */
3671 			state = MDI_PI_OLD_STATE(pip);
3672 		} else {
3673 			state = MDI_PI_STATE(pip);
3674 		}
3675 	}
3676 	return (state);
3677 }
3678 
3679 /*
3680  * Note that the following function needs to be the new interface for
3681  * mdi_pi_get_state when mpxio gets integrated to ON.
3682  */
3683 int
3684 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
3685 		uint32_t *ext_state)
3686 {
3687 	*state = MDI_PATHINFO_STATE_INIT;
3688 
3689 	if (pip) {
3690 		if (MDI_PI_IS_TRANSIENT(pip)) {
3691 			/*
3692 			 * mdi_pathinfo is in state transition.  Return the
3693 			 * last good state.
3694 			 */
3695 			*state = MDI_PI_OLD_STATE(pip);
3696 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
3697 		} else {
3698 			*state = MDI_PI_STATE(pip);
3699 			*ext_state = MDI_PI_EXT_STATE(pip);
3700 		}
3701 	}
3702 	return (MDI_SUCCESS);
3703 }
3704 
3705 /*
3706  * mdi_pi_get_preferred:
3707  *	Get the preferred path flag
3708  */
3709 int
3710 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
3711 {
3712 	if (pip) {
3713 		return (MDI_PI(pip)->pi_preferred);
3714 	}
3715 	return (0);
3716 }
3717 
3718 /*
3719  * mdi_pi_set_preferred:
3720  *	Set the preferred path flag
3721  */
3722 void
3723 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
3724 {
3725 	if (pip) {
3726 		MDI_PI(pip)->pi_preferred = preferred;
3727 	}
3728 }
3729 
3730 
3731 /*
3732  * mdi_pi_set_state():
3733  *		Set the mdi_pathinfo node state
3734  */
3735 void
3736 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
3737 {
3738 	uint32_t	ext_state;
3739 
3740 	if (pip) {
3741 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
3742 		MDI_PI(pip)->pi_state = state;
3743 		MDI_PI(pip)->pi_state |= ext_state;
3744 	}
3745 }
3746 
3747 /*
3748  * Property functions:
3749  */
3750 
3751 int
3752 i_map_nvlist_error_to_mdi(int val)
3753 {
3754 	int rv;
3755 
3756 	switch (val) {
3757 	case 0:
3758 		rv = DDI_PROP_SUCCESS;
3759 		break;
3760 	case EINVAL:
3761 	case ENOTSUP:
3762 		rv = DDI_PROP_INVAL_ARG;
3763 		break;
3764 	case ENOMEM:
3765 		rv = DDI_PROP_NO_MEMORY;
3766 		break;
3767 	default:
3768 		rv = DDI_PROP_NOT_FOUND;
3769 		break;
3770 	}
3771 	return (rv);
3772 }
3773 
3774 /*
3775  * mdi_pi_get_next_prop():
3776  * 		Property walk function.  The caller should hold mdi_pi_lock()
3777  *		and release by calling mdi_pi_unlock() at the end of walk to
3778  *		get a consistent value.
3779  */
3780 
3781 nvpair_t *
3782 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
3783 {
3784 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3785 		return (NULL);
3786 	}
3787 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3788 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
3789 }
3790 
3791 /*
3792  * mdi_prop_remove():
3793  * 		Remove the named property from the named list.
3794  */
3795 
3796 int
3797 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
3798 {
3799 	if (pip == NULL) {
3800 		return (DDI_PROP_NOT_FOUND);
3801 	}
3802 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3803 	MDI_PI_LOCK(pip);
3804 	if (MDI_PI(pip)->pi_prop == NULL) {
3805 		MDI_PI_UNLOCK(pip);
3806 		return (DDI_PROP_NOT_FOUND);
3807 	}
3808 	if (name) {
3809 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
3810 	} else {
3811 		char		nvp_name[MAXNAMELEN];
3812 		nvpair_t	*nvp;
3813 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
3814 		while (nvp) {
3815 			nvpair_t	*next;
3816 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
3817 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
3818 			    nvpair_name(nvp));
3819 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
3820 			    nvp_name);
3821 			nvp = next;
3822 		}
3823 	}
3824 	MDI_PI_UNLOCK(pip);
3825 	return (DDI_PROP_SUCCESS);
3826 }
3827 
3828 /*
3829  * mdi_prop_size():
3830  * 		Get buffer size needed to pack the property data.
3831  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
3832  *		buffer size.
3833  */
3834 
3835 int
3836 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
3837 {
3838 	int	rv;
3839 	size_t	bufsize;
3840 
3841 	*buflenp = 0;
3842 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
3843 		return (DDI_PROP_NOT_FOUND);
3844 	}
3845 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3846 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
3847 	    &bufsize, NV_ENCODE_NATIVE);
3848 	*buflenp = bufsize;
3849 	return (i_map_nvlist_error_to_mdi(rv));
3850 }
3851 
3852 /*
3853  * mdi_prop_pack():
3854  * 		pack the property list.  The caller should hold the
3855  *		mdi_pathinfo_t node to get a consistent data
3856  */
3857 
3858 int
3859 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
3860 {
3861 	int	rv;
3862 	size_t	bufsize;
3863 
3864 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
3865 		return (DDI_PROP_NOT_FOUND);
3866 	}
3867 
3868 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3869 
3870 	bufsize = buflen;
3871 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
3872 	    NV_ENCODE_NATIVE, KM_SLEEP);
3873 
3874 	return (i_map_nvlist_error_to_mdi(rv));
3875 }
3876 
3877 /*
3878  * mdi_prop_update_byte():
3879  *		Create/Update a byte property
3880  */
3881 int
3882 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
3883 {
3884 	int rv;
3885 
3886 	if (pip == NULL) {
3887 		return (DDI_PROP_INVAL_ARG);
3888 	}
3889 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3890 	MDI_PI_LOCK(pip);
3891 	if (MDI_PI(pip)->pi_prop == NULL) {
3892 		MDI_PI_UNLOCK(pip);
3893 		return (DDI_PROP_NOT_FOUND);
3894 	}
3895 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
3896 	MDI_PI_UNLOCK(pip);
3897 	return (i_map_nvlist_error_to_mdi(rv));
3898 }
3899 
3900 /*
3901  * mdi_prop_update_byte_array():
3902  *		Create/Update a byte array property
3903  */
3904 int
3905 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
3906     uint_t nelements)
3907 {
3908 	int rv;
3909 
3910 	if (pip == NULL) {
3911 		return (DDI_PROP_INVAL_ARG);
3912 	}
3913 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3914 	MDI_PI_LOCK(pip);
3915 	if (MDI_PI(pip)->pi_prop == NULL) {
3916 		MDI_PI_UNLOCK(pip);
3917 		return (DDI_PROP_NOT_FOUND);
3918 	}
3919 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
3920 	MDI_PI_UNLOCK(pip);
3921 	return (i_map_nvlist_error_to_mdi(rv));
3922 }
3923 
3924 /*
3925  * mdi_prop_update_int():
3926  *		Create/Update a 32 bit integer property
3927  */
3928 int
3929 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
3930 {
3931 	int rv;
3932 
3933 	if (pip == NULL) {
3934 		return (DDI_PROP_INVAL_ARG);
3935 	}
3936 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3937 	MDI_PI_LOCK(pip);
3938 	if (MDI_PI(pip)->pi_prop == NULL) {
3939 		MDI_PI_UNLOCK(pip);
3940 		return (DDI_PROP_NOT_FOUND);
3941 	}
3942 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
3943 	MDI_PI_UNLOCK(pip);
3944 	return (i_map_nvlist_error_to_mdi(rv));
3945 }
3946 
3947 /*
3948  * mdi_prop_update_int64():
3949  *		Create/Update a 64 bit integer property
3950  */
3951 int
3952 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
3953 {
3954 	int rv;
3955 
3956 	if (pip == NULL) {
3957 		return (DDI_PROP_INVAL_ARG);
3958 	}
3959 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3960 	MDI_PI_LOCK(pip);
3961 	if (MDI_PI(pip)->pi_prop == NULL) {
3962 		MDI_PI_UNLOCK(pip);
3963 		return (DDI_PROP_NOT_FOUND);
3964 	}
3965 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
3966 	MDI_PI_UNLOCK(pip);
3967 	return (i_map_nvlist_error_to_mdi(rv));
3968 }
3969 
3970 /*
3971  * mdi_prop_update_int_array():
3972  *		Create/Update a int array property
3973  */
3974 int
3975 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
3976 	    uint_t nelements)
3977 {
3978 	int rv;
3979 
3980 	if (pip == NULL) {
3981 		return (DDI_PROP_INVAL_ARG);
3982 	}
3983 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
3984 	MDI_PI_LOCK(pip);
3985 	if (MDI_PI(pip)->pi_prop == NULL) {
3986 		MDI_PI_UNLOCK(pip);
3987 		return (DDI_PROP_NOT_FOUND);
3988 	}
3989 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
3990 	    nelements);
3991 	MDI_PI_UNLOCK(pip);
3992 	return (i_map_nvlist_error_to_mdi(rv));
3993 }
3994 
3995 /*
3996  * mdi_prop_update_string():
3997  *		Create/Update a string property
3998  */
3999 int
4000 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4001 {
4002 	int rv;
4003 
4004 	if (pip == NULL) {
4005 		return (DDI_PROP_INVAL_ARG);
4006 	}
4007 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4008 	MDI_PI_LOCK(pip);
4009 	if (MDI_PI(pip)->pi_prop == NULL) {
4010 		MDI_PI_UNLOCK(pip);
4011 		return (DDI_PROP_NOT_FOUND);
4012 	}
4013 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4014 	MDI_PI_UNLOCK(pip);
4015 	return (i_map_nvlist_error_to_mdi(rv));
4016 }
4017 
4018 /*
4019  * mdi_prop_update_string_array():
4020  *		Create/Update a string array property
4021  */
4022 int
4023 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4024     uint_t nelements)
4025 {
4026 	int rv;
4027 
4028 	if (pip == NULL) {
4029 		return (DDI_PROP_INVAL_ARG);
4030 	}
4031 	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
4032 	MDI_PI_LOCK(pip);
4033 	if (MDI_PI(pip)->pi_prop == NULL) {
4034 		MDI_PI_UNLOCK(pip);
4035 		return (DDI_PROP_NOT_FOUND);
4036 	}
4037 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4038 	    nelements);
4039 	MDI_PI_UNLOCK(pip);
4040 	return (i_map_nvlist_error_to_mdi(rv));
4041 }
4042 
4043 /*
4044  * mdi_prop_lookup_byte():
4045  * 		Look for byte property identified by name.  The data returned
4046  *		is the actual property and valid as long as mdi_pathinfo_t node
4047  *		is alive.
4048  */
4049 int
4050 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4051 {
4052 	int rv;
4053 
4054 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4055 		return (DDI_PROP_NOT_FOUND);
4056 	}
4057 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4058 	return (i_map_nvlist_error_to_mdi(rv));
4059 }
4060 
4061 
4062 /*
4063  * mdi_prop_lookup_byte_array():
4064  * 		Look for byte array property identified by name.  The data
4065  *		returned is the actual property and valid as long as
4066  *		mdi_pathinfo_t node is alive.
4067  */
4068 int
4069 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4070     uint_t *nelements)
4071 {
4072 	int rv;
4073 
4074 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4075 		return (DDI_PROP_NOT_FOUND);
4076 	}
4077 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4078 	    nelements);
4079 	return (i_map_nvlist_error_to_mdi(rv));
4080 }
4081 
4082 /*
4083  * mdi_prop_lookup_int():
4084  * 		Look for int property identified by name.  The data returned
4085  *		is the actual property and valid as long as mdi_pathinfo_t
4086  *		node is alive.
4087  */
4088 int
4089 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4090 {
4091 	int rv;
4092 
4093 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4094 		return (DDI_PROP_NOT_FOUND);
4095 	}
4096 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4097 	return (i_map_nvlist_error_to_mdi(rv));
4098 }
4099 
4100 /*
4101  * mdi_prop_lookup_int64():
4102  * 		Look for int64 property identified by name.  The data returned
4103  *		is the actual property and valid as long as mdi_pathinfo_t node
4104  *		is alive.
4105  */
4106 int
4107 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4108 {
4109 	int rv;
4110 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4111 		return (DDI_PROP_NOT_FOUND);
4112 	}
4113 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4114 	return (i_map_nvlist_error_to_mdi(rv));
4115 }
4116 
4117 /*
4118  * mdi_prop_lookup_int_array():
4119  * 		Look for int array property identified by name.  The data
4120  *		returned is the actual property and valid as long as
4121  *		mdi_pathinfo_t node is alive.
4122  */
4123 int
4124 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4125     uint_t *nelements)
4126 {
4127 	int rv;
4128 
4129 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4130 		return (DDI_PROP_NOT_FOUND);
4131 	}
4132 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4133 	    (int32_t **)data, nelements);
4134 	return (i_map_nvlist_error_to_mdi(rv));
4135 }
4136 
4137 /*
4138  * mdi_prop_lookup_string():
4139  * 		Look for string property identified by name.  The data
4140  *		returned is the actual property and valid as long as
4141  *		mdi_pathinfo_t node is alive.
4142  */
4143 int
4144 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4145 {
4146 	int rv;
4147 
4148 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4149 		return (DDI_PROP_NOT_FOUND);
4150 	}
4151 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4152 	return (i_map_nvlist_error_to_mdi(rv));
4153 }
4154 
4155 /*
4156  * mdi_prop_lookup_string_array():
4157  * 		Look for string array property identified by name.  The data
4158  *		returned is the actual property and valid as long as
4159  *		mdi_pathinfo_t node is alive.
4160  */
4161 
4162 int
4163 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4164     uint_t *nelements)
4165 {
4166 	int rv;
4167 
4168 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4169 		return (DDI_PROP_NOT_FOUND);
4170 	}
4171 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4172 	    nelements);
4173 	return (i_map_nvlist_error_to_mdi(rv));
4174 }
4175 
4176 /*
4177  * mdi_prop_free():
4178  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4179  *		functions return the pointer to actual property data and not a
4180  *		copy of it.  So the data returned is valid as long as
4181  *		mdi_pathinfo_t node is valid.
4182  */
4183 
4184 /*ARGSUSED*/
4185 int
4186 mdi_prop_free(void *data)
4187 {
4188 	return (DDI_PROP_SUCCESS);
4189 }
4190 
4191 /*ARGSUSED*/
4192 static void
4193 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4194 {
4195 	char		*phci_path, *ct_path;
4196 	char		*ct_status;
4197 	char		*status;
4198 	dev_info_t	*dip = ct->ct_dip;
4199 	char		lb_buf[64];
4200 
4201 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4202 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4203 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4204 		return;
4205 	}
4206 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4207 		ct_status = "optimal";
4208 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4209 		ct_status = "degraded";
4210 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4211 		ct_status = "failed";
4212 	} else {
4213 		ct_status = "unknown";
4214 	}
4215 
4216 	if (MDI_PI_IS_OFFLINE(pip)) {
4217 		status = "offline";
4218 	} else if (MDI_PI_IS_ONLINE(pip)) {
4219 		status = "online";
4220 	} else if (MDI_PI_IS_STANDBY(pip)) {
4221 		status = "standby";
4222 	} else if (MDI_PI_IS_FAULT(pip)) {
4223 		status = "faulted";
4224 	} else {
4225 		status = "unknown";
4226 	}
4227 
4228 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4229 		(void) snprintf(lb_buf, sizeof (lb_buf),
4230 		    "%s, region-size: %d", mdi_load_balance_lba,
4231 			ct->ct_lb_args->region_size);
4232 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4233 		(void) snprintf(lb_buf, sizeof (lb_buf),
4234 		    "%s", mdi_load_balance_none);
4235 	} else {
4236 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4237 		    mdi_load_balance_rr);
4238 	}
4239 
4240 	if (dip) {
4241 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4242 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4243 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4244 		    "path %s (%s%d) to target address: %s is %s"
4245 		    " Load balancing: %s\n",
4246 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4247 		    ddi_get_instance(dip), ct_status,
4248 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4249 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4250 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4251 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4252 		kmem_free(phci_path, MAXPATHLEN);
4253 		kmem_free(ct_path, MAXPATHLEN);
4254 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4255 	}
4256 }
4257 
4258 #ifdef	DEBUG
4259 /*
4260  * i_mdi_log():
4261  *		Utility function for error message management
4262  *
4263  */
4264 
4265 /*VARARGS3*/
4266 static void
4267 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
4268 {
4269 	char		buf[MAXNAMELEN];
4270 	char		name[MAXNAMELEN];
4271 	va_list		ap;
4272 	int		log_only = 0;
4273 	int		boot_only = 0;
4274 	int		console_only = 0;
4275 
4276 	if (dip) {
4277 		if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) {
4278 			(void) snprintf(name, MAXNAMELEN, "%s%d:\n",
4279 			    ddi_node_name(dip), ddi_get_instance(dip));
4280 		} else {
4281 			(void) snprintf(name, MAXNAMELEN, "%s%d:",
4282 			    ddi_node_name(dip), ddi_get_instance(dip));
4283 		}
4284 	} else {
4285 		name[0] = '\0';
4286 	}
4287 
4288 	va_start(ap, fmt);
4289 	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
4290 	va_end(ap);
4291 
4292 	switch (buf[0]) {
4293 	case '!':
4294 		log_only = 1;
4295 		break;
4296 	case '?':
4297 		boot_only = 1;
4298 		break;
4299 	case '^':
4300 		console_only = 1;
4301 		break;
4302 	}
4303 
4304 	switch (level) {
4305 	case CE_NOTE:
4306 		level = CE_CONT;
4307 		/* FALLTHROUGH */
4308 	case CE_CONT:
4309 	case CE_WARN:
4310 	case CE_PANIC:
4311 		if (boot_only) {
4312 			cmn_err(level, "?%s\t%s", name, &buf[1]);
4313 		} else if (console_only) {
4314 			cmn_err(level, "^%s\t%s", name, &buf[1]);
4315 		} else if (log_only) {
4316 			cmn_err(level, "!%s\t%s", name, &buf[1]);
4317 		} else {
4318 			cmn_err(level, "%s\t%s", name, buf);
4319 		}
4320 		break;
4321 	default:
4322 		cmn_err(level, "%s\t%s", name, buf);
4323 		break;
4324 	}
4325 }
4326 #endif	/* DEBUG */
4327 
4328 void
4329 i_mdi_client_online(dev_info_t *ct_dip)
4330 {
4331 	mdi_client_t	*ct;
4332 
4333 	/*
4334 	 * Client online notification. Mark client state as online
4335 	 * restore our binding with dev_info node
4336 	 */
4337 	ct = i_devi_get_client(ct_dip);
4338 	ASSERT(ct != NULL);
4339 	MDI_CLIENT_LOCK(ct);
4340 	MDI_CLIENT_SET_ONLINE(ct);
4341 	/* catch for any memory leaks */
4342 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4343 	ct->ct_dip = ct_dip;
4344 
4345 	if (ct->ct_power_cnt == 0)
4346 		(void) i_mdi_power_all_phci(ct);
4347 
4348 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4349 	    "i_mdi_pm_hold_client\n"));
4350 	i_mdi_pm_hold_client(ct, 1);
4351 
4352 	MDI_CLIENT_UNLOCK(ct);
4353 }
4354 
4355 void
4356 i_mdi_phci_online(dev_info_t *ph_dip)
4357 {
4358 	mdi_phci_t	*ph;
4359 
4360 	/* pHCI online notification. Mark state accordingly */
4361 	ph = i_devi_get_phci(ph_dip);
4362 	ASSERT(ph != NULL);
4363 	MDI_PHCI_LOCK(ph);
4364 	MDI_PHCI_SET_ONLINE(ph);
4365 	MDI_PHCI_UNLOCK(ph);
4366 }
4367 
4368 /*
4369  * mdi_devi_online():
4370  * 		Online notification from NDI framework on pHCI/client
4371  *		device online.
4372  * Return Values:
4373  *		NDI_SUCCESS
4374  *		MDI_FAILURE
4375  */
4376 
4377 /*ARGSUSED*/
4378 int
4379 mdi_devi_online(dev_info_t *dip, uint_t flags)
4380 {
4381 	if (MDI_PHCI(dip)) {
4382 		i_mdi_phci_online(dip);
4383 	}
4384 
4385 	if (MDI_CLIENT(dip)) {
4386 		i_mdi_client_online(dip);
4387 	}
4388 	return (NDI_SUCCESS);
4389 }
4390 
4391 /*
4392  * mdi_devi_offline():
4393  * 		Offline notification from NDI framework on pHCI/Client device
4394  *		offline.
4395  *
4396  * Return Values:
4397  *		NDI_SUCCESS
4398  *		NDI_FAILURE
4399  */
4400 
4401 /*ARGSUSED*/
4402 int
4403 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4404 {
4405 	int		rv = NDI_SUCCESS;
4406 
4407 	if (MDI_CLIENT(dip)) {
4408 		rv = i_mdi_client_offline(dip, flags);
4409 		if (rv != NDI_SUCCESS)
4410 			return (rv);
4411 	}
4412 
4413 	if (MDI_PHCI(dip)) {
4414 		rv = i_mdi_phci_offline(dip, flags);
4415 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4416 			/* set client back online */
4417 			i_mdi_client_online(dip);
4418 		}
4419 	}
4420 
4421 	return (rv);
4422 }
4423 
/*
 * i_mdi_phci_offline():
 *		pHCI component offline handler.  The routine works in phases:
 *
 *		1. Bail out early if dip has no pHCI (NDI_SUCCESS), if the
 *		   pHCI is already offline (NDI_SUCCESS) or if it is marked
 *		   unstable (NDI_BUSY).
 *		2. Walk the pHCI's path list.  Fail with NDI_BUSY if any
 *		   client has a failover in progress or is unstable.  For a
 *		   client whose computed state would be FAILED, offline the
 *		   client node with ndi_devi_offline().
 *		3. If a client offline failed, walk the paths visited so far
 *		   and re-drive each client node online or offline according
 *		   to its current client state, then return NDI_BUSY.
 *		4. Otherwise mark the pHCI offline, flag every path as
 *		   offlining, delay one tick, and call i_mdi_pi_offline() on
 *		   each path.  If any path is not offline afterwards the pHCI
 *		   is marked online again and NDI_BUSY is returned.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_BUSY
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n",
	    dip));

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* No pHCI is bound to dip: rv is still NDI_SUCCESS. */
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* Capture the successor before any lock is dropped. */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Both the client and pHCI locks are dropped around
			 * the ndi_devi_offline() call.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				/* Remember where to stop the recovery walk. */
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Recovery walk over the paths that precede failed_pip:
		 * re-drive each client node according to its client state.
		 * The pHCI lock is held on entry (re-taken on the failure
		 * path above) and is dropped around each NDI call.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* No client node to act on: just release and move on. */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/*
		 * The return value is ignored; success is judged from the
		 * resulting path state below.
		 */
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* Undo the pHCI offline marking made above. */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
4622 
4623 /*ARGSUSED*/
4624 static int
4625 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
4626 {
4627 	int		rv = NDI_SUCCESS;
4628 	mdi_client_t	*ct;
4629 
4630 	/*
4631 	 * Client component to go offline.  Make sure that we are
4632 	 * not in failing over state and update client state
4633 	 * accordingly
4634 	 */
4635 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
4636 	    dip));
4637 	ct = i_devi_get_client(dip);
4638 	if (ct != NULL) {
4639 		MDI_CLIENT_LOCK(ct);
4640 		if (ct->ct_unstable) {
4641 			/*
4642 			 * One or more paths are in transient state,
4643 			 * Dont allow offline of a client device
4644 			 */
4645 			MDI_DEBUG(1, (CE_WARN, dip,
4646 			    "!One or more paths to this device is "
4647 			    "in transient state. This device can not "
4648 			    "be removed at this moment. "
4649 			    "Please try again later."));
4650 			MDI_CLIENT_UNLOCK(ct);
4651 			return (NDI_BUSY);
4652 		}
4653 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
4654 			/*
4655 			 * Failover is in progress, Dont allow DR of
4656 			 * a client device
4657 			 */
4658 			MDI_DEBUG(1, (CE_WARN, dip,
4659 			    "!Client device (%s%d) is Busy. %s",
4660 			    ddi_driver_name(dip), ddi_get_instance(dip),
4661 			    "This device can not be removed at "
4662 			    "this moment. Please try again later."));
4663 			MDI_CLIENT_UNLOCK(ct);
4664 			return (NDI_BUSY);
4665 		}
4666 		MDI_CLIENT_SET_OFFLINE(ct);
4667 
4668 		/*
4669 		 * Unbind our relationship with the dev_info node
4670 		 */
4671 		if (flags & NDI_DEVI_REMOVE) {
4672 			ct->ct_dip = NULL;
4673 		}
4674 		MDI_CLIENT_UNLOCK(ct);
4675 	}
4676 	return (rv);
4677 }
4678 
4679 /*
4680  * mdi_pre_attach():
4681  *		Pre attach() notification handler
4682  */
4683 
4684 /*ARGSUSED*/
4685 int
4686 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4687 {
4688 	/* don't support old DDI_PM_RESUME */
4689 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
4690 	    (cmd == DDI_PM_RESUME))
4691 		return (DDI_FAILURE);
4692 
4693 	return (DDI_SUCCESS);
4694 }
4695 
4696 /*
4697  * mdi_post_attach():
4698  *		Post attach() notification handler
4699  */
4700 
4701 /*ARGSUSED*/
4702 void
4703 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
4704 {
4705 	mdi_phci_t	*ph;
4706 	mdi_client_t	*ct;
4707 	mdi_pathinfo_t	*pip;
4708 
4709 	if (MDI_PHCI(dip)) {
4710 		ph = i_devi_get_phci(dip);
4711 		ASSERT(ph != NULL);
4712 
4713 		MDI_PHCI_LOCK(ph);
4714 		switch (cmd) {
4715 		case DDI_ATTACH:
4716 			MDI_DEBUG(2, (CE_NOTE, dip,
4717 			    "!pHCI post_attach: called %p\n", ph));
4718 			if (error == DDI_SUCCESS) {
4719 				MDI_PHCI_SET_ATTACH(ph);
4720 			} else {
4721 				MDI_DEBUG(1, (CE_NOTE, dip,
4722 				    "!pHCI post_attach: failed error=%d\n",
4723 				    error));
4724 				MDI_PHCI_SET_DETACH(ph);
4725 			}
4726 			break;
4727 
4728 		case DDI_RESUME:
4729 			MDI_DEBUG(2, (CE_NOTE, dip,
4730 			    "!pHCI post_resume: called %p\n", ph));
4731 			if (error == DDI_SUCCESS) {
4732 				MDI_PHCI_SET_RESUME(ph);
4733 			} else {
4734 				MDI_DEBUG(1, (CE_NOTE, dip,
4735 				    "!pHCI post_resume: failed error=%d\n",
4736 				    error));
4737 				MDI_PHCI_SET_SUSPEND(ph);
4738 			}
4739 			break;
4740 		}
4741 		MDI_PHCI_UNLOCK(ph);
4742 	}
4743 
4744 	if (MDI_CLIENT(dip)) {
4745 		ct = i_devi_get_client(dip);
4746 		ASSERT(ct != NULL);
4747 
4748 		MDI_CLIENT_LOCK(ct);
4749 		switch (cmd) {
4750 		case DDI_ATTACH:
4751 			MDI_DEBUG(2, (CE_NOTE, dip,
4752 			    "!Client post_attach: called %p\n", ct));
4753 			if (error != DDI_SUCCESS) {
4754 				MDI_DEBUG(1, (CE_NOTE, dip,
4755 				    "!Client post_attach: failed error=%d\n",
4756 				    error));
4757 				MDI_CLIENT_SET_DETACH(ct);
4758 				MDI_DEBUG(4, (CE_WARN, dip,
4759 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
4760 				i_mdi_pm_reset_client(ct);
4761 				break;
4762 			}
4763 
4764 			/*
4765 			 * Client device has successfully attached.
4766 			 * Create kstats for any pathinfo structures
4767 			 * initially associated with this client.
4768 			 */
4769 			for (pip = ct->ct_path_head; pip != NULL;
4770 			    pip = (mdi_pathinfo_t *)
4771 			    MDI_PI(pip)->pi_client_link) {
4772 				(void) i_mdi_pi_kstat_create(pip);
4773 				i_mdi_report_path_state(ct, pip);
4774 			}
4775 			MDI_CLIENT_SET_ATTACH(ct);
4776 			break;
4777 
4778 		case DDI_RESUME:
4779 			MDI_DEBUG(2, (CE_NOTE, dip,
4780 			    "!Client post_attach: called %p\n", ct));
4781 			if (error == DDI_SUCCESS) {
4782 				MDI_CLIENT_SET_RESUME(ct);
4783 			} else {
4784 				MDI_DEBUG(1, (CE_NOTE, dip,
4785 				    "!Client post_resume: failed error=%d\n",
4786 				    error));
4787 				MDI_CLIENT_SET_SUSPEND(ct);
4788 			}
4789 			break;
4790 		}
4791 		MDI_CLIENT_UNLOCK(ct);
4792 	}
4793 }
4794 
4795 /*
4796  * mdi_pre_detach():
4797  *		Pre detach notification handler
4798  */
4799 
4800 /*ARGSUSED*/
4801 int
4802 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4803 {
4804 	int rv = DDI_SUCCESS;
4805 
4806 	if (MDI_CLIENT(dip)) {
4807 		(void) i_mdi_client_pre_detach(dip, cmd);
4808 	}
4809 
4810 	if (MDI_PHCI(dip)) {
4811 		rv = i_mdi_phci_pre_detach(dip, cmd);
4812 	}
4813 
4814 	return (rv);
4815 }
4816 
4817 /*ARGSUSED*/
4818 static int
4819 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4820 {
4821 	int		rv = DDI_SUCCESS;
4822 	mdi_phci_t	*ph;
4823 	mdi_client_t	*ct;
4824 	mdi_pathinfo_t	*pip;
4825 	mdi_pathinfo_t	*failed_pip = NULL;
4826 	mdi_pathinfo_t	*next;
4827 
4828 	ph = i_devi_get_phci(dip);
4829 	if (ph == NULL) {
4830 		return (rv);
4831 	}
4832 
4833 	MDI_PHCI_LOCK(ph);
4834 	switch (cmd) {
4835 	case DDI_DETACH:
4836 		MDI_DEBUG(2, (CE_NOTE, dip,
4837 		    "!pHCI pre_detach: called %p\n", ph));
4838 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
4839 			/*
4840 			 * mdi_pathinfo nodes are still attached to
4841 			 * this pHCI. Fail the detach for this pHCI.
4842 			 */
4843 			MDI_DEBUG(2, (CE_WARN, dip,
4844 			    "!pHCI pre_detach: "
4845 			    "mdi_pathinfo nodes are still attached "
4846 			    "%p\n", ph));
4847 			rv = DDI_FAILURE;
4848 			break;
4849 		}
4850 		MDI_PHCI_SET_DETACH(ph);
4851 		break;
4852 
4853 	case DDI_SUSPEND:
4854 		/*
4855 		 * pHCI is getting suspended.  Since mpxio client
4856 		 * devices may not be suspended at this point, to avoid
4857 		 * a potential stack overflow, it is important to suspend
4858 		 * client devices before pHCI can be suspended.
4859 		 */
4860 
4861 		MDI_DEBUG(2, (CE_NOTE, dip,
4862 		    "!pHCI pre_suspend: called %p\n", ph));
4863 		/*
4864 		 * Suspend all the client devices accessible through this pHCI
4865 		 */
4866 		pip = ph->ph_path_head;
4867 		while (pip != NULL && rv == DDI_SUCCESS) {
4868 			dev_info_t *cdip;
4869 			MDI_PI_LOCK(pip);
4870 			next =
4871 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4872 			ct = MDI_PI(pip)->pi_client;
4873 			i_mdi_client_lock(ct, pip);
4874 			cdip = ct->ct_dip;
4875 			MDI_PI_UNLOCK(pip);
4876 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
4877 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
4878 				i_mdi_client_unlock(ct);
4879 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
4880 				    DDI_SUCCESS) {
4881 					/*
4882 					 * Suspend of one of the client
4883 					 * device has failed.
4884 					 */
4885 					MDI_DEBUG(1, (CE_WARN, dip,
4886 					    "!Suspend of device (%s%d) failed.",
4887 					    ddi_driver_name(cdip),
4888 					    ddi_get_instance(cdip)));
4889 					failed_pip = pip;
4890 					break;
4891 				}
4892 			} else {
4893 				i_mdi_client_unlock(ct);
4894 			}
4895 			pip = next;
4896 		}
4897 
4898 		if (rv == DDI_SUCCESS) {
4899 			/*
4900 			 * Suspend of client devices is complete. Proceed
4901 			 * with pHCI suspend.
4902 			 */
4903 			MDI_PHCI_SET_SUSPEND(ph);
4904 		} else {
4905 			/*
4906 			 * Revert back all the suspended client device states
4907 			 * to converse.
4908 			 */
4909 			pip = ph->ph_path_head;
4910 			while (pip != failed_pip) {
4911 				dev_info_t *cdip;
4912 				MDI_PI_LOCK(pip);
4913 				next =
4914 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4915 				ct = MDI_PI(pip)->pi_client;
4916 				i_mdi_client_lock(ct, pip);
4917 				cdip = ct->ct_dip;
4918 				MDI_PI_UNLOCK(pip);
4919 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
4920 					i_mdi_client_unlock(ct);
4921 					(void) devi_attach(cdip, DDI_RESUME);
4922 				} else {
4923 					i_mdi_client_unlock(ct);
4924 				}
4925 				pip = next;
4926 			}
4927 		}
4928 		break;
4929 
4930 	default:
4931 		rv = DDI_FAILURE;
4932 		break;
4933 	}
4934 	MDI_PHCI_UNLOCK(ph);
4935 	return (rv);
4936 }
4937 
4938 /*ARGSUSED*/
4939 static int
4940 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4941 {
4942 	int		rv = DDI_SUCCESS;
4943 	mdi_client_t	*ct;
4944 
4945 	ct = i_devi_get_client(dip);
4946 	if (ct == NULL) {
4947 		return (rv);
4948 	}
4949 
4950 	MDI_CLIENT_LOCK(ct);
4951 	switch (cmd) {
4952 	case DDI_DETACH:
4953 		MDI_DEBUG(2, (CE_NOTE, dip,
4954 		    "!Client pre_detach: called %p\n", ct));
4955 		MDI_CLIENT_SET_DETACH(ct);
4956 		break;
4957 
4958 	case DDI_SUSPEND:
4959 		MDI_DEBUG(2, (CE_NOTE, dip,
4960 		    "!Client pre_suspend: called %p\n", ct));
4961 		MDI_CLIENT_SET_SUSPEND(ct);
4962 		break;
4963 
4964 	default:
4965 		rv = DDI_FAILURE;
4966 		break;
4967 	}
4968 	MDI_CLIENT_UNLOCK(ct);
4969 	return (rv);
4970 }
4971 
4972 /*
4973  * mdi_post_detach():
4974  *		Post detach notification handler
4975  */
4976 
4977 /*ARGSUSED*/
4978 void
4979 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
4980 {
4981 	/*
4982 	 * Detach/Suspend of mpxio component failed. Update our state
4983 	 * too
4984 	 */
4985 	if (MDI_PHCI(dip))
4986 		i_mdi_phci_post_detach(dip, cmd, error);
4987 
4988 	if (MDI_CLIENT(dip))
4989 		i_mdi_client_post_detach(dip, cmd, error);
4990 }
4991 
4992 /*ARGSUSED*/
4993 static void
4994 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
4995 {
4996 	mdi_phci_t	*ph;
4997 
4998 	/*
4999 	 * Detach/Suspend of phci component failed. Update our state
5000 	 * too
5001 	 */
5002 	ph = i_devi_get_phci(dip);
5003 	if (ph == NULL) {
5004 		return;
5005 	}
5006 
5007 	MDI_PHCI_LOCK(ph);
5008 	/*
5009 	 * Detach of pHCI failed. Restore back converse
5010 	 * state
5011 	 */
5012 	switch (cmd) {
5013 	case DDI_DETACH:
5014 		MDI_DEBUG(2, (CE_NOTE, dip,
5015 		    "!pHCI post_detach: called %p\n", ph));
5016 		if (error != DDI_SUCCESS)
5017 			MDI_PHCI_SET_ATTACH(ph);
5018 		break;
5019 
5020 	case DDI_SUSPEND:
5021 		MDI_DEBUG(2, (CE_NOTE, dip,
5022 		    "!pHCI post_suspend: called %p\n", ph));
5023 		if (error != DDI_SUCCESS)
5024 			MDI_PHCI_SET_RESUME(ph);
5025 		break;
5026 	}
5027 	MDI_PHCI_UNLOCK(ph);
5028 }
5029 
5030 /*ARGSUSED*/
5031 static void
5032 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5033 {
5034 	mdi_client_t	*ct;
5035 
5036 	ct = i_devi_get_client(dip);
5037 	if (ct == NULL) {
5038 		return;
5039 	}
5040 	MDI_CLIENT_LOCK(ct);
5041 	/*
5042 	 * Detach of Client failed. Restore back converse
5043 	 * state
5044 	 */
5045 	switch (cmd) {
5046 	case DDI_DETACH:
5047 		MDI_DEBUG(2, (CE_NOTE, dip,
5048 		    "!Client post_detach: called %p\n", ct));
5049 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5050 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5051 			    "i_mdi_pm_rele_client\n"));
5052 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5053 		} else {
5054 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5055 			    "i_mdi_pm_reset_client\n"));
5056 			i_mdi_pm_reset_client(ct);
5057 		}
5058 		if (error != DDI_SUCCESS)
5059 			MDI_CLIENT_SET_ATTACH(ct);
5060 		break;
5061 
5062 	case DDI_SUSPEND:
5063 		MDI_DEBUG(2, (CE_NOTE, dip,
5064 		    "!Client post_suspend: called %p\n", ct));
5065 		if (error != DDI_SUCCESS)
5066 			MDI_CLIENT_SET_RESUME(ct);
5067 		break;
5068 	}
5069 	MDI_CLIENT_UNLOCK(ct);
5070 }
5071 
5072 /*
5073  * create and install per-path (client - pHCI) statistics
5074  * I/O stats supported: nread, nwritten, reads, and writes
5075  * Error stats - hard errors, soft errors, & transport errors
5076  */
5077 static int
5078 i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
5079 {
5080 
5081 	dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip;
5082 	dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip;
5083 	char ksname[KSTAT_STRLEN];
5084 	mdi_pathinfo_t *cpip;
5085 	const char *err_postfix = ",err";
5086 	kstat_t	*kiosp, *kerrsp;
5087 	struct pi_errs	*nsp;
5088 	struct mdi_pi_kstats *mdi_statp;
5089 
5090 	ASSERT(client != NULL && ppath != NULL);
5091 
5092 	ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex)));
5093 
5094 	if (MDI_PI(pip)->pi_kstats != NULL)
5095 		return (MDI_SUCCESS);
5096 
5097 	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
5098 	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
5099 		if (cpip == pip)
5100 			continue;
5101 		/*
5102 		 * We have found a different path with same parent
5103 		 * kstats for a given client-pHCI are common
5104 		 */
5105 		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
5106 		    (MDI_PI(cpip)->pi_kstats != NULL)) {
5107 			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
5108 			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
5109 			return (MDI_SUCCESS);
5110 		}
5111 	}
5112 
5113 	/*
5114 	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
5115 	 * clamp length of name against max length of error kstat name
5116 	 */
5117 	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
5118 	    ddi_driver_name(client), ddi_get_instance(client),
5119 	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
5120 	    (KSTAT_STRLEN - strlen(err_postfix))) {
5121 		return (MDI_FAILURE);
5122 	}
5123 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5124 	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
5125 		return (MDI_FAILURE);
5126 	}
5127 
5128 	(void) strcat(ksname, err_postfix);
5129 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5130 	    KSTAT_TYPE_NAMED,
5131 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5132 
5133 	if (kerrsp == NULL) {
5134 		kstat_delete(kiosp);
5135 		return (MDI_FAILURE);
5136 	}
5137 
5138 	nsp = (struct pi_errs *)kerrsp->ks_data;
5139 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5140 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5141 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5142 	    KSTAT_DATA_UINT32);
5143 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5144 	    KSTAT_DATA_UINT32);
5145 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5146 	    KSTAT_DATA_UINT32);
5147 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5148 	    KSTAT_DATA_UINT32);
5149 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5150 	    KSTAT_DATA_UINT32);
5151 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5152 	    KSTAT_DATA_UINT32);
5153 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5154 	    KSTAT_DATA_UINT32);
5155 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5156 
5157 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5158 	mdi_statp->pi_kstat_ref = 1;
5159 	mdi_statp->pi_kstat_iostats = kiosp;
5160 	mdi_statp->pi_kstat_errstats = kerrsp;
5161 	kstat_install(kiosp);
5162 	kstat_install(kerrsp);
5163 	MDI_PI(pip)->pi_kstats = mdi_statp;
5164 	return (MDI_SUCCESS);
5165 }
5166 
5167 /*
5168  * destroy per-path properties
5169  */
5170 static void
5171 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5172 {
5173 
5174 	struct mdi_pi_kstats *mdi_statp;
5175 
5176 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5177 		return;
5178 
5179 	MDI_PI(pip)->pi_kstats = NULL;
5180 
5181 	/*
5182 	 * the kstat may be shared between multiple pathinfo nodes
5183 	 * decrement this pathinfo's usage, removing the kstats
5184 	 * themselves when the last pathinfo reference is removed.
5185 	 */
5186 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5187 	if (--mdi_statp->pi_kstat_ref != 0)
5188 		return;
5189 
5190 	kstat_delete(mdi_statp->pi_kstat_iostats);
5191 	kstat_delete(mdi_statp->pi_kstat_errstats);
5192 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5193 }
5194 
5195 /*
5196  * update I/O paths KSTATS
5197  */
5198 void
5199 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5200 {
5201 	kstat_t *iostatp;
5202 	size_t xfer_cnt;
5203 
5204 	ASSERT(pip != NULL);
5205 
5206 	/*
5207 	 * I/O can be driven across a path prior to having path
5208 	 * statistics available, i.e. probe(9e).
5209 	 */
5210 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5211 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5212 		xfer_cnt = bp->b_bcount - bp->b_resid;
5213 		if (bp->b_flags & B_READ) {
5214 			KSTAT_IO_PTR(iostatp)->reads++;
5215 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5216 		} else {
5217 			KSTAT_IO_PTR(iostatp)->writes++;
5218 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5219 		}
5220 	}
5221 }
5222 
5223 /*
5224  * disable the path to a particular pHCI (pHCI specified in the phci_path
5225  * argument) for a particular client (specified in the client_path argument).
5226  * Disabling a path means that MPxIO will not select the disabled path for
5227  * routing any new I/O requests.
5228  */
5229 int
5230 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5231 {
5232 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5233 }
5234 
5235 /*
5236  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5237  * argument) for a particular client (specified in the client_path argument).
5238  * Enabling a path means that MPxIO may select the enabled path for routing
5239  * future I/O requests, subject to other path state constraints.
5240  */
5241 
5242 int
5243 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5244 {
5245 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5246 }
5247 
5248 
5249 /*
5250  * Common routine for doing enable/disable.
5251  */
5252 int
5253 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5254 {
5255 
5256 	mdi_phci_t	*ph;
5257 	mdi_vhci_t	*vh = NULL;
5258 	mdi_client_t	*ct;
5259 	mdi_pathinfo_t	*next, *pip;
5260 	int		found_it;
5261 	int		(*f)() = NULL;
5262 	int		rv;
5263 	int		sync_flag = 0;
5264 
5265 	ph = i_devi_get_phci(pdip);
5266 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5267 		" Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip));
5268 	if (ph == NULL) {
5269 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5270 			" failed. ph = NULL operation = %d\n", op));
5271 		return (MDI_FAILURE);
5272 	}
5273 
5274 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5275 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5276 			" Invalid operation = %d\n", op));
5277 		return (MDI_FAILURE);
5278 	}
5279 
5280 	sync_flag = (flags << 8) & 0xf00;
5281 
5282 	vh = ph->ph_vhci;
5283 	f = vh->vh_ops->vo_pi_state_change;
5284 
5285 	if (cdip == NULL) {
5286 		/*
5287 		 * Need to mark the Phci as enabled/disabled.
5288 		 */
5289 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5290 		"Operation %d for the phci\n", op));
5291 		MDI_PHCI_LOCK(ph);
5292 		switch (flags) {
5293 			case USER_DISABLE:
5294 				if (op == MDI_DISABLE_OP)
5295 					MDI_PHCI_SET_USER_DISABLE(ph);
5296 				else
5297 					MDI_PHCI_SET_USER_ENABLE(ph);
5298 				break;
5299 			case DRIVER_DISABLE:
5300 				if (op == MDI_DISABLE_OP)
5301 					MDI_PHCI_SET_DRV_DISABLE(ph);
5302 				else
5303 					MDI_PHCI_SET_DRV_ENABLE(ph);
5304 				break;
5305 			case DRIVER_DISABLE_TRANSIENT:
5306 				if (op == MDI_DISABLE_OP)
5307 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5308 				else
5309 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
5310 				break;
5311 			default:
5312 				MDI_PHCI_UNLOCK(ph);
5313 				MDI_DEBUG(1, (CE_NOTE, NULL,
5314 				"!i_mdi_pi_enable_disable:"
5315 				" Invalid flag argument= %d\n", flags));
5316 		}
5317 
5318 		/*
5319 		 * Phci has been disabled. Now try to enable/disable
5320 		 * path info's to each client.
5321 		 */
5322 		pip = ph->ph_path_head;
5323 		while (pip != NULL) {
5324 			/*
5325 			 * Do a callback into the mdi consumer to let it
5326 			 * know that path is about to be enabled/disabled.
5327 			 */
5328 			if (f != NULL) {
5329 				rv = (*f)(vh->vh_dip, pip, 0,
5330 					MDI_PI_EXT_STATE(pip),
5331 					MDI_EXT_STATE_CHANGE | sync_flag |
5332 					op | MDI_BEFORE_STATE_CHANGE);
5333 				if (rv != MDI_SUCCESS) {
5334 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5335 				"!vo_pi_state_change: failed rv = %x", rv));
5336 				}
5337 			}
5338 
5339 			MDI_PI_LOCK(pip);
5340 			next =
5341 				(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5342 			switch (flags) {
5343 			case USER_DISABLE:
5344 				if (op == MDI_DISABLE_OP)
5345 					MDI_PI_SET_USER_DISABLE(pip);
5346 				else
5347 					MDI_PI_SET_USER_ENABLE(pip);
5348 				break;
5349 			case DRIVER_DISABLE:
5350 				if (op == MDI_DISABLE_OP)
5351 					MDI_PI_SET_DRV_DISABLE(pip);
5352 				else
5353 					MDI_PI_SET_DRV_ENABLE(pip);
5354 				break;
5355 			case DRIVER_DISABLE_TRANSIENT:
5356 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5357 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5358 				else
5359 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5360 				break;
5361 			}
5362 			MDI_PI_UNLOCK(pip);
5363 			/*
5364 			 * Do a callback into the mdi consumer to let it
5365 			 * know that path is now enabled/disabled.
5366 			 */
5367 			if (f != NULL) {
5368 				rv = (*f)(vh->vh_dip, pip, 0,
5369 					MDI_PI_EXT_STATE(pip),
5370 					MDI_EXT_STATE_CHANGE | sync_flag |
5371 					op | MDI_AFTER_STATE_CHANGE);
5372 				if (rv != MDI_SUCCESS) {
5373 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5374 				"!vo_pi_state_change: failed rv = %x", rv));
5375 				}
5376 			}
5377 			pip = next;
5378 		}
5379 		MDI_PHCI_UNLOCK(ph);
5380 	} else {
5381 
5382 		/*
5383 		 * Disable a specific client.
5384 		 */
5385 		ct = i_devi_get_client(cdip);
5386 		if (ct == NULL) {
5387 			MDI_DEBUG(1, (CE_NOTE, NULL,
5388 			"!i_mdi_pi_enable_disable:"
5389 			" failed. ct = NULL operation = %d\n", op));
5390 			return (MDI_FAILURE);
5391 		}
5392 
5393 		MDI_CLIENT_LOCK(ct);
5394 		pip = ct->ct_path_head;
5395 		found_it = 0;
5396 		while (pip != NULL) {
5397 			MDI_PI_LOCK(pip);
5398 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5399 			if (MDI_PI(pip)->pi_phci == ph) {
5400 				MDI_PI_UNLOCK(pip);
5401 				found_it = 1;
5402 				break;
5403 			}
5404 			MDI_PI_UNLOCK(pip);
5405 			pip = next;
5406 		}
5407 
5408 		MDI_CLIENT_UNLOCK(ct);
5409 		if (found_it == 0) {
5410 			MDI_DEBUG(1, (CE_NOTE, NULL,
5411 			"!i_mdi_pi_enable_disable:"
5412 			" failed. Could not find corresponding pip\n"));
5413 			return (MDI_FAILURE);
5414 		}
5415 		/*
5416 		 * Do a callback into the mdi consumer to let it
5417 		 * know that path is about to get enabled/disabled.
5418 		 */
5419 		if (f != NULL) {
5420 			rv = (*f)(vh->vh_dip, pip, 0,
5421 				MDI_PI_EXT_STATE(pip),
5422 				MDI_EXT_STATE_CHANGE | sync_flag |
5423 				op | MDI_BEFORE_STATE_CHANGE);
5424 			if (rv != MDI_SUCCESS) {
5425 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5426 				"!vo_pi_state_change: failed rv = %x", rv));
5427 			}
5428 		}
5429 		MDI_PI_LOCK(pip);
5430 		switch (flags) {
5431 			case USER_DISABLE:
5432 				if (op == MDI_DISABLE_OP)
5433 					MDI_PI_SET_USER_DISABLE(pip);
5434 				else
5435 					MDI_PI_SET_USER_ENABLE(pip);
5436 				break;
5437 			case DRIVER_DISABLE:
5438 				if (op == MDI_DISABLE_OP)
5439 					MDI_PI_SET_DRV_DISABLE(pip);
5440 				else
5441 					MDI_PI_SET_DRV_ENABLE(pip);
5442 				break;
5443 			case DRIVER_DISABLE_TRANSIENT:
5444 				if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS)
5445 					MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5446 				else
5447 					MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5448 				break;
5449 		}
5450 		MDI_PI_UNLOCK(pip);
5451 		/*
5452 		 * Do a callback into the mdi consumer to let it
5453 		 * know that path is now enabled/disabled.
5454 		 */
5455 		if (f != NULL) {
5456 			rv = (*f)(vh->vh_dip, pip, 0,
5457 				MDI_PI_EXT_STATE(pip),
5458 				MDI_EXT_STATE_CHANGE | sync_flag |
5459 				op | MDI_AFTER_STATE_CHANGE);
5460 			if (rv != MDI_SUCCESS) {
5461 				MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5462 				"!vo_pi_state_change: failed rv = %x", rv));
5463 			}
5464 		}
5465 	}
5466 
5467 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5468 		" Returning success pdip = %p cdip = %p\n", op, pdip, cdip));
5469 	return (MDI_SUCCESS);
5470 }
5471 
5472 /*ARGSUSED3*/
5473 int
5474 mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
5475     int flags, clock_t timeout)
5476 {
5477 	mdi_pathinfo_t *pip;
5478 	dev_info_t *dip;
5479 	clock_t interval = drv_usectohz(100000);	/* 0.1 sec */
5480 	char *paddr;
5481 
5482 	MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm));
5483 
5484 	if (!MDI_PHCI(pdip))
5485 		return (MDI_FAILURE);
5486 
5487 	paddr = strchr(devnm, '@');
5488 	if (paddr == NULL)
5489 		return (MDI_FAILURE);
5490 
5491 	paddr++;	/* skip '@' */
5492 	pip = mdi_pi_find(pdip, NULL, paddr);
5493 	while (pip == NULL && timeout > 0) {
5494 		if (interval > timeout)
5495 			interval = timeout;
5496 		if (flags & NDI_DEVI_DEBUG) {
5497 			cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n",
5498 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
5499 			    paddr, interval, timeout);
5500 		}
5501 		delay(interval);
5502 		timeout -= interval;
5503 		interval += interval;
5504 		pip = mdi_pi_find(pdip, NULL, paddr);
5505 	}
5506 
5507 	if (pip == NULL)
5508 		return (MDI_FAILURE);
5509 	dip = mdi_pi_get_client(pip);
5510 	if (ndi_devi_online(dip, flags) != NDI_SUCCESS)
5511 		return (MDI_FAILURE);
5512 	*cdipp = dip;
5513 
5514 	/* TODO: holding should happen inside search functions */
5515 	ndi_hold_devi(dip);
5516 	return (MDI_SUCCESS);
5517 }
5518 
5519 /*
5520  * Ensure phci powered up
5521  */
5522 static void
5523 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
5524 {
5525 	dev_info_t	*ph_dip;
5526 
5527 	ASSERT(pip != NULL);
5528 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5529 
5530 	if (MDI_PI(pip)->pi_pm_held) {
5531 		return;
5532 	}
5533 
5534 	ph_dip = mdi_pi_get_phci(pip);
5535 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n",
5536 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5537 	if (ph_dip == NULL) {
5538 		return;
5539 	}
5540 
5541 	MDI_PI_UNLOCK(pip);
5542 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5543 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5544 	pm_hold_power(ph_dip);
5545 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5546 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5547 	MDI_PI_LOCK(pip);
5548 
5549 	MDI_PI(pip)->pi_pm_held = 1;
5550 }
5551 
5552 /*
5553  * Allow phci powered down
5554  */
5555 static void
5556 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
5557 {
5558 	dev_info_t	*ph_dip = NULL;
5559 
5560 	ASSERT(pip != NULL);
5561 	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
5562 
5563 	if (MDI_PI(pip)->pi_pm_held == 0) {
5564 		return;
5565 	}
5566 
5567 	ph_dip = mdi_pi_get_phci(pip);
5568 	ASSERT(ph_dip != NULL);
5569 
5570 	MDI_PI_UNLOCK(pip);
5571 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n",
5572 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5573 
5574 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
5575 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5576 	pm_rele_power(ph_dip);
5577 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
5578 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
5579 
5580 	MDI_PI_LOCK(pip);
5581 	MDI_PI(pip)->pi_pm_held = 0;
5582 }
5583 
5584 static void
5585 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
5586 {
5587 	ASSERT(ct);
5588 
5589 	ct->ct_power_cnt += incr;
5590 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client "
5591 	    "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr));
5592 	ASSERT(ct->ct_power_cnt >= 0);
5593 }
5594 
5595 static void
5596 i_mdi_rele_all_phci(mdi_client_t *ct)
5597 {
5598 	mdi_pathinfo_t  *pip;
5599 
5600 	ASSERT(mutex_owned(&ct->ct_mutex));
5601 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5602 	while (pip != NULL) {
5603 		mdi_hold_path(pip);
5604 		MDI_PI_LOCK(pip);
5605 		i_mdi_pm_rele_pip(pip);
5606 		MDI_PI_UNLOCK(pip);
5607 		mdi_rele_path(pip);
5608 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5609 	}
5610 }
5611 
5612 static void
5613 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
5614 {
5615 	ASSERT(ct);
5616 
5617 	if (i_ddi_node_state(ct->ct_dip) >= DS_READY) {
5618 		ct->ct_power_cnt -= decr;
5619 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
5620 		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
5621 	}
5622 
5623 	ASSERT(ct->ct_power_cnt >= 0);
5624 	if (ct->ct_power_cnt == 0) {
5625 		i_mdi_rele_all_phci(ct);
5626 		return;
5627 	}
5628 }
5629 
5630 static void
5631 i_mdi_pm_reset_client(mdi_client_t *ct)
5632 {
5633 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
5634 	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
5635 	ct->ct_power_cnt = 0;
5636 	i_mdi_rele_all_phci(ct);
5637 	ct->ct_powercnt_reset = 1;
5638 	ct->ct_powercnt_held = 0;
5639 }
5640 
5641 static void
5642 i_mdi_pm_hold_all_phci(mdi_client_t *ct)
5643 {
5644 	mdi_pathinfo_t  *pip;
5645 	ASSERT(mutex_owned(&ct->ct_mutex));
5646 
5647 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5648 	while (pip != NULL) {
5649 		mdi_hold_path(pip);
5650 		MDI_PI_LOCK(pip);
5651 		i_mdi_pm_hold_pip(pip);
5652 		MDI_PI_UNLOCK(pip);
5653 		mdi_rele_path(pip);
5654 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5655 	}
5656 }
5657 
5658 static int
5659 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
5660 {
5661 	int		ret;
5662 	dev_info_t	*ph_dip;
5663 
5664 	MDI_PI_LOCK(pip);
5665 	i_mdi_pm_hold_pip(pip);
5666 
5667 	ph_dip = mdi_pi_get_phci(pip);
5668 	MDI_PI_UNLOCK(pip);
5669 
5670 	/* bring all components of phci to full power */
5671 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5672 	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
5673 	    ddi_get_instance(ph_dip)));
5674 
5675 	ret = pm_powerup(ph_dip);
5676 
5677 	if (ret == DDI_FAILURE) {
5678 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
5679 		    "pm_powerup FAILED for %s%d\n",
5680 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));
5681 
5682 		MDI_PI_LOCK(pip);
5683 		i_mdi_pm_rele_pip(pip);
5684 		MDI_PI_UNLOCK(pip);
5685 		return (MDI_FAILURE);
5686 	}
5687 
5688 	return (MDI_SUCCESS);
5689 }
5690 
5691 static int
5692 i_mdi_power_all_phci(mdi_client_t *ct)
5693 {
5694 	mdi_pathinfo_t  *pip;
5695 	int		succeeded = 0;
5696 
5697 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
5698 	while (pip != NULL) {
5699 		mdi_hold_path(pip);
5700 		MDI_CLIENT_UNLOCK(ct);
5701 		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
5702 			succeeded = 1;
5703 
5704 		ASSERT(ct == MDI_PI(pip)->pi_client);
5705 		MDI_CLIENT_LOCK(ct);
5706 		mdi_rele_path(pip);
5707 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
5708 	}
5709 
5710 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
5711 }
5712 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are processed here.
 *		BUS_POWER_NOINVOL is rejected and every other op is passed
 *		straight to pm_busop_bus_power(), whose return value is
 *		returned unchanged.
 *
 *		arg is interpreted as a pm_bp_child_pwrchg_t for the two
 *		notification ops and as a pm_bp_has_changed_t for
 *		BUS_POWER_HAS_CHANGED. result is read (as an int) only in
 *		the BUS_POWER_POST_NOTIFICATION case.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	/* the child must be an MDI client; fail if no client is attached */
	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/*
		 * The transition flag set here is cleared, and waiters are
		 * woken, in the BUS_POWER_POST_NOTIFICATION case below.
		 */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/*
		 * No holds yet: power up the phcis. The result becomes this
		 * function's return value.
		 */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new power state only if the change succeeded */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* successful power down */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* end of the transition started in pre-notification */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level was raised: power up phcis if needed and hold them */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* level dropped to 0 from a level other than -1: release */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
5870 
5871 static int
5872 i_mdi_pm_pre_config_one(dev_info_t *child)
5873 {
5874 	int		ret = MDI_SUCCESS;
5875 	mdi_client_t	*ct;
5876 
5877 	ct = i_devi_get_client(child);
5878 	if (ct == NULL)
5879 		return (MDI_FAILURE);
5880 
5881 	MDI_CLIENT_LOCK(ct);
5882 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5883 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5884 
5885 	if (!MDI_CLIENT_IS_FAILED(ct)) {
5886 		MDI_CLIENT_UNLOCK(ct);
5887 		MDI_DEBUG(4, (CE_NOTE, child,
5888 		    "i_mdi_pm_pre_config_one already configured\n"));
5889 		return (MDI_SUCCESS);
5890 	}
5891 
5892 	if (ct->ct_powercnt_held) {
5893 		MDI_CLIENT_UNLOCK(ct);
5894 		MDI_DEBUG(4, (CE_NOTE, child,
5895 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
5896 		return (MDI_SUCCESS);
5897 	}
5898 
5899 	if (ct->ct_power_cnt == 0) {
5900 		ret = i_mdi_power_all_phci(ct);
5901 	}
5902 	MDI_DEBUG(4, (CE_NOTE, child,
5903 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
5904 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5905 	ct->ct_powercnt_held = 1;
5906 	ct->ct_powercnt_reset = 0;
5907 	MDI_CLIENT_UNLOCK(ct);
5908 	return (ret);
5909 }
5910 
5911 static int
5912 i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
5913 {
5914 	int			ret = MDI_SUCCESS;
5915 	dev_info_t		*cdip;
5916 	int			circ;
5917 
5918 	ASSERT(MDI_VHCI(parent));
5919 
5920 	/* ndi_devi_config_one */
5921 	if (child) {
5922 		return (i_mdi_pm_pre_config_one(child));
5923 	}
5924 
5925 	/* devi_config_common */
5926 	ndi_devi_enter(parent, &circ);
5927 	cdip = ddi_get_child(parent);
5928 	while (cdip) {
5929 		dev_info_t *next = ddi_get_next_sibling(cdip);
5930 
5931 		ret = i_mdi_pm_pre_config_one(cdip);
5932 		if (ret != MDI_SUCCESS)
5933 			break;
5934 		cdip = next;
5935 	}
5936 	ndi_devi_exit(parent, circ);
5937 	return (ret);
5938 }
5939 
5940 static int
5941 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
5942 {
5943 	int		ret = MDI_SUCCESS;
5944 	mdi_client_t	*ct;
5945 
5946 	ct = i_devi_get_client(child);
5947 	if (ct == NULL)
5948 		return (MDI_FAILURE);
5949 
5950 	MDI_CLIENT_LOCK(ct);
5951 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
5952 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
5953 
5954 	if (i_ddi_node_state(ct->ct_dip) < DS_READY) {
5955 		MDI_DEBUG(4, (CE_NOTE, child,
5956 		    "i_mdi_pm_pre_unconfig node detached already\n"));
5957 		MDI_CLIENT_UNLOCK(ct);
5958 		return (MDI_SUCCESS);
5959 	}
5960 
5961 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
5962 	    (flags & NDI_AUTODETACH)) {
5963 		MDI_DEBUG(4, (CE_NOTE, child,
5964 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
5965 		MDI_CLIENT_UNLOCK(ct);
5966 		return (MDI_FAILURE);
5967 	}
5968 
5969 	if (ct->ct_powercnt_held) {
5970 		MDI_DEBUG(4, (CE_NOTE, child,
5971 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
5972 		MDI_CLIENT_UNLOCK(ct);
5973 		*held = 1;
5974 		return (MDI_SUCCESS);
5975 	}
5976 
5977 	if (ct->ct_power_cnt == 0) {
5978 		ret = i_mdi_power_all_phci(ct);
5979 	}
5980 	MDI_DEBUG(4, (CE_NOTE, child,
5981 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
5982 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
5983 	ct->ct_powercnt_held = 1;
5984 	ct->ct_powercnt_reset = 0;
5985 	MDI_CLIENT_UNLOCK(ct);
5986 	if (ret == MDI_SUCCESS)
5987 		*held = 1;
5988 	return (ret);
5989 }
5990 
5991 static int
5992 i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
5993     int flags)
5994 {
5995 	int			ret = MDI_SUCCESS;
5996 	dev_info_t		*cdip;
5997 	int			circ;
5998 
5999 	ASSERT(MDI_VHCI(parent));
6000 	*held = 0;
6001 
6002 	/* ndi_devi_unconfig_one */
6003 	if (child) {
6004 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6005 	}
6006 
6007 	/* devi_unconfig_common */
6008 	ndi_devi_enter(parent, &circ);
6009 	cdip = ddi_get_child(parent);
6010 	while (cdip) {
6011 		dev_info_t *next = ddi_get_next_sibling(cdip);
6012 
6013 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6014 		cdip = next;
6015 	}
6016 	ndi_devi_exit(parent, circ);
6017 
6018 	if (*held)
6019 		ret = MDI_SUCCESS;
6020 
6021 	return (ret);
6022 }
6023 
6024 static void
6025 i_mdi_pm_post_config_one(dev_info_t *child)
6026 {
6027 	mdi_client_t	*ct;
6028 
6029 	ct = i_devi_get_client(child);
6030 	if (ct == NULL)
6031 		return;
6032 
6033 	MDI_CLIENT_LOCK(ct);
6034 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6035 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6036 
6037 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_held) {
6038 		MDI_DEBUG(4, (CE_NOTE, child,
6039 		    "i_mdi_pm_post_config_one NOT held\n"));
6040 		MDI_CLIENT_UNLOCK(ct);
6041 		return;
6042 	}
6043 
6044 	/* client has not been updated */
6045 	if (MDI_CLIENT_IS_FAILED(ct)) {
6046 		MDI_DEBUG(4, (CE_NOTE, child,
6047 		    "i_mdi_pm_post_config_one NOT configured\n"));
6048 		MDI_CLIENT_UNLOCK(ct);
6049 		return;
6050 	}
6051 
6052 	/* another thread might have powered it down or detached it */
6053 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6054 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6055 	    (i_ddi_node_state(ct->ct_dip) < DS_READY &&
6056 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6057 		MDI_DEBUG(4, (CE_NOTE, child,
6058 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6059 		i_mdi_pm_reset_client(ct);
6060 	} else {
6061 		mdi_pathinfo_t	*pip, *next;
6062 		int	valid_path_count = 0;
6063 
6064 		MDI_DEBUG(4, (CE_NOTE, child,
6065 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6066 		pip = ct->ct_path_head;
6067 		while (pip != NULL) {
6068 			MDI_PI_LOCK(pip);
6069 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6070 			if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
6071 				== MDI_PATHINFO_STATE_ONLINE ||
6072 			    (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
6073 				== MDI_PATHINFO_STATE_STANDBY)
6074 				valid_path_count ++;
6075 			MDI_PI_UNLOCK(pip);
6076 			pip = next;
6077 		}
6078 		i_mdi_pm_rele_client(ct, valid_path_count);
6079 	}
6080 	ct->ct_powercnt_held = 0;
6081 	MDI_CLIENT_UNLOCK(ct);
6082 }
6083 
6084 static void
6085 i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
6086 {
6087 	int		circ;
6088 	dev_info_t	*cdip;
6089 	ASSERT(MDI_VHCI(parent));
6090 
6091 	/* ndi_devi_config_one */
6092 	if (child) {
6093 		i_mdi_pm_post_config_one(child);
6094 		return;
6095 	}
6096 
6097 	/* devi_config_common */
6098 	ndi_devi_enter(parent, &circ);
6099 	cdip = ddi_get_child(parent);
6100 	while (cdip) {
6101 		dev_info_t *next = ddi_get_next_sibling(cdip);
6102 
6103 		i_mdi_pm_post_config_one(cdip);
6104 		cdip = next;
6105 	}
6106 	ndi_devi_exit(parent, circ);
6107 }
6108 
6109 static void
6110 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6111 {
6112 	mdi_client_t	*ct;
6113 
6114 	ct = i_devi_get_client(child);
6115 	if (ct == NULL)
6116 		return;
6117 
6118 	MDI_CLIENT_LOCK(ct);
6119 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6120 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6121 
6122 	if (!ct->ct_powercnt_held) {
6123 		MDI_DEBUG(4, (CE_NOTE, child,
6124 		    "i_mdi_pm_post_unconfig NOT held\n"));
6125 		MDI_CLIENT_UNLOCK(ct);
6126 		return;
6127 	}
6128 
6129 	/* failure detaching or another thread just attached it */
6130 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6131 	    i_ddi_node_state(ct->ct_dip) == DS_READY) ||
6132 	    (i_ddi_node_state(ct->ct_dip) != DS_READY &&
6133 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6134 		MDI_DEBUG(4, (CE_NOTE, child,
6135 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6136 		i_mdi_pm_reset_client(ct);
6137 	}
6138 
6139 	MDI_DEBUG(4, (CE_NOTE, child,
6140 	    "i_mdi_pm_post_unconfig not changed\n"));
6141 	MDI_CLIENT_UNLOCK(ct);
6142 }
6143 
6144 static void
6145 i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
6146 {
6147 	int			circ;
6148 	dev_info_t		*cdip;
6149 
6150 	ASSERT(MDI_VHCI(parent));
6151 
6152 	if (!held) {
6153 		MDI_DEBUG(4, (CE_NOTE, parent,
6154 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6155 		return;
6156 	}
6157 
6158 	if (child) {
6159 		i_mdi_pm_post_unconfig_one(child);
6160 		return;
6161 	}
6162 
6163 	ndi_devi_enter(parent, &circ);
6164 	cdip = ddi_get_child(parent);
6165 	while (cdip) {
6166 		dev_info_t *next = ddi_get_next_sibling(cdip);
6167 
6168 		i_mdi_pm_post_unconfig_one(cdip);
6169 		cdip = next;
6170 	}
6171 	ndi_devi_exit(parent, circ);
6172 }
6173 
6174 int
6175 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6176 {
6177 	int			circ, ret = MDI_SUCCESS;
6178 	dev_info_t		*client_dip = NULL;
6179 	mdi_client_t		*ct;
6180 
6181 	/*
6182 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6183 	 * Power up pHCI for the named client device.
6184 	 * Note: Before the client is enumerated under vhci by phci,
6185 	 * client_dip can be NULL. Then proceed to power up all the
6186 	 * pHCIs.
6187 	 */
6188 	if (devnm != NULL) {
6189 		ndi_devi_enter(vdip, &circ);
6190 		client_dip = ndi_devi_findchild(vdip, devnm);
6191 		ndi_devi_exit(vdip, circ);
6192 	}
6193 
6194 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));
6195 
6196 	switch (op) {
6197 	case MDI_PM_PRE_CONFIG:
6198 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6199 
6200 		break;
6201 	case MDI_PM_PRE_UNCONFIG:
6202 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6203 		    flags);
6204 
6205 		break;
6206 	case MDI_PM_POST_CONFIG:
6207 		i_mdi_pm_post_config(vdip, client_dip);
6208 
6209 		break;
6210 	case MDI_PM_POST_UNCONFIG:
6211 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6212 
6213 		break;
6214 	case MDI_PM_HOLD_POWER:
6215 	case MDI_PM_RELE_POWER:
6216 		ASSERT(args);
6217 
6218 		client_dip = (dev_info_t *)args;
6219 		ASSERT(MDI_CLIENT(client_dip));
6220 
6221 		ct = i_devi_get_client(client_dip);
6222 		MDI_CLIENT_LOCK(ct);
6223 
6224 		if (op == MDI_PM_HOLD_POWER) {
6225 			if (ct->ct_power_cnt == 0) {
6226 				(void) i_mdi_power_all_phci(ct);
6227 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6228 				    "mdi_power i_mdi_pm_hold_client\n"));
6229 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6230 			}
6231 		} else {
6232 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6233 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6234 				    "mdi_power i_mdi_pm_rele_client\n"));
6235 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6236 			} else {
6237 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6238 				    "mdi_power i_mdi_pm_reset_client\n"));
6239 				i_mdi_pm_reset_client(ct);
6240 			}
6241 		}
6242 
6243 		MDI_CLIENT_UNLOCK(ct);
6244 		break;
6245 	default:
6246 		break;
6247 	}
6248 
6249 	return (ret);
6250 }
6251 
6252 int
6253 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6254 {
6255 	mdi_vhci_t *vhci;
6256 
6257 	if (!MDI_VHCI(dip))
6258 		return (MDI_FAILURE);
6259 
6260 	if (mdi_class) {
6261 		vhci = DEVI(dip)->devi_mdi_xhci;
6262 		ASSERT(vhci);
6263 		*mdi_class = vhci->vh_class;
6264 	}
6265 
6266 	return (MDI_SUCCESS);
6267 }
6268 
6269 int
6270 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6271 {
6272 	mdi_phci_t *phci;
6273 
6274 	if (!MDI_PHCI(dip))
6275 		return (MDI_FAILURE);
6276 
6277 	if (mdi_class) {
6278 		phci = DEVI(dip)->devi_mdi_xhci;
6279 		ASSERT(phci);
6280 		*mdi_class = phci->ph_vhci->vh_class;
6281 	}
6282 
6283 	return (MDI_SUCCESS);
6284 }
6285 
6286 int
6287 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6288 {
6289 	mdi_client_t *client;
6290 
6291 	if (!MDI_CLIENT(dip))
6292 		return (MDI_FAILURE);
6293 
6294 	if (mdi_class) {
6295 		client = DEVI(dip)->devi_mdi_client;
6296 		ASSERT(client);
6297 		*mdi_class = client->ct_vhci->vh_class;
6298 	}
6299 
6300 	return (MDI_SUCCESS);
6301 }
6302 
6303 /*
6304  * XXX This list should include all phci drivers needed during boot time
6305  * though it currently contains "fp" only.
6306  * Hopefully, the mechanism provided here will be replaced with a better
6307  * mechanism by vhci driven enumeration project.
6308  */
6309 static char *phci_driver_list[] = { "fp" };
6310 #define	N_PHCI_DRIVERS	(sizeof (phci_driver_list) / sizeof (char *))
6311 
6312 static void
6313 i_mdi_attach_phci_drivers()
6314 {
6315 	int  i;
6316 	major_t m;
6317 
6318 	for (i = 0; i < N_PHCI_DRIVERS; i++) {
6319 		m = ddi_name_to_major(phci_driver_list[i]);
6320 		if (m != (major_t)-1) {
6321 			if (ddi_hold_installed_driver(m) != NULL)
6322 				ddi_rele_driver(m);
6323 		}
6324 	}
6325 }
6326 
6327 /* bus config the specified phci */
6328 static void
6329 i_mdi_phci_bus_config(void *arg)
6330 {
6331 	mdi_phci_config_t *phc = (mdi_phci_config_t *)arg;
6332 	mdi_vhci_config_t *vhc;
6333 	dev_info_t	*ph_dip;
6334 	int		rv;
6335 
6336 	ASSERT(phc);
6337 	vhc = phc->phc_vhc;
6338 	ASSERT(vhc->vhc_op == BUS_CONFIG_ALL ||
6339 	    vhc->vhc_op == BUS_CONFIG_DRIVER);
6340 
6341 	/*
6342 	 * Must have already held the phci parent in
6343 	 * i_mdi_bus_config_all_phcis().
6344 	 * First configure the phci itself.
6345 	 */
6346 	rv = ndi_devi_config_one(phc->phc_parent_dip, phc->phc_devnm + 1,
6347 	    &ph_dip, vhc->vhc_flags);
6348 
6349 	/* release the hold that i_mdi_bus_config_all_phcis() placed */
6350 	ndi_rele_devi(phc->phc_parent_dip);
6351 
6352 	if (rv == NDI_SUCCESS) {
6353 		/* now bus config the phci */
6354 		if (vhc->vhc_op == BUS_CONFIG_DRIVER) {
6355 			(void) ndi_devi_config_driver(ph_dip, vhc->vhc_flags,
6356 				vhc->vhc_major);
6357 		} else
6358 			(void) ndi_devi_config(ph_dip, vhc->vhc_flags);
6359 
6360 		/* release the hold that ndi_devi_config_one() placed */
6361 		ndi_rele_devi(ph_dip);
6362 	}
6363 }
6364 
/*
 * Bus config all registered phcis associated with the vhci in parallel.
 * This process guarantees that the child nodes are enumerated under the vhci,
 * but not necessarily attached.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 *
 *	vdip		vhci dip; must not be busy-owned by the caller
 *			(asserted)
 *	flags		saved in vhc_flags for the per-phci config calls
 *	op		BUS_CONFIG_ALL or BUS_CONFIG_DRIVER (asserted)
 *	maj		saved in vhc_major; used by the per-phci
 *			BUS_CONFIG_DRIVER handling
 *	optimize	when non-zero, the request may be satisfied without
 *			starting a new bus config (see the cutoff time checks
 *			below)
 *
 * Returns MDI_SUCCESS, or MDI_FAILURE if the taskq can not be created.
 */
static int
i_mdi_bus_config_all_phcis(dev_info_t *vdip, uint_t flags,
    ddi_bus_config_op_t op, major_t maj, int optimize)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_config_t	*phc;
	int64_t			req_time;
	int			phci_count, rv;
	static int		first_time = 1;

	ASSERT(op == BUS_CONFIG_ALL || op == BUS_CONFIG_DRIVER);
	ASSERT(!DEVI_BUSY_OWNED(vdip));

	MDI_DEBUG(2, (CE_NOTE, vdip,
	    "!MDI: %s on all phcis: major = %d, flags = 0x%x, optimize = %d\n",
	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
	    (int)maj, flags, optimize));

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh);

	mutex_enter(&mdi_mutex);

	/* arrival time of this request, compared against start/cutoff times */
	req_time = lbolt64;

	/*
	 * Reduce unnecessary BUS_CONFIG_ALLs when opening stale
	 * /dev/[r]dsk links.
	 */
	if (optimize && (req_time < vh->vh_bus_config.vhc_cutoff_time)) {
		mutex_exit(&mdi_mutex);
		return (MDI_SUCCESS);
	}

	/*
	 * To initiate bus configs on all phcis in parallel, create a taskq
	 * with multiple threads. Since creation of a taskq is a heavy weight
	 * operation, taskq is created once per vhci and destroyed only when
	 * vhci unregisters with mdi.
	 *
	 * If multiple bus config requests arrive at a time, bus configs on
	 * phcis are initiated on behalf of one of the requests. Other requests
	 * wait until the bus configs on phcis is done.
	 *
	 * When a BUS_CONFIG_ALL on phcis completes, the following is done
	 * to avoid more of unnecessary bus configs.
	 *
	 *	o all BUS_CONFIG_ALL requests currently waiting with optimize
	 *	flag set are returned, i.e., no new BUS_CONFIG_ALL is initiated
	 *	on phcis on behalf of these requests.
	 *
	 *	o all BUS_CONFIG_ALL or BUS_CONFIG_DRIVER requests currently
	 *	waiting but have arrived prior to initiating BUS_CONFIG_ALL on
	 *	phcis are also returned.
	 *
	 * In other cases a new BUS_CONFIG_ALL or BUS_CONFIG_DRIVER is
	 * initiated on phcis on behalf of a new request.
	 */

	/* check if a bus config on phcis is in progress */
	while (vh->vh_bus_config.vhc_start_time != 0) {
		ddi_bus_config_op_t current_op;
		int64_t start_time;

		/* snapshot the in-progress request before sleeping */
		current_op = vh->vh_bus_config.vhc_op;
		start_time = vh->vh_bus_config.vhc_start_time;

		/* wait until the current bus configs on phcis are done */
		while (vh->vh_bus_config.vhc_start_time == start_time)
			cv_wait(&vh->vh_bus_config.vhc_cv, &mdi_mutex);

		/*
		 * The BUS_CONFIG_ALL that just finished covers this request
		 * if we asked for optimization or arrived before it started.
		 */
		if (current_op == BUS_CONFIG_ALL &&
		    vh->vh_bus_config.vhc_cutoff_time > 0 && (optimize ||
		    req_time < start_time)) {
			mutex_exit(&mdi_mutex);
			return (MDI_SUCCESS);
		}
	}

	/*
	 * At this point we are single threaded until
	 * vh_bus_config.vhc_start_time is reset to 0 at the end of this
	 * function.
	 */

	vh->vh_bus_config.vhc_op = op;
	vh->vh_bus_config.vhc_major = maj;
	vh->vh_bus_config.vhc_flags = flags;
	vh->vh_bus_config.vhc_start_time = lbolt64;

	/*
	 * Done once, the first time a vhci of class MDI_HCI_CLASS_SCSI gets
	 * here; mdi_mutex is dropped around the call.
	 */
	if (first_time && strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
		mutex_exit(&mdi_mutex);
		i_mdi_attach_phci_drivers();
		mutex_enter(&mdi_mutex);
		first_time = 0;
	}

	ASSERT(vh->vh_phci_count >= 0);
	if (vh->vh_phci_count == 0) {
		/* nothing to configure; mdi_mutex is still held at out1 */
		rv = MDI_SUCCESS;
		goto out1;
	}

	/*
	 * Create a taskq to initiate bus configs in parallel on phcis.
	 * Taskq allocation can be done in mdi_vhci_register() routine
	 * instead of here. For most systems, doing it here on demand saves
	 * resources as this code path is never called most of the times.
	 */
	if (vh->vh_bus_config.vhc_taskq == NULL) {
		/*
		 * it is ok even if vh->vh_phci_count changes after we release
		 * the mdi_mutex as phci_count is used just as an
		 * advisory number to taskq_create.
		 */
		phci_count = vh->vh_phci_count;
		mutex_exit(&mdi_mutex);

		/*
		 * As we are single threaded, it is ok to access the
		 * vh_bus_config.vhc_taskq member of vh outside of mdi_mutex
		 */
		if ((vh->vh_bus_config.vhc_taskq = taskq_create(
		    "mdi_bus_config_taskq", mdi_max_bus_config_threads,
		    MDI_TASKQ_PRI, phci_count, INT_MAX,
		    TASKQ_PREPOPULATE | TASKQ_DYNAMIC)) == NULL) {
			/* mdi_mutex is not held here; "out" reacquires it */
			rv = MDI_FAILURE;
			goto out;
		}

		mutex_enter(&mdi_mutex);
	}

	/* allocate at least vh->vh_phci_count phci bus config structures */
	while (vh->vh_bus_config.vhc_phc_cnt < vh->vh_phci_count) {
		int count;

		count = vh->vh_phci_count - vh->vh_bus_config.vhc_phc_cnt;
		mutex_exit(&mdi_mutex);
		while (count--) {
			phc = kmem_alloc(sizeof (*phc), KM_SLEEP);
			phc->phc_vhc = &vh->vh_bus_config;
			/*
			 * there is no need to hold a lock here as we
			 * are single threaded and no one else manipulates
			 * the list while we are here.
			 */
			phc->phc_next = vh->vh_bus_config.vhc_phc;
			vh->vh_bus_config.vhc_phc = phc;
			vh->vh_bus_config.vhc_phc_cnt++;
		}
		mutex_enter(&mdi_mutex);
		/*
		 * as new phcis could register with mdi after we dropped
		 * the mdi_mutex, we need to recheck the vh->vh_phci_count.
		 * Hence the while loop.
		 */
	}

	/* pair each registered phci with one bus config structure */
	for (ph = vh->vh_phci_head, phc = vh->vh_bus_config.vhc_phc;
	    ph != NULL; ph = ph->ph_next, phc = phc->phc_next) {

		ASSERT(phc != NULL);

		/* build a phci config handle to be passed to a taskq thread */
		MDI_PHCI_LOCK(ph);
		ASSERT(ph->ph_dip);

		/*
		 * We need to hold the phci dip before bus configuring the phci.
		 * But placing a hold on the phci dip is not safe here due to
		 * the race with phci detach. To get around this race,
		 * we place a hold on the phci dip's parent and note down
		 * the phci's name@addr. Later, in i_mdi_phci_bus_config(),
		 * we'll first configure the phci itself before bus
		 * configuring the phci.
		 */
		phc->phc_parent_dip = ddi_get_parent(ph->ph_dip);
		ndi_hold_devi(phc->phc_parent_dip);
		(void) ddi_deviname(ph->ph_dip, phc->phc_devnm);
		MDI_PHCI_UNLOCK(ph);
	}

	/* number of handles filled in above, i.e. tasks to dispatch */
	phci_count = vh->vh_phci_count;
	/* a cutoff time of -1 is replaced by 0 before the configs start */
	if (vh->vh_bus_config.vhc_cutoff_time == -1)
		vh->vh_bus_config.vhc_cutoff_time = 0;
	mutex_exit(&mdi_mutex);

	MDI_DEBUG(2, (CE_NOTE, vdip,
	    "!MDI: initiating %s on all phcis, major = %d, flags = 0x%x\n",
	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
	    (int)maj, flags));

	/*
	 * again, no need to hold a lock here as we are single threaded and
	 * no one else manipulates the list while we are here.
	 */
	for (phc = vh->vh_bus_config.vhc_phc; phci_count--;
	    phc = phc->phc_next) {
		(void) taskq_dispatch(vh->vh_bus_config.vhc_taskq,
		    i_mdi_phci_bus_config, phc, TQ_SLEEP);
	}

	/* wait until all phci bus configs are done */
	taskq_wait(vh->vh_bus_config.vhc_taskq);
	rv = MDI_SUCCESS;

	/* "out" is entered without mdi_mutex, "out1" with it held */
out:
	mutex_enter(&mdi_mutex);
out1:
	/* mark the bus config as finished and wake up waiting requests */
	vh->vh_bus_config.vhc_start_time = 0;
	if (op == BUS_CONFIG_ALL && vh->vh_bus_config.vhc_cutoff_time != -1) {
		/*
		 * mdi_bus_config_timeout is scaled by 1000000 before being
		 * passed to drv_usectohz(), i.e. it appears to be in seconds.
		 */
		vh->vh_bus_config.vhc_cutoff_time = lbolt64 +
		    (int64_t)drv_usectohz(mdi_bus_config_timeout * 1000000);
	}
	cv_broadcast(&vh->vh_bus_config.vhc_cv);
	mutex_exit(&mdi_mutex);

	MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: %s on all phcis %s\n",
	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
	    (rv == MDI_SUCCESS) ? "successful" : "failed"));

	return (rv);
}
6595 
6596 /*
6597  * A simple bus config implementation for vhcis with the assumption that all
6598  * phcis are always registered with MDI.
6599  *
6600  * BUS_CONFIG_ALL
6601  *
6602  * 	Do BUS_CONFIG_ALL on all phcis associated with the vhci.
6603  *
6604  * BUS_CONFIG_DRIVER
6605  *
6606  * 	Do BUS_CONFIG_DRIVER on all phcis associated with the vhci.
6607  *
6608  * BUS_CONFIG_ONE
6609  *
6610  *	If the requested child has already been enumerated under the vhci
6611  *	configure the child and return. Otherwise do BUS_CONFIG_ALL on all
6612  *	phcis associated with the vhci.
6613  */
6614 int
6615 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
6616     void *arg, dev_info_t **child)
6617 {
6618 	int rv = MDI_SUCCESS;
6619 
6620 	/*
6621 	 * While bus configuring phcis, the phci driver interactions with MDI
6622 	 * cause child nodes to be enumerated under the vhci node for which
6623 	 * they need to ndi_devi_enter the vhci node.
6624 	 *
6625 	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
6626 	 * for the bus config operations on phcis to finish while holding the
6627 	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
6628 	 * phcis and call the default framework provided bus config function
6629 	 * if we are called with ndi_devi_enter lock held.
6630 	 */
6631 	if (DEVI_BUSY_OWNED(vdip)) {
6632 		MDI_DEBUG(2, (CE_NOTE, vdip,
6633 		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
6634 		goto default_bus_config;
6635 	}
6636 
6637 	switch (op) {
6638 	case BUS_CONFIG_ONE:
6639 		/*
6640 		 * First try to directly configure the requested child.
6641 		 * This will work only if the requested child has already
6642 		 * been enumerated under vhci, which is usually the most common
6643 		 * case.
6644 		 */
6645 		if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
6646 		    NDI_SUCCESS) {
6647 			return (MDI_SUCCESS);
6648 		}
6649 
6650 		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: BUS_CONFIG_ONE on %s: "
6651 		    "will do BUS_CONFIG_ALL on all phcis\n", (char *)arg));
6652 
6653 		/* now do BUS_CONFIG_ALL on all phcis */
6654 		rv = i_mdi_bus_config_all_phcis(vdip, flags,
6655 		    BUS_CONFIG_ALL, -1, 1);
6656 		break;
6657 
6658 	case BUS_CONFIG_DRIVER:
6659 		rv = i_mdi_bus_config_all_phcis(vdip, flags, op,
6660 		    (major_t)(uintptr_t)arg, 0);
6661 		break;
6662 
6663 	case BUS_CONFIG_ALL:
6664 		rv = i_mdi_bus_config_all_phcis(vdip, flags, op, -1, 0);
6665 		break;
6666 
6667 	default:
6668 		break;
6669 	}
6670 
6671 default_bus_config:
6672 	/*
6673 	 * i_mdi_bus_config_all_phcis() guarantees that child nodes are
6674 	 * enumerated under the vhci, but not necessarily attached.
6675 	 * Now configure the appropriate child nodes.
6676 	 */
6677 	if (rv == MDI_SUCCESS &&
6678 	    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
6679 	    NDI_SUCCESS) {
6680 		return (MDI_SUCCESS);
6681 	}
6682 
6683 	return (MDI_FAILURE);
6684 }
6685 
6686 
6687 void *
6688 mdi_client_get_vhci_private(dev_info_t *dip)
6689 {
6690 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6691 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6692 		mdi_client_t	*ct;
6693 		ct = i_devi_get_client(dip);
6694 		return (ct->ct_vprivate);
6695 	}
6696 	return (NULL);
6697 }
6698 
6699 void
6700 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6701 {
6702 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6703 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6704 		mdi_client_t	*ct;
6705 		ct = i_devi_get_client(dip);
6706 		ct->ct_vprivate = data;
6707 	}
6708 }
6709 /*
6710  * mdi_pi_get_vhci_private():
6711  *		Get the vhci private information associated with the
6712  *		mdi_pathinfo node
6713  */
6714 void *
6715 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6716 {
6717 	caddr_t	vprivate = NULL;
6718 	if (pip) {
6719 		vprivate = MDI_PI(pip)->pi_vprivate;
6720 	}
6721 	return (vprivate);
6722 }
6723 
6724 /*
6725  * mdi_pi_set_vhci_private():
6726  *		Set the vhci private information in the mdi_pathinfo node
6727  */
6728 void
6729 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6730 {
6731 	if (pip) {
6732 		MDI_PI(pip)->pi_vprivate = priv;
6733 	}
6734 }
6735 
6736 /*
6737  * mdi_phci_get_vhci_private():
6738  *		Get the vhci private information associated with the
6739  *		mdi_phci node
6740  */
6741 void *
6742 mdi_phci_get_vhci_private(dev_info_t *dip)
6743 {
6744 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6745 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6746 		mdi_phci_t	*ph;
6747 		ph = i_devi_get_phci(dip);
6748 		return (ph->ph_vprivate);
6749 	}
6750 	return (NULL);
6751 }
6752 
6753 /*
6754  * mdi_phci_set_vhci_private():
6755  *		Set the vhci private information in the mdi_phci node
6756  */
6757 void
6758 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6759 {
6760 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6761 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6762 		mdi_phci_t	*ph;
6763 		ph = i_devi_get_phci(dip);
6764 		ph->ph_vprivate = priv;
6765 	}
6766 }
6767