xref: /titanic_50/usr/src/uts/common/sys/mdi_impldefs.h (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SYS_MDI_IMPLDEFS_H
28 #define	_SYS_MDI_IMPLDEFS_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/note.h>
33 #include <sys/sunmdi.h>
34 
35 #ifdef	__cplusplus
36 extern "C" {
37 #endif
38 
39 #ifdef _KERNEL
40 
41 /*
42  * Multipath Driver Interfaces
43  *
44  * The multipathing framework is provided in two modules.  The 'mpxio' misc.
45  * module provides the core multipath framework and the 'scsi_vhci' nexus
46  * driver provides the SCSI-III command set driver functionality for
47  * managing Fibre-Channel storage devices.
48  *
49  * As in any multipathing solution there are three major problems to solve:
50  *
51  * 1) Identification and enumeration of multipath client devices.
52  * 2) Optimal path selection when routing I/O requests.
53  * 3) Observability interfaces to snapshot the multipath configuration,
54  *    and infrastructure to provide performance and error statistics.
55  *
56  * The mpxio framework consists of several major components:
57  *
58  * 1) The MDI is the Multiplexed Device Interface; this is the core glue which
59  *    holds the following components together.
60  * 2) vHCI (Virtual Host Controller Interconnect) drivers provide multipathing
61  *    services for a given bus technology (example: 'scsi_vhci' provides
62  *    multipathing support for SCSI-III fibre-channel devices).
63  * 3) pHCI (Physical Host Controller Interconnect) drivers provide transport
64  *    services for a given host controller (example: 'fcp' provides transport
65  *    for fibre-channel devices).
66  * 4) Client Devices are standard Solaris target (or leaf) drivers
67  *    (example: 'ssd' is the standard disk driver for fibre-channel arrays).
68  * 5) Multipath information nodes ('pathinfo' nodes) connect client device
69  *    nodes and pHCI device nodes in the device tree.
70  *
71  * With the scsi_vhci, a QLC card, and mpxio enabled, the device tree might
72  * look like this:
73  *
74  *	+-----------+   +-----------+
75  *      | scsi_vhci |   |  pci@1f,0 |
76  *      +-----------+   +-----------+
77  *         /     \               \
78  * +----------+ +-----------+    +-------------+
79  * | ssd1     | | ssd2	    |    | qlc@0,0     |
80  * +----------+ +-----------+    +-------------+
81  *   |          |                  /        \
82  *   |          |        +-------------+   +------------+
83  *   |          |        | pHCI 1      |   |  pHCI 2    |
84  *   |          |        +-------------+   +------------+
85  *   |          |          /        |      /          |
86  *   |          |    +------+       |    +------+     |
87  *   |          |    |  ssd |       |    |  ssd |     |
88  *   |          |    | (OBP)|       |    | (OBP)|     |
89  *   |          |    +------+       |    +------+     |
90  *   |          |                   |                 |
91  *   |          |               +-------+           +--------+
92  *   |          +-------------->| path  |---------->| path   |
93  *   |                          | info  |           | info   |
94  *   |                          | node 1|           | node 3 |
95  *   |                          +-------+           +--------+
96  *   |                              |                 |
97  *   |                          +-------+           +--------+
98  *   +------------------------->| path  |---------->| path   |
99  *                              | info  |           | info   |
100  *                              | node 2|           | node 4 |
101  *                              +-------+           +--------+
102  *
103  * The multipath information nodes (mdi_pathinfo nodes) establish the
104  * relationship between the pseudo client driver instance nodes and the
105  * physical host controller interconnect (pHCI drivers) forming a matrix
106  * structure.
107  *
108  * The mpxio module implements locking at multiple granularity levels to
109  * support the needs of various consumers.  The multipath matrix can be
110  * globally locked, column locked, or row locked depending on the consumer.
111  * The intention is to balance simplicity and performance.
112  *
113  * Locking:
114  *
115  * The current implementation utilizes the following locks:
116  *
117  *   mdi_mutex: protects the vHCI list, per-vHCI structure and the
118  *   list of pHCIs and Client devices registered against them (protection
119  *   against multi-threaded add/remove).
120  *
121  *   devinfo_tree_lock (rw): protects system wide creation/removal of
122  *   mdi_pathinfo nodes into the multipath matrix.  Consumers (like the devinfo
123  *   driver) can freeze the configuration by acquiring this as a reader.
124  *
125  *   per-pHCI (mutex) lock: protects the column (pHCI-mdi_pathinfo node list)
126  *   and per-pHCI structure fields.  mdi_pathinfo node creation, deletion and
127  *   child mdi_pathinfo node state changes are serialized on per pHCI basis
128  *   (Protection against DR).
129  *
130  *   per-client (mutex) lock: protects the row (client-mdi_pathinfo node list)
131  *   and per-client structure fields.  The client-mdi_pathinfo node list is
132  *   typically walked to select an optimal path when routing I/O requests.
133  *
134  *   per-mdi_pathinfo (mutex) lock: protects the mdi_pathinfo node structure
135  *   fields.
136  *
137  * Note that per-Client structure and per-pHCI fields are freely readable when
138  * corresponding mdi_pathinfo locks are held, since holding an mdi_pathinfo
139  * node guarantees that its corresponding client and pHCI devices will not be
140  * freed.
141  */
142 
143 /*
144  * MDI Client global unique identifier property name string definition
145  */
/*
 * mdi_client_guid_prop is only declared here; its definition lives outside
 * this header.  MDI_CLIENT_GUID_PROP yields the same pointer with the const
 * qualifier cast away.  The expansion is wrapped in parentheses so that the
 * cast is applied to the pointer even when the macro is followed by a
 * postfix operator (e.g. MDI_CLIENT_GUID_PROP[0]).
 */
extern const char			*mdi_client_guid_prop;
#define	MDI_CLIENT_GUID_PROP		((char *)mdi_client_guid_prop)
148 
149 /*
150  * MDI Client load balancing policy definitions
151  *
152  * Load balancing policies are determined on a per-vHCI basis and are
153  * configurable via the vHCI's driver.conf file.
154  */
/*
 * Load balancing policy selector.  The enumerators are given their
 * (previously implicit) values explicitly.
 */
typedef enum {
	LOAD_BALANCE_NONE = 0,		/* Alternate pathing, no balancing */
	LOAD_BALANCE_RR = 1,		/* Round Robin */
	LOAD_BALANCE_LBA = 2		/* Logical Block Addressing */
} client_lb_t;
160 
/*
 * Arguments that accompany a load balancing policy.
 */
typedef struct {
	/*
	 * NOTE(review): presumably consumed by LOAD_BALANCE_LBA and
	 * defaulted from LOAD_BALANCE_DEFAULT_REGION_SIZE - confirm
	 * against the users of this structure.
	 */
	int	region_size;
} client_lb_args_t;
164 
165 /*
166  * MDI client load balancing property name/value string definitions
167  */
/*
 * The strings themselves are defined outside this header; only their
 * declarations are visible here.
 */
extern const char			*mdi_load_balance;
extern const char			*mdi_load_balance_none;
extern const char			*mdi_load_balance_ap;
extern const char			*mdi_load_balance_rr;
extern const char			*mdi_load_balance_lba;

/*
 * Non-const views of the strings above.  Each expansion is parenthesized
 * so that the cast applies to the pointer itself no matter which operator
 * follows the macro at the point of use.
 */
#define	LOAD_BALANCE_PROP		((char *)mdi_load_balance)
#define	LOAD_BALANCE_PROP_NONE		((char *)mdi_load_balance_none)
#define	LOAD_BALANCE_PROP_AP		((char *)mdi_load_balance_ap)
#define	LOAD_BALANCE_PROP_RR		((char *)mdi_load_balance_rr)
#define	LOAD_BALANCE_PROP_LBA		((char *)mdi_load_balance_lba)

/* default for region size */
#define	LOAD_BALANCE_DEFAULT_REGION_SIZE	18
182 
183 /*
184  * vHCI drivers:
185  *
186  * vHCI drivers are pseudo nexus drivers which implement multipath services
187  * for a specific command set or bus architecture ('class').  There is a
188  * single instance of the vHCI driver for each command set which supports
189  * multipath devices.
190  *
191  * Each vHCI driver registers the following callbacks from attach(9e).
192  */
/* Known revisions of the vHCI ops vector. */
#define	MDI_VHCI_OPS_REV_1	1

/*
 * Current revision, as a number and as a string.  The two definitions
 * below must be kept in step: whenever MDI_VHCI_OPS_REV changes, update
 * MDI_VHCI_OPS_REV_NAME to match.
 */
#define	MDI_VHCI_OPS_REV	MDI_VHCI_OPS_REV_1
#define	MDI_VHCI_OPS_REV_NAME	"1"
199 
200 typedef struct mdi_vhci_ops {
201 	/* revision management */
202 	int	vo_revision;
203 
204 	/* mdi_pathinfo node init callback */
205 	int	(*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags);
206 
207 	/* mdi_pathinfo node uninit callback */
208 	int	(*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip,
209 		    int flags);
210 
211 	/* mdi_pathinfo node state change callback */
212 	int	(*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip,
213 		    mdi_pathinfo_state_t state, uint32_t, int flags);
214 
215 	/* Client path failover callback */
216 	int	(*vo_failover)(dev_info_t *vdip, dev_info_t *cdip, int flags);
217 } mdi_vhci_ops_t;
218 
219 /*
 * phci bus config structure - one for each phci bus config operation that
221  * we initiate on behalf of a vhci.
222  */
223 typedef struct mdi_phci_config {
224 	struct mdi_vhci_config	*phc_vhc;	/* vhci bus config */
225 	struct mdi_phci_config	*phc_next;	/* next one on this list */
226 	dev_info_t	*phc_parent_dip;	/* parent of the phci */
227 	char		phc_devnm[MAXNAMELEN];	/* /name@addr of the phci */
228 } mdi_phci_config_t;
229 
230 /* vhci bus config structure - one for vhci instance */
231 typedef struct mdi_vhci_config {
232 	volatile ddi_bus_config_op_t vhc_op;	/* bus config - op type */
233 	major_t			vhc_major;	/* bus config - major */
234 	int			vhc_flags;	/* bus config - flags */
235 	volatile int64_t	vhc_start_time;	/* bus config start time */
236 	int64_t			vhc_cutoff_time; /* end time + some timeout */
237 	taskq_t			*vhc_taskq;
238 	kcondvar_t		vhc_cv;		/* mutex is mdi_mutex */
239 	mdi_phci_config_t	*vhc_phc;	/* phci bus config list */
240 	int			vhc_phc_cnt;	/* # of phcs on vhc_phc list */
241 } mdi_vhci_config_t;
242 
243 /*
244  * An mdi_vhci structure is created and bound to the devinfo node of every
245  * registered vHCI class driver; this happens when a vHCI registers itself from
246  * attach(9e).  This structure is unbound and freed when the vHCI unregisters
 * at detach(9e) time.
248  *
249  * Each vHCI driver is associated with a vHCI class name; this is the handle
250  * used to register and unregister pHCI drivers for a given transport.
251  *
252  * Locking: This structure is guarded by the mdi_mutex; however, depending
253  * on the context, some of the fields can be freely read without holding any
254  * locks (ex. holding a child's lock also guarantees that the vHCI (parent)
255  * cannot be unexpectedly freed).
256  */
257 typedef struct mdi_vhci {
258 	struct mdi_vhci		*vh_next;	/* next link		*/
259 	struct mdi_vhci		*vh_prev;	/* prev link		*/
260 	int			vh_flags;	/* Operation flags	*/
261 	dev_info_t		*vh_dip;	/* devi handle		*/
262 	char			*vh_class;	/* Class name		*/
263 	struct mdi_vhci_ops	*vh_ops;	/* Callback vectors	*/
264 	client_lb_t		vh_lb;		/* Global cache		*/
265 	int			vh_phci_count;	/* pHCI device count	*/
266 	struct mdi_phci		*vh_phci_head;	/* pHCI list head	*/
267 	struct mdi_phci		*vh_phci_tail;	/* pHCI list tail	*/
268 	int			vh_client_count;	/* Client count	*/
269 	struct client_hash	*vh_client_table;	/* Client hash	*/
270 	mdi_vhci_config_t	vh_bus_config;
271 } mdi_vhci_t;
272 
273 /*
274  * GUID Hash definitions
275  *
276  * Since all the mpxio managed devices for a given class are enumerated under
277  * the single vHCI instance for that class, sequentially walking through the
278  * client device link to find a client would be prohibitively slow.
279  */
280 
/* Size used for the GUID hash. */
#define	CLIENT_HASH_TABLE_SIZE	32

/*
 * One entry of the client hash table: the head of a chain of clients
 * together with a count.
 */
struct client_hash {
	struct mdi_client	*ct_hash_head;	/* head of the client chain */
	int			ct_hash_count;	/* client hash count */
};
290 
291 
292 /*
293  * pHCI Drivers:
294  *
295  * Physical HBA drivers provide transport services for mpxio-managed devices.
296  * As each pHCI instance is attached, it must register itself with the mpxio
297  * framework using mdi_phci_register().  When the pHCI is detached it must
298  * similarly call mdi_phci_unregister().
299  *
300  * The framework maintains a list of registered pHCI device instances for each
301  * vHCI.  This list is vHCI->vh_phci_count, vHCI->vh_phci_head,
302  * vHCI->vh_phci_tail and pHCI->ph_next.  This list is protected by the global
303  * mdi_mutex.
304  *
305  * Locking order:
306  *
307  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
308  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex devinfo_tree_lock))
309  */
310 typedef struct mdi_phci {
311 	kmutex_t		ph_mutex;	/* per-pHCI mutex	*/
312 	struct mdi_phci		*ph_next;	/* next link		*/
313 	struct mdi_phci		*ph_prev;	/* prev link		*/
314 	dev_info_t		*ph_dip;	/* devi handle		*/
315 	struct mdi_vhci 	*ph_vhci;	/* back ref. to vHCI	*/
316 	int			ph_flags;	/* pHCI operation flags	*/
317 	int			ph_path_count;	/* child pi count	*/
318 	mdi_pathinfo_t		*ph_path_head;	/* pi list head		*/
319 	mdi_pathinfo_t		*ph_path_tail;	/* pi list tail		*/
320 	int			ph_unstable;	/* Paths in transient state */
321 	kcondvar_t		ph_unstable_cv;	/* Paths in transient state */
322 	kcondvar_t		ph_powerchange_cv;
323 						/* Paths in transient state */
324 	void			*ph_vprivate;	/* vHCI driver private	*/
325 } mdi_phci_t;
326 
327 /*
328  * A pHCI device is 'unstable' while one or more paths are in a transitional
329  * state.  Hotplugging is prevented during this state.
330  */
/*
 * MDI_PHCI_UNSTABLE() increments ph_unstable.  MDI_PHCI_STABLE() decrements
 * it and broadcasts on ph_unstable_cv once the count has dropped to zero.
 *
 * Both macros expand to exactly one statement and must be followed by a
 * semicolon at the point of use; this keeps them safe inside unbraced
 * if/else bodies.  ph_unstable is a per-pHCI structure field, which the
 * locking notes at the top of this file place under the per-pHCI mutex.
 */
#define	MDI_PHCI_UNSTABLE(ph)		((ph)->ph_unstable++)
#define	MDI_PHCI_STABLE(ph) do { \
	(ph)->ph_unstable--; \
	if ((ph)->ph_unstable == 0) { \
		cv_broadcast(&(ph)->ph_unstable_cv); \
	} \
} while (0)
338 
339 /*
340  * per-pHCI lock macros
341  */
/* Enter, try to enter, and exit the ph_mutex of the given pHCI. */
#define	MDI_PHCI_LOCK(ph)	mutex_enter(&(ph)->ph_mutex)
#define	MDI_PHCI_TRYLOCK(ph)	mutex_tryenter(&(ph)->ph_mutex)
#define	MDI_PHCI_UNLOCK(ph)	mutex_exit(&(ph)->ph_mutex)
345 
346 /*
347  * pHCI state definitions and macros to track the pHCI driver instance state
348  */
/* Bits stored in mdi_phci_t.ph_flags. */
#define	MDI_PHCI_FLAGS_OFFLINE		0x01	/* offline */
#define	MDI_PHCI_FLAGS_SUSPEND		0x02	/* suspended */
#define	MDI_PHCI_FLAGS_POWER_DOWN	0x04	/* powered down */
#define	MDI_PHCI_FLAGS_DETACH		0x08	/* detached */
#define	MDI_PHCI_FLAGS_USER_DISABLE	0x10	/* disabled by the user */
#define	MDI_PHCI_FLAGS_D_DISABLE	0x20	/* disabled by the driver */
#define	MDI_PHCI_FLAGS_D_DISABLE_TRANS	0x40	/* disabled, transient */
#define	MDI_PHCI_FLAGS_POWER_TRANSITION	0x80	/* in a power transition */

/*
 * Note the complement: this mask selects every bit EXCEPT the three
 * disable flags.
 */
#define	MDI_PHCI_DISABLE_MASK \
	(~(MDI_PHCI_FLAGS_USER_DISABLE | MDI_PHCI_FLAGS_D_DISABLE | \
	MDI_PHCI_FLAGS_D_DISABLE_TRANS))

/*
 * True when no flag other than the disable flags is set; the disable
 * flags themselves do not influence the result.
 */
#define	MDI_PHCI_IS_READY(ph) \
	(0 == ((ph)->ph_flags & MDI_PHCI_DISABLE_MASK))
363 
/*
 * Set/clear/test helpers for mdi_phci_t.ph_flags, grouped by flag.  The
 * setters and clearers evaluate to the updated ph_flags; the IS_ tests
 * evaluate to the masked bit (non-zero when set), not to 0/1.
 */

/* MDI_PHCI_FLAGS_OFFLINE */
#define	MDI_PHCI_SET_OFFLINE(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE)
#define	MDI_PHCI_SET_ONLINE(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE)
#define	MDI_PHCI_IS_OFFLINE(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_OFFLINE)

/* MDI_PHCI_FLAGS_SUSPEND */
#define	MDI_PHCI_SET_SUSPEND(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND)
#define	MDI_PHCI_SET_RESUME(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_SUSPEND)
#define	MDI_PHCI_IS_SUSPENDED(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND)

/* MDI_PHCI_FLAGS_DETACH */
#define	MDI_PHCI_SET_DETACH(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH)
#define	MDI_PHCI_SET_ATTACH(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH)

/* MDI_PHCI_FLAGS_POWER_DOWN */
#define	MDI_PHCI_SET_POWER_DOWN(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN)
#define	MDI_PHCI_SET_POWER_UP(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN)
#define	MDI_PHCI_IS_POWERED_DOWN(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_DOWN)

/* MDI_PHCI_FLAGS_USER_DISABLE */
#define	MDI_PHCI_SET_USER_DISABLE(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_USER_DISABLE)
#define	MDI_PHCI_SET_USER_ENABLE(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_USER_DISABLE)
#define	MDI_PHCI_IS_USER_DISABLED(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_USER_DISABLE)

/* MDI_PHCI_FLAGS_D_DISABLE */
#define	MDI_PHCI_SET_DRV_DISABLE(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE)
#define	MDI_PHCI_SET_DRV_ENABLE(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE)
#define	MDI_PHCI_IS_DRV_DISABLED(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE)

/* MDI_PHCI_FLAGS_D_DISABLE_TRANS */
#define	MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE_TRANS)
#define	MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE_TRANS)
#define	MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE_TRANS)

/* MDI_PHCI_FLAGS_POWER_TRANSITION */
#define	MDI_PHCI_SET_POWER_TRANSITION(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_TRANSITION)
#define	MDI_PHCI_CLEAR_POWER_TRANSITION(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_TRANSITION)
#define	MDI_PHCI_IS_POWER_TRANSITION(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_TRANSITION)
432 
433 /*
434  * mpxio Managed Clients:
435  *
436  * This framework creates a struct mdi_client for every client device created
437  * by the framework as a result of self-enumeration of target devices by the
438  * registered pHCI devices.  This structure is bound to client device dev_info
439  * node at the time of client device allocation (ndi_devi_alloc(9e)). This
440  * structure is unbound from the dev_info node when mpxio framework removes a
441  * client device node from the system.
442  *
443  * This structure is created when a first path is enumerated and removed when
444  * last path is de-enumerated from the system.
445  *
446  * Multipath client devices are instantiated as children of corresponding vHCI
447  * driver instance. Each client device is uniquely identified by a GUID
448  * provided by target device itself.  The parent vHCI device also maintains a
449  * hashed list of client devices, protected by the global mdi_mutex.
450  *
451  * Typically pHCI devices self-enumerate their child devices using taskq,
452  * resulting in multiple paths to the same client device to be enumerated by
453  * competing threads.  mdi_mutex is also used to serialize the client device
454  * creation.
455  *
 * Currently this framework supports the following load-balancing policies
 * (see client_lb_t), configurable through the vHCI driver configuration
 * files.
 *
 * NONE		- Legacy AP mode
 * Round Robin	- Balance the pHCI load in a Round Robin fashion.
 * LBA		- Logical Block Addressing.
461  *
462  * This framework identifies the client device in three distinct states:
463  *
 * OPTIMAL	- Client device has at least one redundant path.
465  * DEGRADED	- No redundant paths (critical).  Failure in the current active
466  *                path would result in data access failures.
467  * FAILED 	- No paths are available to access this device.
468  *
469  * Locking order:
470  *
471  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
472  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex devinfo_tree_lock))
473  */
474 typedef struct mdi_client {
475 	kmutex_t		ct_mutex;	/* per-client mutex	*/
476 	struct mdi_client	*ct_hnext;	/* next client		*/
477 	struct mdi_client	*ct_hprev;	/* prev client		*/
478 	dev_info_t		*ct_dip;	/* client devi handle	*/
479 	struct mdi_vhci		*ct_vhci;	/* vHCI back ref	*/
480 	char			*ct_drvname;	/* client driver name	*/
481 	char			*ct_guid;	/* client guid		*/
482 	void			*ct_cprivate;	/* client driver private */
483 	client_lb_t		ct_lb;		/* load balancing scheme */
484 	client_lb_args_t	*ct_lb_args; 	/* load balancing args */
485 	int			ct_flags;	/* Driver op. flags	*/
486 	int			ct_state;	/* state information	*/
487 	int			ct_failover_flags;	/* Failover args */
488 	int			ct_failover_status;	/* last fo status */
489 	kcondvar_t		ct_failover_cv;	/* Failover status cv	*/
490 	int			ct_path_count;	/* multi path count	*/
491 	mdi_pathinfo_t		*ct_path_head;	/* multi path list head	*/
492 	mdi_pathinfo_t		*ct_path_tail;	/* multi path list tail	*/
493 	mdi_pathinfo_t		*ct_path_last;	/* last path used for i/o */
494 	int			ct_unstable;	/* Paths in transient state */
495 	kcondvar_t		ct_unstable_cv;	/* Paths in transient state */
496 	int			ct_power_cnt;	/* Hold count on parent power */
497 	kcondvar_t		ct_powerchange_cv;
498 					/* Paths in power transient state */
499 	int			ct_powercnt_held;
500 					/* ct_power_cnt held in pre_unconfig */
501 	int			ct_powercnt_reset;
502 					/* ct_power_cnt was resetted */
503 	void			*ct_vprivate;	/* vHCI driver private	*/
504 } mdi_client_t;
505 
506 /*
507  * per-Client device locking definitions
508  */
/* Enter, try to enter, and exit the ct_mutex of the given client. */
#define	MDI_CLIENT_LOCK(ct)	mutex_enter(&(ct)->ct_mutex)
#define	MDI_CLIENT_TRYLOCK(ct)	mutex_tryenter(&(ct)->ct_mutex)
#define	MDI_CLIENT_UNLOCK(ct)	mutex_exit(&(ct)->ct_mutex)
512 
513 /*
 * A client device is unstable while one or more of its paths are in a
 * transitional state.  We do not allow failover to take place while paths
 * are in a transient state.  Similarly, we do not allow state transitions
 * while a client device failover is in progress.
518  */
/*
 * MDI_CLIENT_UNSTABLE() increments ct_unstable.  MDI_CLIENT_STABLE()
 * decrements it and broadcasts on ct_unstable_cv once the count has
 * dropped to zero.
 *
 * Both macros expand to exactly one statement and must be followed by a
 * semicolon at the point of use; this keeps them safe inside unbraced
 * if/else bodies.  ct_unstable is a per-client structure field, which the
 * locking notes at the top of this file place under the per-client mutex.
 */
#define	MDI_CLIENT_UNSTABLE(ct)		((ct)->ct_unstable++)
#define	MDI_CLIENT_STABLE(ct) do { \
	(ct)->ct_unstable--; \
	if ((ct)->ct_unstable == 0) { \
		cv_broadcast(&(ct)->ct_unstable_cv); \
	} \
} while (0)
526 
527 /*
528  * Client driver instance state definitions:
529  */
/* Bits stored in mdi_client_t.ct_flags. */
#define	MDI_CLIENT_FLAGS_OFFLINE		0x001
#define	MDI_CLIENT_FLAGS_SUSPEND		0x002
#define	MDI_CLIENT_FLAGS_POWER_DOWN		0x004
#define	MDI_CLIENT_FLAGS_DETACH			0x008
#define	MDI_CLIENT_FLAGS_FAILOVER		0x010
#define	MDI_CLIENT_FLAGS_REPORT_DEV		0x020
#define	MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS	0x040
#define	MDI_CLIENT_FLAGS_ASYNC_FREE		0x080
#define	MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED	0x100
#define	MDI_CLIENT_FLAGS_POWER_TRANSITION	0x200
540 
/*
 * Set/clear/test helpers for mdi_client_t.ct_flags, one group per flag.
 * The setters and clearers evaluate to the updated ct_flags; the IS_
 * tests evaluate to the masked bit (non-zero when set), not to 0/1.
 */

/* MDI_CLIENT_FLAGS_OFFLINE */
#define	MDI_CLIENT_SET_OFFLINE(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE)
#define	MDI_CLIENT_SET_ONLINE(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE)
#define	MDI_CLIENT_IS_OFFLINE(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE)

/* MDI_CLIENT_FLAGS_SUSPEND */
#define	MDI_CLIENT_SET_SUSPEND(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND)
#define	MDI_CLIENT_SET_RESUME(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND)
#define	MDI_CLIENT_IS_SUSPENDED(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND)

/* MDI_CLIENT_FLAGS_POWER_DOWN */
#define	MDI_CLIENT_SET_POWER_DOWN(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN)
#define	MDI_CLIENT_SET_POWER_UP(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN)
#define	MDI_CLIENT_IS_POWERED_DOWN(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_DOWN)

/* MDI_CLIENT_FLAGS_POWER_TRANSITION */
#define	MDI_CLIENT_SET_POWER_TRANSITION(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_TRANSITION)
#define	MDI_CLIENT_CLEAR_POWER_TRANSITION(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_TRANSITION)
#define	MDI_CLIENT_IS_POWER_TRANSITION(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_TRANSITION)

/* MDI_CLIENT_FLAGS_DETACH */
#define	MDI_CLIENT_SET_DETACH(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH)
#define	MDI_CLIENT_SET_ATTACH(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH)
#define	MDI_CLIENT_IS_DETACHED(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH)

/* MDI_CLIENT_FLAGS_FAILOVER */
#define	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER)
#define	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER)
#define	MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER)

/* MDI_CLIENT_FLAGS_REPORT_DEV */
#define	MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV)
#define	MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV)
#define	MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV)

/* MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS */
#define	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS)
#define	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS)
#define	MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS)

/* MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED (no clear macro is provided) */
#define	MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)
#define	MDI_CLIENT_IS_DEV_NOT_SUPPORTED(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)
618 
619 /*
620  * Client operating states.
621  */
/* Values held in mdi_client_t.ct_state. */
#define	MDI_CLIENT_STATE_OPTIMAL	1
#define	MDI_CLIENT_STATE_DEGRADED	2
#define	MDI_CLIENT_STATE_FAILED		3

/* Read the client's operating state. */
#define	MDI_CLIENT_STATE(ct) ((ct)->ct_state)

/*
 * Store a new operating state.  The 'state' argument is parenthesized in
 * the expansion, like every other macro argument in this file, so that an
 * arbitrary expression can be passed safely.
 */
#define	MDI_CLIENT_SET_STATE(ct, state) ((ct)->ct_state = (state))

/* Evaluates to 1 when the client is in the FAILED state, 0 otherwise. */
#define	MDI_CLIENT_IS_FAILED(ct) \
	    ((ct)->ct_state == MDI_CLIENT_STATE_FAILED)
631 
632 /*
633  * mdi_pathinfo nodes:
634  *
635  * From this framework's perspective, a 'path' is a tuple consisting of a
636  * client or end device, a host controller which provides device
637  * identification and transport services (pHCI), and bus specific unit
638  * addressing information.  A path may be decorated with properties which
639  * describe the capabilities of the path; such properties are analogous to
640  * device node and minor node properties.
641  *
 * The framework maintains a linked list of mdi_pathinfo nodes created by every
643  * pHCI driver instance via the pi_phci_link linkage; this is used (for example)
644  * to make sure that all relevant pathinfo nodes are freed before the pHCI
645  * is unregistered.
646  *
647  * Locking order:
648  *
649  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
650  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
651  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
652  * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex))
653  *
654  * mdi_pathinfo node structure definition
655  */
656 struct mdi_pathinfo {
657 	kmutex_t		pi_mutex;	/* per path mutex	*/
658 	mdi_pathinfo_state_t	pi_state;	/* path state		*/
659 	mdi_pathinfo_state_t	pi_old_state;	/* path state		*/
660 	kcondvar_t		pi_state_cv;	/* path state condvar	*/
661 	mdi_client_t		*pi_client;	/* client		*/
662 	mdi_phci_t		*pi_phci;	/* pHCI dev_info node	*/
663 	char			*pi_addr;	/* path unit address	*/
664 	nvlist_t		*pi_prop;	/* Properties		*/
665 	void			*pi_cprivate;	/* client private info	*/
666 	void			*pi_pprivate;	/* phci private info	*/
667 	struct mdi_pathinfo	*pi_client_link; /* next path in client list */
668 	struct mdi_pathinfo	*pi_phci_link;	 /* next path in phci list */
669 	int			pi_ref_cnt;	/* pi reference count	*/
670 	kcondvar_t		pi_ref_cv;	/* condition variable	*/
671 	struct mdi_pi_kstats	*pi_kstats;	/* aggregate kstats */
672 	int			pi_pm_held;	/* phci's kidsup incremented */
673 	int			pi_preferred;	/* Preferred path 	*/
674 	void			*pi_vprivate;	/* vhci private info	*/
675 };
676 
677 /*
678  * pathinfo statistics:
679  *
680  * The mpxio architecture allows for multiple pathinfo nodes for each
681  * client-pHCI combination.  For statistics purposes, these statistics are
682  * aggregated into a single client-pHCI set of kstats.
683  */
684 struct mdi_pi_kstats {
685 	int	pi_kstat_ref;		/* # paths aggregated, also a ref cnt */
686 	kstat_t	*pi_kstat_iostats;	/* mdi:iopath statistic set */
687 	kstat_t *pi_kstat_errstats;	/* error statistics */
688 };
689 
690 /*
691  * pathinfo error kstat
692  */
693 struct pi_errs {
694 	struct kstat_named pi_softerrs;		/* "Soft" Error */
695 	struct kstat_named pi_harderrs;		/* "Hard" Error */
696 	struct kstat_named pi_transerrs;	/* Transport Errors */
697 	struct kstat_named pi_icnt_busy;	/* Interconnect Busy */
698 	struct kstat_named pi_icnt_errors;	/* Interconnect Errors */
699 	struct kstat_named pi_phci_rsrc;	/* pHCI No Resources */
700 	struct kstat_named pi_phci_localerr;	/* pHCI Local Errors */
701 	struct kstat_named pi_phci_invstate;	/* pHCI Invalid State */
702 	struct kstat_named pi_failedfrom;	/* Failover: Failed From */
703 	struct kstat_named pi_failedto;		/* Failover: Failed To */
704 };
705 
706 /*
707  * increment an error counter
708  */
/*
 * MDI_PI_ERRSTAT(pip, x): bump the 32-bit error counter named by 'x' (one
 * of the MDI_PI_* codes below, i.e. a member of struct pi_errs) for the
 * path 'pip'.  Nothing happens when the path has no pi_kstats attached.
 *
 * The macro expands to a single statement and must be followed by a
 * semicolon, which keeps it safe inside unbraced if/else bodies.  The
 * ks_data pointer is converted with an explicit cast so the expansion is
 * also valid where implicit conversion from void * is not (this header
 * carries extern "C" guards), and the temporary uses a prefixed name to
 * avoid capturing an identifier from the caller's 'pip' expression.
 */
#define	MDI_PI_ERRSTAT(pip, x) do { \
	if (MDI_PI(pip)->pi_kstats != NULL) { \
		struct pi_errs *mdi_pep = (struct pi_errs *) \
		    MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \
		mdi_pep->x.value.ui32++; \
	} \
} while (0)

/*
 * error codes which can be passed to MDI_PI_ERRSTAT; each names the
 * struct pi_errs member that is incremented
 */
#define	MDI_PI_SOFTERR	pi_softerrs
#define	MDI_PI_HARDERR	pi_harderrs
#define	MDI_PI_TRANSERR	pi_transerrs
#define	MDI_PI_ICNTBUSY	pi_icnt_busy
#define	MDI_PI_ICNTERR	pi_icnt_errors
#define	MDI_PI_PHCIRSRC	pi_phci_rsrc
#define	MDI_PI_PHCILOCL	pi_phci_localerr
#define	MDI_PI_PHCIINVS	pi_phci_invstate
#define	MDI_PI_FAILFROM	pi_failedfrom
#define	MDI_PI_FAILTO	pi_failedto
730 
/* View an opaque path handle as a struct mdi_pathinfo pointer. */
#define	MDI_PI(pip)		((struct mdi_pathinfo *)(pip))

/* Enter and exit the path's pi_mutex. */
#define	MDI_PI_LOCK(pip)	mutex_enter(&MDI_PI(pip)->pi_mutex)
#define	MDI_PI_UNLOCK(pip)	mutex_exit(&MDI_PI(pip)->pi_mutex)

/*
 * Take or drop a reference on the path.  Both evaluate to the value of
 * pi_ref_cnt after the change.
 */
#define	MDI_PI_HOLD(pip)	(++MDI_PI(pip)->pi_ref_cnt)
#define	MDI_PI_RELE(pip)	(--MDI_PI(pip)->pi_ref_cnt)
737 
/*
 * Flag bits.  NOTE(review): the consumers of these values are not visible
 * in this header; the names suggest they qualify path state change and
 * enable/disable requests - confirm against the callers.
 */
#define	MDI_EXT_STATE_CHANGE	0x10000000

#define	MDI_DISABLE_OP		0x01
#define	MDI_ENABLE_OP		0x02
#define	MDI_BEFORE_STATE_CHANGE	0x04
#define	MDI_AFTER_STATE_CHANGE	0x08
#define	MDI_SYNC_FLAG		0x10
746 
/*
 * Accessors that split pi_state and pi_old_state into the part selected
 * by MDI_PATHINFO_STATE_MASK and the part selected by
 * MDI_PATHINFO_EXT_STATE_MASK.
 */
#define	MDI_PI_STATE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
#define	MDI_PI_EXT_STATE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK)

#define	MDI_PI_OLD_STATE(pip) \
	(MDI_PI(pip)->pi_old_state & MDI_PATHINFO_STATE_MASK)
#define	MDI_PI_OLD_EXT_STATE(pip) \
	(MDI_PI(pip)->pi_old_state & MDI_PATHINFO_EXT_STATE_MASK)
758 
/*
 * Bit helpers operating on pi_state.  Setters and clearers evaluate to
 * the updated pi_state; the single-flag IS_ tests evaluate to the masked
 * bit (non-zero when set).
 */

/* MDI_PATHINFO_STATE_TRANSIENT */
#define	MDI_PI_SET_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_TRANSIENT)
#define	MDI_PI_CLEAR_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_TRANSIENT)
#define	MDI_PI_IS_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT)

/* MDI_PATHINFO_STATE_USER_DISABLE */
#define	MDI_PI_SET_USER_DISABLE(pip) \
	(MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_USER_DISABLE)
#define	MDI_PI_SET_USER_ENABLE(pip) \
	(MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_USER_DISABLE)
#define	MDI_PI_IS_USER_DISABLE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_USER_DISABLE)

/* MDI_PATHINFO_STATE_DRV_DISABLE */
#define	MDI_PI_SET_DRV_DISABLE(pip) \
	(MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE)
#define	MDI_PI_SET_DRV_ENABLE(pip) \
	(MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE)
#define	MDI_PI_IS_DRV_DISABLE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE)

/* MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT */
#define	MDI_PI_SET_DRV_DISABLE_TRANS(pip) \
	(MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)
#define	MDI_PI_SET_DRV_ENABLE_TRANS(pip) \
	(MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)
#define	MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)

/* Evaluates to 1 if any of the three disable bits is set, else 0. */
#define	MDI_PI_IS_DISABLE(pip) \
	(MDI_PI_IS_USER_DISABLE(pip) || \
	MDI_PI_IS_DRV_DISABLE(pip) || \
	MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip))
799 
/*
 * INIT state.
 *
 * MDI_PI_IS_INIT() compares only the base-state field, so it also holds
 * while the transient bit is set.  MDI_PI_IS_INITING() ignores the
 * extended-state bits and requires exactly INIT plus the transient bit.
 * MDI_PI_SET_INIT() stores MDI_PATHINFO_STATE_INIT over the whole of
 * pi_state; unlike the other MDI_PI_SET_<state>() macros it does not carry
 * the extended-state bits across.
 */
#define	MDI_PI_IS_INIT(pip)	\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_INIT)

#define	MDI_PI_IS_INITING(pip)	\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	(MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_INIT(pip)	\
	(MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT)
810 
/*
 * ONLINE state.
 *
 * MDI_PI_SET_ONLINING() records the current base state in pi_old_state,
 * then stores ONLINE with the transient bit set.  MDI_PI_SET_ONLINE()
 * stores plain ONLINE and leaves pi_old_state alone.  Both keep the
 * extended-state bits that were in pi_state beforehand.
 *
 * The two setters expand to a brace-enclosed block, not an expression.
 *
 * MDI_PI_IS_ONLINING() ignores the extended-state bits and requires
 * exactly ONLINE plus the transient bit; MDI_PI_IS_ONLINE() compares only
 * the base-state field.
 */
#define	MDI_PI_SET_ONLINING(pip) {					\
	uint32_t	saved_ext =					\
	    MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;	\
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT) | \
	    saved_ext;							\
}

#define	MDI_PI_IS_ONLINING(pip)	\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	(MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_ONLINE(pip) {					\
	uint32_t	saved_ext =					\
	    MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;	\
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_ONLINE | saved_ext;	\
}

#define	MDI_PI_IS_ONLINE(pip)	\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_ONLINE)
835 
/*
 * OFFLINE state.
 *
 * MDI_PI_SET_OFFLINING() records the current base state in pi_old_state,
 * then stores OFFLINE with the transient bit set.  MDI_PI_SET_OFFLINE()
 * stores plain OFFLINE and leaves pi_old_state alone.  Both keep the
 * extended-state bits that were in pi_state beforehand.
 *
 * The two setters expand to a brace-enclosed block, not an expression.
 *
 * MDI_PI_IS_OFFLINING() ignores the extended-state bits and requires
 * exactly OFFLINE plus the transient bit; MDI_PI_IS_OFFLINE() compares
 * only the base-state field.
 */
#define	MDI_PI_SET_OFFLINING(pip) {					\
	uint32_t	saved_ext =					\
	    MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;	\
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT) | \
	    saved_ext;							\
}

#define	MDI_PI_IS_OFFLINING(pip)	\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	(MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_OFFLINE(pip) {					\
	uint32_t	saved_ext =					\
	    MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;	\
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_OFFLINE | saved_ext;	\
}

#define	MDI_PI_IS_OFFLINE(pip)	\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_OFFLINE)
859 
/*
 * STANDBY state.
 *
 * MDI_PI_SET_STANDBYING() records the current base state in pi_old_state,
 * then stores STANDBY with the transient bit set.  MDI_PI_SET_STANDBY()
 * stores plain STANDBY and leaves pi_old_state alone.  Both keep the
 * extended-state bits that were in pi_state beforehand.
 *
 * The two setters expand to a brace-enclosed block, not an expression.
 *
 * MDI_PI_IS_STANDBY() compares only the base-state field.
 */
#define	MDI_PI_SET_STANDBYING(pip) {					\
	uint32_t	saved_ext =					\
	    MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;	\
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT) | \
	    saved_ext;							\
}

#define	MDI_PI_SET_STANDBY(pip) {					\
	uint32_t	saved_ext =					\
	    MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;	\
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_STANDBY | saved_ext;	\
}

#define	MDI_PI_IS_STANDBY(pip)	\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_STANDBY)
879 
/*
 * FAULT state.
 *
 * MDI_PI_SET_FAULTING() records the current base state in pi_old_state,
 * then stores FAULT with the transient bit set.  MDI_PI_SET_FAULT() stores
 * plain FAULT and leaves pi_old_state alone.  Both keep the extended-state
 * bits that were in pi_state beforehand.
 *
 * The two setters expand to a brace-enclosed block, not an expression.
 *
 * MDI_PI_IS_FAULT() compares only the base-state field.
 */
#define	MDI_PI_SET_FAULTING(pip) {					\
	uint32_t	saved_ext =					\
	    MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;	\
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT) |	\
	    saved_ext;							\
}

#define	MDI_PI_SET_FAULT(pip) {						\
	uint32_t	saved_ext =					\
	    MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;	\
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_FAULT | saved_ext;	\
}

#define	MDI_PI_IS_FAULT(pip)	\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_FAULT)
899 
/*
 * Test MDI_PHCI_FLAGS_SUSPEND in the ph_flags word of the object that the
 * path's pi_phci pointer refers to.  Yields the raw masked bit.  pi_phci
 * is dereferenced unconditionally, so it must be valid.
 */
#define	MDI_PI_IS_SUSPENDED(pip)	\
	(MDI_PI(pip)->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND)
902 
903 /*
904  * vHCI driver instance registration/unregistration
905  *
906  * mdi_vhci_register() is called by a vHCI driver to register itself as the
907  * manager of devices from a particular 'class'.  This should be called from
908  * attach(9E).
909  *
910  * mdi_vhci_unregister() is called from detach(9E) to unregister a vHCI
911  * instance from the framework.
912  */
913 int		mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int);
914 int		mdi_vhci_unregister(dev_info_t *, int);
915 
916 /*
917  * Utility functions
918  */
919 int		mdi_phci_get_path_count(dev_info_t *);
920 dev_info_t	*mdi_phci_path2devinfo(dev_info_t *, caddr_t);
921 
922 
923 /*
924  * Path Selection Functions:
925  *
926  * mdi_select_path() is called by a vHCI driver to select to which path an
927  * I/O request should be routed.  The caller passes the 'buf' structure as
928  * one of the parameters.  The mpxio framework uses the buf's contents to
929  * maintain per path statistics (total I/O size / count pending).  If more
930  * than one online path is available, the framework automatically selects
931  * a suitable one.  If a failover operation is active for this client device
932  * the call fails, returning MDI_BUSY.
933  *
934  * By default this function returns a suitable path in the 'online' state,
935  * based on the current load balancing policy.  Currently we support
936  * LOAD_BALANCE_NONE (Previously selected online path will continue to be
937  * used as long as the path is usable) and LOAD_BALANCE_RR (Online paths
938  * will be selected in a round robin fashion).  The load balancing scheme
939  * can be configured in the vHCI driver's configuration file (driver.conf).
940  *
941  * vHCI drivers may override this default behaviour by specifying appropriate
942  * flags.  If start_pip is specified (non NULL), it is used as the routine's
943  * starting point; it starts walking from there to find the next appropriate
944  * path.
945  *
946  * The following values for 'flags' are currently defined:
947  *
948  * 	MDI_SELECT_ONLINE_PATH: select an ONLINE path
949  *	MDI_SELECT_STANDBY_PATH: select a STANDBY path
950  *
951  * The selected paths are returned in a held state (ref_cnt), and the caller
952  * should release the hold by calling mdi_rele_path() when the operation ends.
953  */
954 int		mdi_select_path(dev_info_t *, struct buf *, int,
955 		    mdi_pathinfo_t *, mdi_pathinfo_t **);
956 void		mdi_hold_path(mdi_pathinfo_t *);
957 void		mdi_rele_path(mdi_pathinfo_t *);
958 int		mdi_set_lb_policy(dev_info_t *, client_lb_t);
959 int		mdi_set_lb_region_size(dev_info_t *, int);
960 client_lb_t	mdi_get_lb_policy(dev_info_t *);
961 
962 /*
963  * flags for mdi_select_path() routine
964  */
#define	MDI_SELECT_ONLINE_PATH		0x1	/* select an ONLINE path */
#define	MDI_SELECT_STANDBY_PATH		0x2	/* select a STANDBY path */
967 
968 /*
969  * MDI client device utility functions
970  */
971 int		mdi_client_get_path_count(dev_info_t *);
972 dev_info_t	*mdi_client_path2devinfo(dev_info_t *, caddr_t);
973 
974 /*
975  * Failover:
976  *
977  * The vHCI driver calls mdi_failover() to initiate a failover operation.
978  * mdi_failover() calls back into the vHCI driver's vo_failover()
979  * entry point to perform the actual failover operation.  The reason
980  * for requiring the vHCI driver to initiate failover by calling
981  * mdi_failover(), instead of directly executing vo_failover() itself,
982  * is to ensure that the mdi framework can keep track of the client
983  * state properly.  Additionally, mdi_failover() provides as a
984  * convenience the option of performing the failover operation
985  * synchronously or asynchronously.
986  *
987  * Upon successful completion of the failover operation, the paths that were
988  * previously ONLINE will be in the STANDBY state, and the newly activated
989  * paths will be in the ONLINE state.
990  *
991  * The flags modifier selects synchronous or asynchronous activation.
992  */
993 int mdi_failover(dev_info_t *, dev_info_t *, int);
994 
995 /*
996  * Client device failover mode of operation
997  */
#define	MDI_FAILOVER_SYNC	0x1	/* Synchronous failover		*/
#define	MDI_FAILOVER_ASYNC	0x2	/* Asynchronous failover	*/
1000 
1001 /*
1002  * mdi_pathinfo node state change functions.
1003  */
1004 void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *);
1005 
1006 /*
1007  * mdi_pathinfo node extended state change functions.
1008  */
1009 int mdi_pi_get_state2(mdi_pathinfo_t *, mdi_pathinfo_state_t *, uint32_t *);
1010 int mdi_pi_get_preferred(mdi_pathinfo_t *);
1011 
1012 /*
1013  * mdi_pathinfo node member functions
1014  */
1015 void *mdi_pi_get_client_private(mdi_pathinfo_t *);
1016 void mdi_pi_set_client_private(mdi_pathinfo_t *, void *);
1017 void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t);
1018 void mdi_pi_set_preferred(mdi_pathinfo_t *, int);
1019 
1020 /* get/set vhci private data */
1021 void *mdi_client_get_vhci_private(dev_info_t *);
1022 void mdi_client_set_vhci_private(dev_info_t *, void *);
1023 void *mdi_phci_get_vhci_private(dev_info_t *);
1024 void mdi_phci_set_vhci_private(dev_info_t *, void *);
1025 void *mdi_pi_get_vhci_private(mdi_pathinfo_t *);
1026 void mdi_pi_set_vhci_private(mdi_pathinfo_t *, void *);
1027 
1028 /*
1029  * mdi_pathinfo Property utilities
1030  */
1031 int mdi_prop_size(mdi_pathinfo_t *, size_t *);
1032 int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t);
1033 
1034 /* obsolete interface, to be removed */
1035 void mdi_get_next_path(dev_info_t *, mdi_pathinfo_t *, mdi_pathinfo_t **);
1036 int mdi_get_component_type(dev_info_t *);
1037 
1038 #endif	/* _KERNEL */
1039 
1040 #ifdef	__cplusplus
1041 }
1042 #endif
1043 
1044 #endif	/* _SYS_MDI_IMPLDEFS_H */
1045