xref: /illumos-gate/usr/src/uts/common/sys/mdi_impldefs.h (revision 7f7322febbcfe774b7270abc3b191c094bfcc517)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SYS_MDI_IMPLDEFS_H
28 #define	_SYS_MDI_IMPLDEFS_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/note.h>
33 #include <sys/types.h>
34 #include <sys/sunmdi.h>
35 #include <sys/modhash.h>
36 #include <sys/callb.h>
37 
38 #ifdef	__cplusplus
39 extern "C" {
40 #endif
41 
42 #ifdef _KERNEL
43 
44 /*
45  * Multipath Driver Interfaces
46  *
47  * The multipathing framework is provided in two modules.  The 'mpxio' misc.
48  * module provides the core multipath framework and the 'scsi_vhci' nexus
49  * driver provides the SCSI-III command set driver functionality for
50  * managing Fibre-Channel storage devices.
51  *
52  * As in any multipathing solution there are three major problems to solve:
53  *
54  * 1) Identification and enumeration of multipath client devices.
55  * 2) Optimal path selection when routing I/O requests.
56  * 3) Observability interfaces to snapshot the multipath configuration,
57  *    and infrastructure to provide performance and error statistics.
58  *
59  * The mpxio framework consists of several major components:
60  *
61  * 1) The MDI is the Multiplexed Device Interface; this is the core glue which
62  *    holds the following components together.
63  * 2) vHCI (Virtual Host Controller Interconnect) drivers provide multipathing
64  *    services for a given bus technology (example: 'scsi_vhci' provides
65  *    multipathing support for SCSI-III fibre-channel devices).
66  * 3) pHCI (Physical Host Controller Interconnect) drivers provide transport
67  *    services for a given host controller (example: 'fcp' provides transport
68  *    for fibre-channel devices).
69  * 4) Client Devices are standard Solaris target (or leaf) drivers
70  *    (example: 'ssd' is the standard disk driver for fibre-channel arrays).
71  * 5) Multipath information nodes ('pathinfo' nodes) connect client device
72  *    nodes and pHCI device nodes in the device tree.
73  *
74  * With the scsi_vhci, a QLC card, and mpxio enabled, the device tree might
75  * look like this:
76  *
77  *	+-----------+   +-----------+
78  *      | scsi_vhci |   |  pci@1f,0 |
79  *      +-----------+   +-----------+
80  *         /     \               \
81  * +----------+ +-----------+    +-------------+
82  * | ssd1     | | ssd2	    |    | qlc@0,0     |
83  * +----------+ +-----------+    +-------------+
84  *   |          |                  /        \
85  *   |          |        +-------------+   +------------+
86  *   |          |        | pHCI 1      |   |  pHCI 2    |
87  *   |          |        +-------------+   +------------+
88  *   |          |          /        |      /          |
89  *   |          |    +------+       |    +------+     |
90  *   |          |    |  ssd |       |    |  ssd |     |
91  *   |          |    | (OBP)|       |    | (OBP)|     |
92  *   |          |    +------+       |    +------+     |
93  *   |          |                   |                 |
94  *   |          |               +-------+           +--------+
95  *   |          +-------------->| path  |---------->| path   |
96  *   |                          | info  |           | info   |
97  *   |                          | node 1|           | node 3 |
98  *   |                          +-------+           +--------+
99  *   |                              |                 |
100  *   |                          +-------+           +--------+
101  *   +------------------------->| path  |---------->| path   |
102  *                              | info  |           | info   |
103  *                              | node 2|           | node 4 |
104  *                              +-------+           +--------+
105  *
106  * The multipath information nodes (mdi_pathinfo nodes) establish the
107  * relationship between the pseudo client driver instance nodes and the
108  * physical host controller interconnect (pHCI drivers) forming a matrix
109  * structure.
110  *
111  * The mpxio module implements locking at multiple granularity levels to
112  * support the needs of various consumers.  The multipath matrix can be
113  * globally locked, column locked, or row locked depending on the consumer.
114  * The intention is to balance simplicity and performance.
115  *
116  * Locking:
117  *
118  * The current implementation utilizes the following locks:
119  *
120  *   mdi_mutex: protects the vHCI list, per-vHCI structure and the
121  *   list of pHCIs and Client devices registered against them (protection
122  *   against multi-threaded add/remove).
123  *
124  *   devinfo_tree_lock (rw): protects system wide creation/removal of
125  *   mdi_pathinfo nodes into the multipath matrix.  Consumers (like the devinfo
126  *   driver) can freeze the configuration by acquiring this as a reader.
127  *
128  *   per-pHCI (mutex) lock: protects the column (pHCI-mdi_pathinfo node list)
129  *   and per-pHCI structure fields.  mdi_pathinfo node creation, deletion and
130  *   child mdi_pathinfo node state changes are serialized on per pHCI basis
131  *   (Protection against DR).
132  *
133  *   per-client (mutex) lock: protects the row (client-mdi_pathinfo node list)
134  *   and per-client structure fields.  The client-mdi_pathinfo node list is
135  *   typically walked to select an optimal path when routing I/O requests.
136  *
137  *   per-mdi_pathinfo (mutex) lock: protects the mdi_pathinfo node structure
138  *   fields.
139  *
140  * Note that per-Client structure and per-pHCI fields are freely readable when
141  * corresponding mdi_pathinfo locks are held, since holding an mdi_pathinfo
142  * node guarantees that its corresponding client and pHCI devices will not be
143  * freed.
144  */
145 
/*
 * Name of the property that carries an MDI client's globally unique
 * identifier.
 *
 * The string is defined elsewhere; MDI_CLIENT_GUID_PROP yields it as a plain
 * (non-const) 'char *'.  The expansion is fully parenthesized so that the
 * cast applies to the pointer even when the macro is used inside a larger
 * expression (for example when it is subscripted).
 */
extern const char	*mdi_client_guid_prop;
#define	MDI_CLIENT_GUID_PROP	((char *)mdi_client_guid_prop)
151 
152 /*
153  * MDI Client load balancing policy definitions
154  *
155  * Load balancing policies are determined on a per-vHCI basis and are
156  * configurable via the vHCI's driver.conf file.
157  */
typedef enum {
	LOAD_BALANCE_NONE = 0,	/* no balancing: alternate pathing */
	LOAD_BALANCE_RR = 1,	/* round robin */
	LOAD_BALANCE_LBA = 2	/* logical block addressing */
} client_lb_t;
163 
/*
 * Arguments that accompany a load balancing policy (see client_lb_t).
 */
typedef struct {
	/* NOTE(review): units/encoding of the region size are not shown here */
	int	region_size;
} client_lb_args_t;
167 
/*
 * MDI client load balancing property name/value strings.
 *
 * The strings are defined elsewhere; the LOAD_BALANCE_PROP* macros yield
 * them as plain (non-const) 'char *'.  Every expansion is fully parenthesized
 * so that the cast binds to the pointer in any surrounding expression.
 */
extern const char	*mdi_load_balance;
extern const char	*mdi_load_balance_none;
extern const char	*mdi_load_balance_ap;
extern const char	*mdi_load_balance_rr;
extern const char	*mdi_load_balance_lba;

#define	LOAD_BALANCE_PROP		((char *)mdi_load_balance)
#define	LOAD_BALANCE_PROP_NONE		((char *)mdi_load_balance_none)
#define	LOAD_BALANCE_PROP_AP		((char *)mdi_load_balance_ap)
#define	LOAD_BALANCE_PROP_RR		((char *)mdi_load_balance_rr)
#define	LOAD_BALANCE_PROP_LBA		((char *)mdi_load_balance_lba)

/* default for region size */
#define	LOAD_BALANCE_DEFAULT_REGION_SIZE	18
185 
186 /*
187  * vHCI drivers:
188  *
189  * vHCI drivers are pseudo nexus drivers which implement multipath services
190  * for a specific command set or bus architecture ('class').  There is a
191  * single instance of the vHCI driver for each command set which supports
192  * multipath devices.
193  *
194  * Each vHCI driver registers the following callbacks from attach(9e).
195  */
/*
 * vHCI ops vector revisions.
 *
 * MDI_VHCI_OPS_REV selects the current revision and MDI_VHCI_OPS_REV_NAME is
 * the same revision spelled as a string; change the two together.
 */
#define	MDI_VHCI_OPS_REV_1	1

#define	MDI_VHCI_OPS_REV	MDI_VHCI_OPS_REV_1
#define	MDI_VHCI_OPS_REV_NAME	"1"
202 
203 typedef struct mdi_vhci_ops {
204 	/* revision management */
205 	int	vo_revision;
206 
207 	/* mdi_pathinfo node init callback */
208 	int	(*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags);
209 
210 	/* mdi_pathinfo node uninit callback */
211 	int	(*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip,
212 		    int flags);
213 
214 	/* mdi_pathinfo node state change callback */
215 	int	(*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip,
216 		    mdi_pathinfo_state_t state, uint32_t, int flags);
217 
218 	/* Client path failover callback */
219 	int	(*vo_failover)(dev_info_t *vdip, dev_info_t *cdip, int flags);
220 } mdi_vhci_ops_t;
221 
222 /*
223  * An mdi_vhci structure is created and bound to the devinfo node of every
224  * registered vHCI class driver; this happens when a vHCI registers itself from
225  * attach(9e).  This structure is unbound and freed when the vHCI unregisters
226  * at detach(9e) time;
227  *
228  * Each vHCI driver is associated with a vHCI class name; this is the handle
229  * used to register and unregister pHCI drivers for a given transport.
230  *
231  * Locking: This structure is guarded by the mdi_mutex; however, depending
232  * on the context, some of the fields can be freely read without holding any
233  * locks (ex. holding a child's lock also guarantees that the vHCI (parent)
234  * cannot be unexpectedly freed).
235  */
236 typedef struct mdi_vhci {
237 	struct mdi_vhci		*vh_next;	/* next link		*/
238 	struct mdi_vhci		*vh_prev;	/* prev link		*/
239 	int			vh_flags;	/* Operation flags	*/
240 	dev_info_t		*vh_dip;	/* devi handle		*/
241 	char			*vh_class;	/* Class name		*/
242 	struct mdi_vhci_ops	*vh_ops;	/* Callback vectors	*/
243 	client_lb_t		vh_lb;		/* Global cache		*/
244 	int			vh_phci_count;	/* pHCI device count	*/
245 	struct mdi_phci		*vh_phci_head;	/* pHCI list head	*/
246 	struct mdi_phci		*vh_phci_tail;	/* pHCI list tail	*/
247 	int			vh_client_count;	/* Client count	*/
248 	struct client_hash	*vh_client_table;	/* Client hash	*/
249 	int			vh_refcnt;	/* reference count */
250 	struct mdi_vhci_config	*vh_config;	/* vhci config */
251 } mdi_vhci_t;
252 
253 /*
254  * GUID Hash definitions
255  *
256  * Since all the mpxio managed devices for a given class are enumerated under
257  * the single vHCI instance for that class, sequentially walking through the
258  * client device link to find a client would be prohibitively slow.
259  */
260 
/* size used for the client GUID hash */
#define	CLIENT_HASH_TABLE_SIZE	(32)

/*
 * One entry of the client hash: the first client on the entry's list and
 * the number of clients counted against the entry.
 */
struct client_hash {
	struct mdi_client	*ct_hash_head;
	int			ct_hash_count;
};
270 
271 
272 /*
273  * pHCI Drivers:
274  *
275  * Physical HBA drivers provide transport services for mpxio-managed devices.
276  * As each pHCI instance is attached, it must register itself with the mpxio
277  * framework using mdi_phci_register().  When the pHCI is detached it must
278  * similarly call mdi_phci_unregister().
279  *
280  * The framework maintains a list of registered pHCI device instances for each
281  * vHCI.  This list is vHCI->vh_phci_count, vHCI->vh_phci_head,
282  * vHCI->vh_phci_tail and pHCI->ph_next.  This list is protected by the global
283  * mdi_mutex.
284  *
285  * Locking order:
286  *
287  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
288  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex devinfo_tree_lock))
289  */
290 typedef struct mdi_phci {
291 	kmutex_t		ph_mutex;	/* per-pHCI mutex	*/
292 	struct mdi_phci		*ph_next;	/* next link		*/
293 	struct mdi_phci		*ph_prev;	/* prev link		*/
294 	dev_info_t		*ph_dip;	/* devi handle		*/
295 	struct mdi_vhci 	*ph_vhci;	/* back ref. to vHCI	*/
296 	int			ph_flags;	/* pHCI operation flags	*/
297 	int			ph_path_count;	/* child pi count	*/
298 	mdi_pathinfo_t		*ph_path_head;	/* pi list head		*/
299 	mdi_pathinfo_t		*ph_path_tail;	/* pi list tail		*/
300 	int			ph_unstable;	/* Paths in transient state */
301 	kcondvar_t		ph_unstable_cv;	/* Paths in transient state */
302 	kcondvar_t		ph_powerchange_cv;
303 						/* Paths in transient state */
304 	void			*ph_vprivate;	/* vHCI driver private	*/
305 } mdi_phci_t;
306 
/*
 * A pHCI device is 'unstable' while one or more paths are in a transitional
 * state.  Hotplugging is prevented during this state.
 *
 * MDI_PHCI_UNSTABLE() bumps the count of transitional paths.
 * MDI_PHCI_STABLE() drops it and, when it reaches zero, broadcasts on
 * ph_unstable_cv.
 *
 * Neither expansion carries its own terminating semicolon and each forms a
 * single expression/statement, so both can be used as the body of an
 * unbraced if/else; the caller supplies the semicolon.  _NOTE(CONSTCOND)
 * marks the deliberately constant loop condition.
 */
#define	MDI_PHCI_UNSTABLE(ph)		((ph)->ph_unstable++)
#define	MDI_PHCI_STABLE(ph) do { \
	(ph)->ph_unstable--; \
	if ((ph)->ph_unstable == 0) { \
		cv_broadcast(&(ph)->ph_unstable_cv); \
	} \
	_NOTE(CONSTCOND) \
} while (0)
318 
/*
 * Acquire, try to acquire, and release the per-pHCI mutex (ph_mutex).
 * MDI_PHCI_TRYLOCK() evaluates to the result of mutex_tryenter().
 */
#define	MDI_PHCI_LOCK(ph)	mutex_enter(&(ph)->ph_mutex)
#define	MDI_PHCI_TRYLOCK(ph)	mutex_tryenter(&(ph)->ph_mutex)
#define	MDI_PHCI_UNLOCK(ph)	mutex_exit(&(ph)->ph_mutex)
325 
/*
 * pHCI driver instance state.
 *
 * Every MDI_PHCI_FLAGS_* value is one bit of mdi_phci::ph_flags.  The
 * MDI_PHCI_SET_* and MDI_PHCI_CLEAR_* macros set or clear a single bit.  The
 * MDI_PHCI_IS_* macros other than MDI_PHCI_IS_READY() test a single bit and
 * yield the masked value, i.e. non-zero (not necessarily 1) when it is set.
 */
#define	MDI_PHCI_FLAGS_OFFLINE		0x01	/* offline */
#define	MDI_PHCI_FLAGS_SUSPEND		0x02	/* suspended */
#define	MDI_PHCI_FLAGS_POWER_DOWN	0x04	/* powered down */
#define	MDI_PHCI_FLAGS_DETACH		0x08	/* detached */
#define	MDI_PHCI_FLAGS_USER_DISABLE	0x10	/* disabled by user */
#define	MDI_PHCI_FLAGS_D_DISABLE	0x20	/* disabled by driver */
#define	MDI_PHCI_FLAGS_D_DISABLE_TRANS	0x40	/* disabled, transient */
#define	MDI_PHCI_FLAGS_POWER_TRANSITION	0x80	/* in power transition */

/*
 * Despite its name, MDI_PHCI_DISABLE_MASK selects every bit EXCEPT the three
 * disable bits.  MDI_PHCI_IS_READY() is therefore true when no bit other
 * than the disable bits is set, whether or not the pHCI is disabled.
 */
#define	MDI_PHCI_DISABLE_MASK \
	(~(MDI_PHCI_FLAGS_USER_DISABLE | MDI_PHCI_FLAGS_D_DISABLE | \
	MDI_PHCI_FLAGS_D_DISABLE_TRANS))
#define	MDI_PHCI_IS_READY(ph) \
	(((ph)->ph_flags & MDI_PHCI_DISABLE_MASK) == 0)

/* offline / online */
#define	MDI_PHCI_SET_OFFLINE(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE)
#define	MDI_PHCI_SET_ONLINE(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE)
#define	MDI_PHCI_IS_OFFLINE(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_OFFLINE)

/* suspend / resume */
#define	MDI_PHCI_SET_SUSPEND(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND)
#define	MDI_PHCI_SET_RESUME(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_SUSPEND)
#define	MDI_PHCI_IS_SUSPENDED(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND)

/* power down / power up */
#define	MDI_PHCI_SET_POWER_DOWN(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN)
#define	MDI_PHCI_SET_POWER_UP(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN)
#define	MDI_PHCI_IS_POWERED_DOWN(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_DOWN)

/* detach / attach */
#define	MDI_PHCI_SET_DETACH(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH)
#define	MDI_PHCI_SET_ATTACH(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH)

/* disabled by the user */
#define	MDI_PHCI_SET_USER_DISABLE(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_USER_DISABLE)
#define	MDI_PHCI_SET_USER_ENABLE(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_USER_DISABLE)
#define	MDI_PHCI_IS_USER_DISABLED(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_USER_DISABLE)

/* disabled by the driver */
#define	MDI_PHCI_SET_DRV_DISABLE(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE)
#define	MDI_PHCI_SET_DRV_ENABLE(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE)
#define	MDI_PHCI_IS_DRV_DISABLED(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE)

/* disabled by the driver, transient */
#define	MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE_TRANS)
#define	MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE_TRANS)
#define	MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE_TRANS)

/* power transition in progress */
#define	MDI_PHCI_SET_POWER_TRANSITION(ph) \
	((ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_TRANSITION)
#define	MDI_PHCI_CLEAR_POWER_TRANSITION(ph) \
	((ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_TRANSITION)
#define	MDI_PHCI_IS_POWER_TRANSITION(ph) \
	((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_TRANSITION)
412 
413 /*
414  * mpxio Managed Clients:
415  *
416  * This framework creates a struct mdi_client for every client device created
417  * by the framework as a result of self-enumeration of target devices by the
418  * registered pHCI devices.  This structure is bound to client device dev_info
419  * node at the time of client device allocation (ndi_devi_alloc(9e)). This
420  * structure is unbound from the dev_info node when mpxio framework removes a
421  * client device node from the system.
422  *
423  * This structure is created when a first path is enumerated and removed when
424  * last path is de-enumerated from the system.
425  *
426  * Multipath client devices are instantiated as children of corresponding vHCI
427  * driver instance. Each client device is uniquely identified by a GUID
428  * provided by target device itself.  The parent vHCI device also maintains a
429  * hashed list of client devices, protected by the global mdi_mutex.
430  *
431  * Typically pHCI devices self-enumerate their child devices using taskq,
432  * resulting in multiple paths to the same client device to be enumerated by
433  * competing threads.  mdi_mutex is also used to serialize the client device
434  * creation.
435  *
 * Currently this framework defines three load-balancing policies (see
 * client_lb_t), configurable through the vHCI driver configuration files.
 *
 * NONE		- Legacy AP mode
 * Round Robin	- Balance the pHCI load in a Round Robin fashion.
 * LBA		- Logical Block Addressing.
441  *
442  * This framework identifies the client device in three distinct states:
443  *
 * OPTIMAL	- Client device has at least one redundant path.
445  * DEGRADED	- No redundant paths (critical).  Failure in the current active
446  *                path would result in data access failures.
447  * FAILED 	- No paths are available to access this device.
448  *
449  * Locking order:
450  *
451  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
452  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex devinfo_tree_lock))
453  */
454 typedef struct mdi_client {
455 	kmutex_t		ct_mutex;	/* per-client mutex	*/
456 	struct mdi_client	*ct_hnext;	/* next client		*/
457 	struct mdi_client	*ct_hprev;	/* prev client		*/
458 	dev_info_t		*ct_dip;	/* client devi handle	*/
459 	struct mdi_vhci		*ct_vhci;	/* vHCI back ref	*/
460 	char			*ct_drvname;	/* client driver name	*/
461 	char			*ct_guid;	/* client guid		*/
462 	void			*ct_cprivate;	/* client driver private */
463 	client_lb_t		ct_lb;		/* load balancing scheme */
464 	client_lb_args_t	*ct_lb_args; 	/* load balancing args */
465 	int			ct_flags;	/* Driver op. flags	*/
466 	int			ct_state;	/* state information	*/
467 	int			ct_failover_flags;	/* Failover args */
468 	int			ct_failover_status;	/* last fo status */
469 	kcondvar_t		ct_failover_cv;	/* Failover status cv	*/
470 	int			ct_path_count;	/* multi path count	*/
471 	mdi_pathinfo_t		*ct_path_head;	/* multi path list head	*/
472 	mdi_pathinfo_t		*ct_path_tail;	/* multi path list tail	*/
473 	mdi_pathinfo_t		*ct_path_last;	/* last path used for i/o */
474 	int			ct_unstable;	/* Paths in transient state */
475 	kcondvar_t		ct_unstable_cv;	/* Paths in transient state */
476 	int			ct_power_cnt;	/* Hold count on parent power */
477 	kcondvar_t		ct_powerchange_cv;
478 					/* Paths in power transient state */
479 	short			ct_powercnt_config;
480 					/* held in pre/post config */
481 	short			ct_powercnt_unconfig;
482 					/* held in pre/post unconfig */
483 	int			ct_powercnt_reset;
484 					/* ct_power_cnt was resetted */
485 	void			*ct_vprivate;	/* vHCI driver private	*/
486 } mdi_client_t;
487 
/*
 * Acquire, try to acquire, and release the per-client mutex (ct_mutex).
 * MDI_CLIENT_TRYLOCK() evaluates to the result of mutex_tryenter().
 */
#define	MDI_CLIENT_LOCK(ct)	mutex_enter(&(ct)->ct_mutex)
#define	MDI_CLIENT_TRYLOCK(ct)	mutex_tryenter(&(ct)->ct_mutex)
#define	MDI_CLIENT_UNLOCK(ct)	mutex_exit(&(ct)->ct_mutex)
494 
/*
 * A client device is unstable while one or more paths are in a transitional
 * state.  We do not allow failover to take place while paths are in a
 * transient state.  Similarly we do not allow state transitions while a
 * client device failover is in progress.
 *
 * MDI_CLIENT_UNSTABLE() bumps the count of transitional paths.
 * MDI_CLIENT_STABLE() drops it and, when it reaches zero, broadcasts on
 * ct_unstable_cv.
 *
 * Neither expansion carries its own terminating semicolon and each forms a
 * single expression/statement, so both can be used as the body of an
 * unbraced if/else; the caller supplies the semicolon.  _NOTE(CONSTCOND)
 * marks the deliberately constant loop condition.
 */
#define	MDI_CLIENT_UNSTABLE(ct)		((ct)->ct_unstable++)
#define	MDI_CLIENT_STABLE(ct) do { \
	(ct)->ct_unstable--; \
	if ((ct)->ct_unstable == 0) { \
		cv_broadcast(&(ct)->ct_unstable_cv); \
	} \
	_NOTE(CONSTCOND) \
} while (0)
508 
/*
 * Client driver instance state.
 *
 * Every MDI_CLIENT_FLAGS_* value is one bit of mdi_client::ct_flags.  The
 * MDI_CLIENT_SET_* and MDI_CLIENT_CLEAR_* macros set or clear a single bit;
 * the MDI_CLIENT_IS_* macros test a single bit and yield the masked value,
 * i.e. non-zero (not necessarily 1) when it is set.
 */
#define	MDI_CLIENT_FLAGS_OFFLINE		0x0001
#define	MDI_CLIENT_FLAGS_SUSPEND		0x0002
#define	MDI_CLIENT_FLAGS_POWER_DOWN		0x0004
#define	MDI_CLIENT_FLAGS_DETACH			0x0008
#define	MDI_CLIENT_FLAGS_FAILOVER		0x0010
#define	MDI_CLIENT_FLAGS_REPORT_DEV		0x0020
#define	MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS	0x0040
#define	MDI_CLIENT_FLAGS_ASYNC_FREE		0x0080
#define	MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED	0x0100
#define	MDI_CLIENT_FLAGS_POWER_TRANSITION	0x0200

/* offline / online */
#define	MDI_CLIENT_SET_OFFLINE(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE)
#define	MDI_CLIENT_SET_ONLINE(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE)
#define	MDI_CLIENT_IS_OFFLINE(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE)

/* suspend / resume */
#define	MDI_CLIENT_SET_SUSPEND(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND)
#define	MDI_CLIENT_SET_RESUME(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND)
#define	MDI_CLIENT_IS_SUSPENDED(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND)

/* power down / power up */
#define	MDI_CLIENT_SET_POWER_DOWN(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN)
#define	MDI_CLIENT_SET_POWER_UP(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN)
#define	MDI_CLIENT_IS_POWERED_DOWN(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_DOWN)

/* detach / attach */
#define	MDI_CLIENT_SET_DETACH(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH)
#define	MDI_CLIENT_SET_ATTACH(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH)
#define	MDI_CLIENT_IS_DETACHED(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH)

/* failover in progress */
#define	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER)
#define	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER)
#define	MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER)

/* report-dev needed */
#define	MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV)
#define	MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV)
#define	MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV)

/* path free in progress */
#define	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS)
#define	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS)
#define	MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS)

/* device not supported (no clearing macro is provided here) */
#define	MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)
#define	MDI_CLIENT_IS_DEV_NOT_SUPPORTED(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)

/* power transition in progress */
#define	MDI_CLIENT_SET_POWER_TRANSITION(ct) \
	((ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_TRANSITION)
#define	MDI_CLIENT_CLEAR_POWER_TRANSITION(ct) \
	((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_TRANSITION)
#define	MDI_CLIENT_IS_POWER_TRANSITION(ct) \
	((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_TRANSITION)
600 
/*
 * Client operating states, stored in mdi_client::ct_state.
 */
#define	MDI_CLIENT_STATE_OPTIMAL	1
#define	MDI_CLIENT_STATE_DEGRADED	2
#define	MDI_CLIENT_STATE_FAILED		3

/*
 * MDI_CLIENT_STATE() reads the state and MDI_CLIENT_SET_STATE() stores a new
 * one; the 'state' argument is parenthesized so any expression may be
 * passed.  MDI_CLIENT_IS_FAILED() is true only for MDI_CLIENT_STATE_FAILED.
 */
#define	MDI_CLIENT_STATE(ct)		((ct)->ct_state)
#define	MDI_CLIENT_SET_STATE(ct, state)	((ct)->ct_state = (state))

#define	MDI_CLIENT_IS_FAILED(ct) \
	((ct)->ct_state == MDI_CLIENT_STATE_FAILED)
613 
614 /*
615  * mdi_pathinfo nodes:
616  *
617  * From this framework's perspective, a 'path' is a tuple consisting of a
618  * client or end device, a host controller which provides device
619  * identification and transport services (pHCI), and bus specific unit
620  * addressing information.  A path may be decorated with properties which
621  * describe the capabilities of the path; such properties are analogous to
622  * device node and minor node properties.
623  *
 * The framework maintains a linked list of the mdi_pathinfo nodes created by
 * every pHCI driver instance via the pi_phci_link linkage; this is used (for
 * example) to make sure that all relevant pathinfo nodes are freed before the
 * pHCI is unregistered.
628  *
629  * Locking order:
630  *
631  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
632  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
633  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
634  * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex))
635  *
636  * mdi_pathinfo node structure definition
637  */
638 struct mdi_pathinfo {
639 	kmutex_t		pi_mutex;	/* per path mutex	*/
640 	mdi_pathinfo_state_t	pi_state;	/* path state		*/
641 	mdi_pathinfo_state_t	pi_old_state;	/* path state		*/
642 	kcondvar_t		pi_state_cv;	/* path state condvar	*/
643 	mdi_client_t		*pi_client;	/* client		*/
644 	mdi_phci_t		*pi_phci;	/* pHCI dev_info node	*/
645 	char			*pi_addr;	/* path unit address	*/
646 	nvlist_t		*pi_prop;	/* Properties		*/
647 	void			*pi_cprivate;	/* client private info	*/
648 	void			*pi_pprivate;	/* phci private info	*/
649 	struct mdi_pathinfo	*pi_client_link; /* next path in client list */
650 	struct mdi_pathinfo	*pi_phci_link;	 /* next path in phci list */
651 	int			pi_ref_cnt;	/* pi reference count	*/
652 	kcondvar_t		pi_ref_cv;	/* condition variable	*/
653 	struct mdi_pi_kstats	*pi_kstats;	/* aggregate kstats */
654 	int			pi_pm_held;	/* phci's kidsup incremented */
655 	int			pi_preferred;	/* Preferred path 	*/
656 	void			*pi_vprivate;	/* vhci private info	*/
657 };
658 
659 /*
660  * pathinfo statistics:
661  *
662  * The mpxio architecture allows for multiple pathinfo nodes for each
663  * client-pHCI combination.  For statistics purposes, these statistics are
664  * aggregated into a single client-pHCI set of kstats.
665  */
666 struct mdi_pi_kstats {
667 	int	pi_kstat_ref;		/* # paths aggregated, also a ref cnt */
668 	kstat_t	*pi_kstat_iostats;	/* mdi:iopath statistic set */
669 	kstat_t *pi_kstat_errstats;	/* error statistics */
670 };
671 
672 /*
673  * pathinfo error kstat
674  */
675 struct pi_errs {
676 	struct kstat_named pi_softerrs;		/* "Soft" Error */
677 	struct kstat_named pi_harderrs;		/* "Hard" Error */
678 	struct kstat_named pi_transerrs;	/* Transport Errors */
679 	struct kstat_named pi_icnt_busy;	/* Interconnect Busy */
680 	struct kstat_named pi_icnt_errors;	/* Interconnect Errors */
681 	struct kstat_named pi_phci_rsrc;	/* pHCI No Resources */
682 	struct kstat_named pi_phci_localerr;	/* pHCI Local Errors */
683 	struct kstat_named pi_phci_invstate;	/* pHCI Invalid State */
684 	struct kstat_named pi_failedfrom;	/* Failover: Failed From */
685 	struct kstat_named pi_failedto;		/* Failover: Failed To */
686 };
687 
/*
 * Increment an error counter.
 *
 *   pip - the path whose counter is bumped; converted with MDI_PI().
 *   x   - name of the struct pi_errs member to increment (the MDI_PI_*
 *         error codes expand to such names).
 *
 * Nothing happens when the path has no kstats (pi_kstats is NULL).  The
 * error kstat and its ks_data are dereferenced without further checks.
 *
 * The expansion is a single statement without a terminating semicolon, so
 * it is safe as the body of an unbraced if/else; the caller supplies the
 * semicolon.  The scratch pointer has a macro-private name so it cannot
 * capture a caller variable used in 'pip'.  _NOTE(CONSTCOND) marks the
 * deliberately constant loop condition.
 */
#define	MDI_PI_ERRSTAT(pip, x) do { \
	if (MDI_PI(pip)->pi_kstats != NULL) { \
		struct pi_errs *mdi_errstat_pep_ = \
		    MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \
		mdi_errstat_pep_->x.value.ui32++; \
	} \
	_NOTE(CONSTCOND) \
} while (0)
698 
/*
 * Error codes accepted by MDI_PI_ERRSTAT.  Each one expands to the name of
 * the struct pi_errs member that is incremented.
 */
#define	MDI_PI_SOFTERR		pi_softerrs
#define	MDI_PI_HARDERR		pi_harderrs
#define	MDI_PI_TRANSERR		pi_transerrs

#define	MDI_PI_ICNTBUSY		pi_icnt_busy
#define	MDI_PI_ICNTERR		pi_icnt_errors

#define	MDI_PI_PHCIRSRC		pi_phci_rsrc
#define	MDI_PI_PHCILOCL		pi_phci_localerr
#define	MDI_PI_PHCIINVS		pi_phci_invstate

#define	MDI_PI_FAILFROM		pi_failedfrom
#define	MDI_PI_FAILTO		pi_failedto
712 
/* View any pointer as a 'struct mdi_pathinfo *'. */
#define	MDI_PI(pip)		((struct mdi_pathinfo *)(pip))

/* Acquire / release the per-path mutex (pi_mutex). */
#define	MDI_PI_LOCK(pip)	mutex_enter(&MDI_PI(pip)->pi_mutex)
#define	MDI_PI_UNLOCK(pip)	mutex_exit(&MDI_PI(pip)->pi_mutex)

/*
 * Take / drop a reference on the path.  Both evaluate to the updated
 * pi_ref_cnt.
 */
#define	MDI_PI_HOLD(pip)	(++MDI_PI(pip)->pi_ref_cnt)
#define	MDI_PI_RELE(pip)	(--MDI_PI(pip)->pi_ref_cnt)
719 
/*
 * Operation / state-change flag bits.
 * NOTE(review): their consumers are not visible in this part of the header.
 */
#define	MDI_EXT_STATE_CHANGE		0x10000000

#define	MDI_DISABLE_OP			0x01
#define	MDI_ENABLE_OP			0x02
#define	MDI_BEFORE_STATE_CHANGE		0x04
#define	MDI_AFTER_STATE_CHANGE		0x08
#define	MDI_SYNC_FLAG			0x10
728 
/*
 * Path state accessors.
 *
 * pi_state and pi_old_state each hold a base state (selected by
 * MDI_PATHINFO_STATE_MASK) together with extended state bits (selected by
 * MDI_PATHINFO_EXT_STATE_MASK).
 */
#define	MDI_PI_STATE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
#define	MDI_PI_OLD_STATE(pip) \
	(MDI_PI(pip)->pi_old_state & MDI_PATHINFO_STATE_MASK)

#define	MDI_PI_EXT_STATE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK)
#define	MDI_PI_OLD_EXT_STATE(pip) \
	(MDI_PI(pip)->pi_old_state & MDI_PATHINFO_EXT_STATE_MASK)

/* transient bit: set, clear, test (test yields the masked bit) */
#define	MDI_PI_SET_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_TRANSIENT)
#define	MDI_PI_CLEAR_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_TRANSIENT)
#define	MDI_PI_IS_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT)

/* disabled by the user */
#define	MDI_PI_SET_USER_DISABLE(pip) \
	(MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_USER_DISABLE)
#define	MDI_PI_SET_USER_ENABLE(pip) \
	(MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_USER_DISABLE)
#define	MDI_PI_IS_USER_DISABLE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_USER_DISABLE)

/* disabled by the driver */
#define	MDI_PI_SET_DRV_DISABLE(pip) \
	(MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE)
#define	MDI_PI_SET_DRV_ENABLE(pip) \
	(MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE)
#define	MDI_PI_IS_DRV_DISABLE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE)

/* disabled by the driver, transient */
#define	MDI_PI_SET_DRV_DISABLE_TRANS(pip) \
	(MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)
#define	MDI_PI_SET_DRV_ENABLE_TRANS(pip) \
	(MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)
#define	MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)

/* true (1) when any of the three disable bits is set, otherwise 0 */
#define	MDI_PI_IS_DISABLE(pip) \
	((MDI_PI(pip)->pi_state & (MDI_PATHINFO_STATE_USER_DISABLE | \
	MDI_PATHINFO_STATE_DRV_DISABLE | \
	MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)) != 0)
781 
/*
 * INIT state helpers.
 *
 * MDI_PI_IS_INIT	- the bits of pi_state selected by
 *			  MDI_PATHINFO_STATE_MASK equal
 *			  MDI_PATHINFO_STATE_INIT.
 * MDI_PI_IS_INITING	- pi_state, with the extended-state bits ignored,
 *			  equals INIT combined with the TRANSIENT flag.
 * MDI_PI_SET_INIT	- overwrite pi_state with MDI_PATHINFO_STATE_INIT;
 *			  unlike the other MDI_PI_SET_* state macros this
 *			  does not carry over the extended-state bits.
 */
#define	MDI_PI_IS_INIT(pip)	\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	    MDI_PATHINFO_STATE_INIT)

#define	MDI_PI_IS_INITING(pip)	\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	    (MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_INIT(pip)	\
	(MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT)
792 
/*
 * MDI_PI_SET_ONLINING(pip)
 *
 * Start a transition to ONLINE: save the current base state (MDI_PI_STATE)
 * in pi_old_state, then set pi_state to ONLINE | TRANSIENT while carrying
 * over the bits selected by MDI_PATHINFO_EXT_STATE_MASK.
 *
 * The body is wrapped in do { } while (0) so that "MACRO(pip);" is exactly
 * one statement and is safe in an unbraced if/else.  pi_state is written
 * once, so the extended-state bits are never momentarily dropped, and no
 * local variable is declared that could shadow a caller's identifier.
 * 'pip' is evaluated several times; do not pass an expression with side
 * effects.
 */
#define	MDI_PI_SET_ONLINING(pip) do { \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	    (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT) | \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK); \
	_NOTE(CONSTCOND) \
} while (0)
801 
/*
 * True when pi_state, with the extended-state bits ignored, is exactly
 * ONLINE combined with the TRANSIENT flag.
 */
#define	MDI_PI_IS_ONLINING(pip)	\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	    (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT))
805 
/*
 * MDI_PI_SET_ONLINE(pip)
 *
 * Set the path's base state to ONLINE, preserving only the bits selected by
 * MDI_PATHINFO_EXT_STATE_MASK (the TRANSIENT flag is therefore dropped).
 *
 * Written as a single assignment expression, like MDI_PI_SET_INIT, so that
 * "MACRO(pip);" is one statement and is safe in an unbraced if/else.
 * pi_state is stored once and no local variable is declared.  'pip' is
 * evaluated twice; do not pass an expression with side effects.
 */
#define	MDI_PI_SET_ONLINE(pip) \
	(MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_ONLINE | \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK))
812 
813 
/*
 * True when the bits of pi_state selected by MDI_PATHINFO_STATE_MASK equal
 * MDI_PATHINFO_STATE_ONLINE.
 */
#define	MDI_PI_IS_ONLINE(pip)	\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	    MDI_PATHINFO_STATE_ONLINE)
817 
/*
 * MDI_PI_SET_OFFLINING(pip)
 *
 * Start a transition to OFFLINE: save the current base state (MDI_PI_STATE)
 * in pi_old_state, then set pi_state to OFFLINE | TRANSIENT while carrying
 * over the bits selected by MDI_PATHINFO_EXT_STATE_MASK.
 *
 * The body is wrapped in do { } while (0) so that "MACRO(pip);" is exactly
 * one statement and is safe in an unbraced if/else.  pi_state is written
 * once, so the extended-state bits are never momentarily dropped, and no
 * local variable is declared that could shadow a caller's identifier.
 * 'pip' is evaluated several times; do not pass an expression with side
 * effects.
 */
#define	MDI_PI_SET_OFFLINING(pip) do { \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	    (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT) | \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK); \
	_NOTE(CONSTCOND) \
} while (0)
826 
/*
 * True when pi_state, with the extended-state bits ignored, is exactly
 * OFFLINE combined with the TRANSIENT flag.
 */
#define	MDI_PI_IS_OFFLINING(pip)	\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	    (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT))
830 
/*
 * MDI_PI_SET_OFFLINE(pip)
 *
 * Set the path's base state to OFFLINE, preserving only the bits selected
 * by MDI_PATHINFO_EXT_STATE_MASK (the TRANSIENT flag is therefore dropped).
 *
 * Written as a single assignment expression, like MDI_PI_SET_INIT, so that
 * "MACRO(pip);" is one statement and is safe in an unbraced if/else.
 * pi_state is stored once and no local variable is declared.  'pip' is
 * evaluated twice; do not pass an expression with side effects.
 */
#define	MDI_PI_SET_OFFLINE(pip) \
	(MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_OFFLINE | \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK))
837 
/*
 * True when the bits of pi_state selected by MDI_PATHINFO_STATE_MASK equal
 * MDI_PATHINFO_STATE_OFFLINE.
 */
#define	MDI_PI_IS_OFFLINE(pip)	\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	    MDI_PATHINFO_STATE_OFFLINE)
841 
/*
 * MDI_PI_SET_STANDBYING(pip)
 *
 * Start a transition to STANDBY: save the current base state (MDI_PI_STATE)
 * in pi_old_state, then set pi_state to STANDBY | TRANSIENT while carrying
 * over the bits selected by MDI_PATHINFO_EXT_STATE_MASK.
 *
 * The body is wrapped in do { } while (0) so that "MACRO(pip);" is exactly
 * one statement and is safe in an unbraced if/else.  pi_state is written
 * once, so the extended-state bits are never momentarily dropped, and no
 * local variable is declared that could shadow a caller's identifier.
 * 'pip' is evaluated several times; do not pass an expression with side
 * effects.
 */
#define	MDI_PI_SET_STANDBYING(pip) do { \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	    (MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT) | \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK); \
	_NOTE(CONSTCOND) \
} while (0)
850 
/*
 * MDI_PI_SET_STANDBY(pip)
 *
 * Set the path's base state to STANDBY, preserving only the bits selected
 * by MDI_PATHINFO_EXT_STATE_MASK (the TRANSIENT flag is therefore dropped).
 *
 * Written as a single assignment expression, like MDI_PI_SET_INIT, so that
 * "MACRO(pip);" is one statement and is safe in an unbraced if/else.
 * pi_state is stored once and no local variable is declared.  'pip' is
 * evaluated twice; do not pass an expression with side effects.
 */
#define	MDI_PI_SET_STANDBY(pip) \
	(MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_STANDBY | \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK))
857 
/*
 * True when the bits of pi_state selected by MDI_PATHINFO_STATE_MASK equal
 * MDI_PATHINFO_STATE_STANDBY.
 */
#define	MDI_PI_IS_STANDBY(pip)	\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	    MDI_PATHINFO_STATE_STANDBY)
861 
/*
 * MDI_PI_SET_FAULTING(pip)
 *
 * Start a transition to FAULT: save the current base state (MDI_PI_STATE)
 * in pi_old_state, then set pi_state to FAULT | TRANSIENT while carrying
 * over the bits selected by MDI_PATHINFO_EXT_STATE_MASK.
 *
 * The body is wrapped in do { } while (0) so that "MACRO(pip);" is exactly
 * one statement and is safe in an unbraced if/else.  pi_state is written
 * once, so the extended-state bits are never momentarily dropped, and no
 * local variable is declared that could shadow a caller's identifier.
 * 'pip' is evaluated several times; do not pass an expression with side
 * effects.
 */
#define	MDI_PI_SET_FAULTING(pip) do { \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	    (MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT) | \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK); \
	_NOTE(CONSTCOND) \
} while (0)
870 
/*
 * MDI_PI_SET_FAULT(pip)
 *
 * Set the path's base state to FAULT, preserving only the bits selected by
 * MDI_PATHINFO_EXT_STATE_MASK (the TRANSIENT flag is therefore dropped).
 *
 * Written as a single assignment expression, like MDI_PI_SET_INIT, so that
 * "MACRO(pip);" is one statement and is safe in an unbraced if/else.
 * pi_state is stored once and no local variable is declared.  'pip' is
 * evaluated twice; do not pass an expression with side effects.
 */
#define	MDI_PI_SET_FAULT(pip) \
	(MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_FAULT | \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK))
877 
/*
 * True when the bits of pi_state selected by MDI_PATHINFO_STATE_MASK equal
 * MDI_PATHINFO_STATE_FAULT.
 */
#define	MDI_PI_IS_FAULT(pip)	\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	    MDI_PATHINFO_STATE_FAULT)
881 
/*
 * Non-zero when MDI_PHCI_FLAGS_SUSPEND is set in the ph_flags of the phci
 * this path points to (pi_phci).  pi_phci is dereferenced unconditionally.
 */
#define	MDI_PI_IS_SUSPENDED(pip)	\
	((MDI_PI(pip))->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND)
884 
885 /*
886  * mdi_vhcache_client, mdi_vhcache_pathinfo, and mdi_vhcache_phci structures
887  * hold the vhci to phci client mappings of the on-disk vhci busconfig cache.
888  */
889 
890 /* phci structure of vhci cache */
891 typedef struct mdi_vhcache_phci {
892 	char			*cphci_path;	/* phci path name */
893 	uint32_t		cphci_id;	/* used when building nvlist */
894 	mdi_phci_t		*cphci_phci;	/* pointer to actual phci */
895 	struct mdi_vhcache_phci	*cphci_next;	/* next in vhci phci list */
896 } mdi_vhcache_phci_t;
897 
898 /* pathinfo structure of vhci cache */
899 typedef struct mdi_vhcache_pathinfo {
900 	char			*cpi_addr;	/* path address */
901 	mdi_vhcache_phci_t	*cpi_cphci;	/* phci the path belongs to */
902 	struct mdi_pathinfo	*cpi_pip;	/* ptr to actual pathinfo */
903 	uint32_t		cpi_flags;	/* see below */
904 	struct mdi_vhcache_pathinfo *cpi_next;	/* next path for the client */
905 } mdi_vhcache_pathinfo_t;
906 
/*
 * Flag bits for cpi_flags.
 *
 * MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 *	Set when configuration of the path has failed.
 */
#define	MDI_CPI_HINT_PATH_DOES_NOT_EXIST	(1 << 0)
914 
915 /* client structure of vhci cache */
916 typedef struct mdi_vhcache_client {
917 	char			*cct_name_addr;	/* client address */
918 	mdi_vhcache_pathinfo_t	*cct_cpi_head;	/* client's path list head */
919 	mdi_vhcache_pathinfo_t	*cct_cpi_tail;	/* client's path list tail */
920 	struct mdi_vhcache_client *cct_next;	/* next in vhci client list */
921 } mdi_vhcache_client_t;
922 
923 /* vhci cache structure - one for vhci instance */
924 typedef struct mdi_vhci_cache {
925 	mdi_vhcache_phci_t	*vhcache_phci_head;	/* phci list head */
926 	mdi_vhcache_phci_t	*vhcache_phci_tail;	/* phci list tail */
927 	mdi_vhcache_client_t	*vhcache_client_head;	/* client list head */
928 	mdi_vhcache_client_t	*vhcache_client_tail;	/* client list tail */
929 	mod_hash_t		*vhcache_client_hash;	/* client hash */
930 	int			vhcache_flags;		/* see below */
931 	int64_t			vhcache_clean_time;	/* last clean time */
932 	krwlock_t		vhcache_lock;		/* cache lock */
933 } mdi_vhci_cache_t;
934 
/*
 * Flag bits for vhcache_flags.
 */
#define	MDI_VHCI_CACHE_SETUP_DONE	(1 << 0)	/* cache setup completed */
937 
/*
 * Description of one phci driver: its name and whether it supports the
 * root device.
 */
typedef struct mdi_phci_driver_info {
	/* name of the phci driver */
	char			*phdriver_name;

	/* set to non zero if the phci driver supports root device */
	int			phdriver_root_support;
} mdi_phci_driver_info_t;
944 
945 /* vhci bus config structure - one for vhci instance */
946 typedef struct mdi_vhci_config {
947 	char			*vhc_vhcache_filename;	/* on-disk file name */
948 	mdi_vhci_cache_t	vhc_vhcache;		/* vhci cache */
949 	mdi_phci_driver_info_t	*vhc_phci_driver_list;	/* ph drv info array */
950 	int			vhc_nphci_drivers;	/* # of phci drivers */
951 	kmutex_t		vhc_lock;		/* vhci config lock */
952 	kcondvar_t		vhc_cv;
953 	int			vhc_flags;		/* see below */
954 
955 	/* flush vhci cache when lbolt reaches vhc_flush_at_ticks */
956 	clock_t			vhc_flush_at_ticks;
957 
958 	/*
959 	 * Head and tail of the client list whose paths are being configured
960 	 * asynchronously. vhc_acc_count is the number of clients on this list.
961 	 * vhc_acc_thrcount is the number threads running to configure
962 	 * the paths for these clients.
963 	 */
964 	struct mdi_async_client_config *vhc_acc_list_head;
965 	struct mdi_async_client_config *vhc_acc_list_tail;
966 	int			vhc_acc_count;
967 	int			vhc_acc_thrcount;
968 
969 	/* callback id - for flushing the cache during system shutdown */
970 	callb_id_t		vhc_cbid;
971 
972 	/*
973 	 * vhc_path_discovery_boot -	number of times path discovery will be
974 	 *				attempted during early boot.
975 	 * vhc_path_discovery_postboot	number of times path discovery will be
976 	 *				attempted during late boot.
977 	 * vhc_path_discovery_cutoff_time - time at which paths were last
978 	 *				discovered  + some timeout
979 	 */
980 	int			vhc_path_discovery_boot;
981 	int			vhc_path_discovery_postboot;
982 	int64_t			vhc_path_discovery_cutoff_time;
983 } mdi_vhci_config_t;
984 
/*
 * Flag bits for vhc_flags.
 */
#define	MDI_VHC_SINGLE_THREADED		(1 << 0) /* config single threaded */
#define	MDI_VHC_EXIT			(1 << 1) /* exit all config activity */
#define	MDI_VHC_VHCACHE_DIRTY		(1 << 2) /* cache dirty */
#define	MDI_VHC_VHCACHE_FLUSH_THREAD	(1 << 3) /* flush thread running */
#define	MDI_VHC_VHCACHE_FLUSH_ERROR	(1 << 4) /* failed to flush cache */
#define	MDI_VHC_READONLY_FS		(1 << 5) /* filesys is readonly */
992 
/*
 * Node of a singly linked list of path strings.
 */
typedef struct mdi_phys_path {
	/* the path string */
	char			*phys_path;

	/* next node in the list */
	struct mdi_phys_path	*phys_path_next;
} mdi_phys_path_t;
997 
998 /*
999  * Lookup tokens are used to cache the result of the vhci cache client lookup
1000  * operations (to reduce the number of real lookup operations).
1001  */
1002 typedef struct mdi_vhcache_lookup_token {
1003 	mdi_vhcache_client_t	*lt_cct;		/* vhcache client */
1004 	int64_t			lt_cct_lookup_time;	/* last lookup time */
1005 } mdi_vhcache_lookup_token_t;
1006 
1007 /* asynchronous configuration of client paths */
1008 typedef struct mdi_async_client_config {
1009 	char			*acc_ct_name;	/* client name */
1010 	char			*acc_ct_addr;	/* client address */
1011 	mdi_phys_path_t		*acc_phclient_path_list_head;	/* path head */
1012 	mdi_vhcache_lookup_token_t acc_token;	/* lookup token */
1013 	struct mdi_async_client_config *acc_next; /* next in vhci acc list */
1014 } mdi_async_client_config_t;
1015 
1016 /*
1017  * vHCI driver instance registration/unregistration
1018  *
1019  * mdi_vhci_register() is called by a vHCI driver to register itself as the
1020  * manager of devices from a particular 'class'.  This should be called from
1021  * attach(9e).
1022  *
1023  * mdi_vhci_unregister() is called from detach(9E) to unregister a vHCI
1024  * instance from the framework.
1025  */
1026 int		mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int);
1027 int		mdi_vhci_unregister(dev_info_t *, int);
1028 
1029 /*
1030  * Utility functions
1031  */
1032 int		mdi_phci_get_path_count(dev_info_t *);
1033 dev_info_t	*mdi_phci_path2devinfo(dev_info_t *, caddr_t);
1034 
1035 
1036 /*
1037  * Path Selection Functions:
1038  *
1039  * mdi_select_path() is called by a vHCI driver to select to which path an
1040  * I/O request should be routed.  The caller passes the 'buf' structure as
1041  * one of the parameters.  The mpxio framework uses the buf's contents to
1042  * maintain per path statistics (total I/O size / count pending).  If more
1043  * than one online path is available, the framework automatically selects
1044  * a suitable one.  If a failover operation is active for this client device
1045  * the call fails, returning MDI_BUSY.
1046  *
1047  * By default this function returns a suitable path in the 'online' state,
1048  * based on the current load balancing policy.  Currently we support
1049  * LOAD_BALANCE_NONE (Previously selected online path will continue to be
1050  * used as long as the path is usable) and LOAD_BALANCE_RR (Online paths
1051  * will be selected in a round robin fashion).  The load balancing scheme
1052  * can be configured in the vHCI driver's configuration file (driver.conf).
1053  *
1054  * vHCI drivers may override this default behaviour by specifying appropriate
1055  * flags.  If start_pip is specified (non NULL), it is used as the routine's
1056  * starting point; it starts walking from there to find the next appropriate
1057  * path.
1058  *
1059  * The following values for 'flags' are currently defined:
1060  *
1061  * 	MDI_SELECT_ONLINE_PATH: select an ONLINE path
1062  *	MDI_SELECT_STANDBY_PATH: select a STANDBY path
1063  *
1064  * The selected paths are returned in a held state (ref_cnt) and caller should
1065  * release the hold by calling mdi_rele_path() at the end of operation.
1066  */
1067 int		mdi_select_path(dev_info_t *, struct buf *, int,
1068 		    mdi_pathinfo_t *, mdi_pathinfo_t **);
1069 void		mdi_hold_path(mdi_pathinfo_t *);
1070 void		mdi_rele_path(mdi_pathinfo_t *);
1071 int		mdi_set_lb_policy(dev_info_t *, client_lb_t);
1072 int		mdi_set_lb_region_size(dev_info_t *, int);
1073 client_lb_t	mdi_get_lb_policy(dev_info_t *);
1074 
/*
 * Flag bits accepted by mdi_select_path().
 */
#define	MDI_SELECT_ONLINE_PATH		(1 << 0)
#define	MDI_SELECT_STANDBY_PATH		(1 << 1)
1080 
1081 /*
1082  * MDI client device utility functions
1083  */
1084 int		mdi_client_get_path_count(dev_info_t *);
1085 dev_info_t	*mdi_client_path2devinfo(dev_info_t *, caddr_t);
1086 
1087 /*
1088  * Failover:
1089  *
1090  * The vHCI driver calls mdi_failover() to initiate a failover operation.
1091  * mdi_failover() calls back into the vHCI driver's vo_failover()
1092  * entry point to perform the actual failover operation.  The reason
1093  * for requiring the vHCI driver to initiate failover by calling
1094  * mdi_failover(), instead of directly executing vo_failover() itself,
1095  * is to ensure that the mdi framework can keep track of the client
1096  * state properly.  Additionally, mdi_failover() provides as a
1097  * convenience the option of performing the failover operation
1098  * synchronously or asynchronously
1099  *
1100  * Upon successful completion of the failover operation, the paths that were
1101  * previously ONLINE will be in the STANDBY state, and the newly activated
1102  * paths will be in the ONLINE state.
1103  *
1104  * The flags modifier determines whether the activation is done synchronously
1105  */
1106 int mdi_failover(dev_info_t *, dev_info_t *, int);
1107 
/*
 * Client device failover modes of operation
 */
#define	MDI_FAILOVER_SYNC	1	/* Synchronous failover */
#define	MDI_FAILOVER_ASYNC	2	/* Asynchronous failover */
1113 
1114 /*
1115  * mdi_pathinfo node state change functions.
1116  */
1117 void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *);
1118 
1119 /*
1120  * mdi_pathinfo node extended state change functions.
1121  */
1122 int mdi_pi_get_state2(mdi_pathinfo_t *, mdi_pathinfo_state_t *, uint32_t *);
1123 int mdi_pi_get_preferred(mdi_pathinfo_t *);
1124 
1125 /*
1126  * mdi_pathinfo node member functions
1127  */
1128 void *mdi_pi_get_client_private(mdi_pathinfo_t *);
1129 void mdi_pi_set_client_private(mdi_pathinfo_t *, void *);
1130 void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t);
1131 void mdi_pi_set_preferred(mdi_pathinfo_t *, int);
1132 
1133 /* get/set vhci private data */
1134 void *mdi_client_get_vhci_private(dev_info_t *);
1135 void mdi_client_set_vhci_private(dev_info_t *, void *);
1136 void *mdi_phci_get_vhci_private(dev_info_t *);
1137 void mdi_phci_set_vhci_private(dev_info_t *, void *);
1138 void *mdi_pi_get_vhci_private(mdi_pathinfo_t *);
1139 void mdi_pi_set_vhci_private(mdi_pathinfo_t *, void *);
1140 
1141 /*
1142  * mdi_pathinfo Property utilities
1143  */
1144 int mdi_prop_size(mdi_pathinfo_t *, size_t *);
1145 int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t);
1146 
1147 /* obsolete interface, to be removed */
1148 void mdi_get_next_path(dev_info_t *, mdi_pathinfo_t *, mdi_pathinfo_t **);
1149 int mdi_get_component_type(dev_info_t *);
1150 
1151 #endif	/* _KERNEL */
1152 
1153 #ifdef	__cplusplus
1154 }
1155 #endif
1156 
1157 #endif	/* _SYS_MDI_IMPLDEFS_H */
1158