xref: /titanic_51/usr/src/uts/common/avs/ns/rdc/rdc_io.h (revision d5508a7fb37e6b070e142ee081bec69a3d20bd6c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef _RDC_IO_H
27 #define	_RDC_IO_H
28 
29 #ifdef	__cplusplus
30 extern "C" {
31 #endif
32 
33 
34 #include <sys/unistat/spcs_s.h>
35 #ifdef DS_DDICT
36 #define	bool_t	int
37 #endif
38 #include  <sys/nsctl/rdc_prot.h>
39 #include <sys/nsctl/nsctl.h>
40 #include <sys/nsctl/rdc_ioctl.h>
41 
42 /*
43  * Definitions for kstats
44  */
45 #define	RDC_MKSTAT_MAXSETS		"maxsets"
46 #define	RDC_MKSTAT_MAXFBAS		"maxfbas"
47 #define	RDC_MKSTAT_RPC_TIMEOUT		"rpc_timeout"
48 #define	RDC_MKSTAT_HEALTH_THRES		"health_thres"
49 #define	RDC_MKSTAT_BITMAP_WRITES	"bitmap_writes"
50 #define	RDC_MKSTAT_CLNT_COTS_CALLS	"clnt_cots_calls"
51 #define	RDC_MKSTAT_CLNT_CLTS_CALLS	"clnt_clts_calls"
52 #define	RDC_MKSTAT_SVC_COTS_CALLS	"svc_cots_calls"
53 #define	RDC_MKSTAT_SVC_CLTS_CALLS	"svc_clts_calls"
54 #define	RDC_MKSTAT_BITMAP_REF_DELAY	"bitmap_ref_delay"
55 
56 #define	RDC_IKSTAT_FLAGS		"flags"
57 #define	RDC_IKSTAT_SYNCFLAGS		"syncflags"
58 #define	RDC_IKSTAT_BMPFLAGS		"bmpflags"
59 #define	RDC_IKSTAT_SYNCPOS		"syncpos"
60 #define	RDC_IKSTAT_VOLSIZE		"volsize"
61 #define	RDC_IKSTAT_BITSSET		"bitsset"
62 #define	RDC_IKSTAT_AUTOSYNC		"autosync"
63 #define	RDC_IKSTAT_MAXQFBAS		"maxqfbas"
64 #define	RDC_IKSTAT_MAXQITEMS		"maxqitems"
65 #define	RDC_IKSTAT_FILE			"primary_vol"
66 #define	RDC_IKSTAT_SECFILE		"secondary_vol"
67 #define	RDC_IKSTAT_BITMAP		"bitmap"
68 #define	RDC_IKSTAT_PRIMARY_HOST		"primary_host"
69 #define	RDC_IKSTAT_SECONDARY_HOST	"secondary_host"
70 #define	RDC_IKSTAT_TYPE_FLAG		"type_flag"
71 #define	RDC_IKSTAT_BMP_SIZE		"bmp_size"
72 #define	RDC_IKSTAT_DISK_STATUS		"disk_status"
73 #define	RDC_IKSTAT_IF_DOWN		"if_down"
74 #define	RDC_IKSTAT_IF_RPC_VERSION	"if_rpc_version"
75 #define	RDC_IKSTAT_ASYNC_THROTTLE_DELAY	"async_throttle_delay"
76 #define	RDC_IKSTAT_ASYNC_BLOCK_HWM	"async_block_hwm"
77 #define	RDC_IKSTAT_ASYNC_ITEM_HWM	"async_item_hwm"
78 #define	RDC_IKSTAT_QUEUE_TYPE		"async_queue_type"
79 #define	RDC_IKSTAT_ASYNC_ITEMS		"async_queue_items"
80 #define	RDC_IKSTAT_ASYNC_BLOCKS		"async_queue_blocks"
81 
/*
 * Async queue types, plus the active/inactive state values.
 */
#define	RDC_DISKQUE	0x01	/* queue type: disk queue */
#define	RDC_MEMQUE	0x02	/* queue type: memory queue */
#define	RDC_NOQUE	(-1)	/* queue type: none */

#define	RDC_ACTIVE	0x01
#define	RDC_INACTIVE	0x02
91 
92 #ifdef _KERNEL
93 
94 extern nstset_t *_rdc_ioset;
95 extern nstset_t *_rdc_flset;
96 
/*
 * Upper bound on sync threads.  DEBUG kernels make it a run-time
 * variable; non-DEBUG kernels fix it at 8.
 */
#ifdef DEBUG
extern int RDC_MAX_SYNC_THREADS;
extern int rdc_maxthreads_last;
int num_sync_threads;
#else
#define	RDC_MAX_SYNC_THREADS	8
#endif

/*
 * RDC_AVAIL_THR_TUNE(n)
 *
 * Recompute n.avail_thr as RDC_MAX_SYNC_THREADS - n.active_thr, clamped
 * so that it never goes below zero.  `n' is a structure lvalue (not a
 * pointer) with `active_thr' and `avail_thr' members, e.g. a sync_info_t.
 *
 * On DEBUG kernels, where RDC_MAX_SYNC_THREADS is a variable, the macro
 * first grows or shrinks n.rdc_syncset by the difference between the
 * current limit and the limit seen on the previous invocation
 * (rdc_maxthreads_last), then records the new limit and the resulting
 * thread count (num_sync_threads).
 *
 * `n' is evaluated several times, so it must be free of side effects.
 * The expansion is a single statement with no trailing semicolon; the
 * caller supplies it, which keeps the macro usable as the body of an
 * unbraced if/else.
 */
#ifdef DEBUG
#define	RDC_AVAIL_THR_TUNE(n)	\
	do { \
		if (rdc_maxthreads_last < RDC_MAX_SYNC_THREADS) { \
			(void) nst_add_thread((n).rdc_syncset, \
			    RDC_MAX_SYNC_THREADS - rdc_maxthreads_last); \
		} \
		if (rdc_maxthreads_last > RDC_MAX_SYNC_THREADS) { \
			(void) nst_del_thread((n).rdc_syncset, \
			    rdc_maxthreads_last - RDC_MAX_SYNC_THREADS); \
		} \
		(n).avail_thr = RDC_MAX_SYNC_THREADS - (n).active_thr; \
		if ((n).avail_thr < 0) { \
			(n).avail_thr = 0; \
		} \
		rdc_maxthreads_last = RDC_MAX_SYNC_THREADS; \
		num_sync_threads = nst_nthread((n).rdc_syncset); \
	} while (0)
#else
#define	RDC_AVAIL_THR_TUNE(n)	\
	do { \
		(n).avail_thr = RDC_MAX_SYNC_THREADS - (n).active_thr; \
		if ((n).avail_thr < 0) \
			(n).avail_thr = 0; \
	} while (0)

#endif
131 
/*
 * Sync thread accounting.  avail_thr is derived from active_thr by
 * RDC_AVAIL_THR_TUNE().
 */
typedef struct syncloop_info {
	int		active_thr;
	int		avail_thr; /* RDC_MAX_SYNC_THREADS - active_thr, min 0 */
	kmutex_t	lock;		/* presumably guards the counts; confirm */
	nstset_t	*rdc_syncset;	/* thread set resized on DEBUG kernels */
} sync_info_t;

/*
 * NOTE(review): this is an object definition in a header, not an extern
 * declaration, so every file that includes this header emits its own
 * tentative definition of sync_info -- confirm that this is intended.
 */
sync_info_t sync_info;
140 
141 /*
142  * Static server information
143  */
144 typedef struct servinfo {
145 	struct knetconfig	*ri_knconf;	/* bound TLI fd */
146 	struct netbuf		ri_addr;	/* server's address */
147 	struct sec_data		*ri_secdata;	/* sec data for rpcsec module */
148 	char			*ri_hostname;	/* server's hostname */
149 	int			ri_hostnamelen; /* server's hostname length */
150 } rdc_srv_t;
151 
152 /*
153  * Interface structure, including health monitoring.
154  */
155 typedef struct rdc_if_s {
156 	struct rdc_if_s *next;		/* chain pointer */
157 	struct netbuf ifaddr;
158 	struct netbuf r_ifaddr;
159 	rdc_srv_t *srv;			/* servinfo of server end */
160 	int	if_down;		/* i/f is down (set on primary) */
161 	int	isprimary;		/* this end is a primary */
162 	int	issecondary;		/* this end is a secondary */
163 	rpcvers_t rpc_version;		/* RPC protocol version in use */
164 	int	no_ping;		/* set on secondary to hold off RPCs */
165 	int	old_pulse;		/* previous (current) pulse value */
166 	int	new_pulse;		/* new (incoming) pulse value */
167 	int	deadness;		/* how close to death are we? */
168 	volatile int exiting;		/* daemon exit flag */
169 	time_t	last;			/* time of last ping */
170 } rdc_if_t;
171 
172 
173 typedef struct rdc_aio_s {
174 	struct rdc_aio_s *next;
175 	nsc_buf_t *handle;
176 	nsc_buf_t *qhandle;
177 	nsc_off_t pos;
178 	nsc_off_t qpos;
179 	nsc_size_t len;
180 	nsc_size_t orig_len;
181 	int	flag;
182 	int	iostatus;
183 	int	index;
184 	uint_t	seq;		/* sequence on async Q */
185 } rdc_aio_t;
186 
187 /* values for (rdc_aio_t *)->iostatus */
188 enum {
189 	RDC_IO_NONE = 0,	/* not used */
190 	RDC_IO_INIT,		/* io started */
191 	RDC_IO_DONE,		/* io done successfully */
192 	RDC_IO_FAILED,		/* io failed */
193 	RDC_IO_DISCARDED,	/* io discarded */
194 	RDC_IO_CANCELLED	/* group_log in progress */
195 };
196 
197 
198 #define	RDC_MAX_QBLOCKS	16384	/* 8MB temporary q for diskq to flush to */
199 #define	RDC_LOW_QBLOCKS 13927	/* roughly 85% of queue full */
200 #define	RDC_HALF_MQUEUE 8192	/* half of the memory queue */
201 
202 typedef struct netqueue {
203 	rdc_aio_t *net_qhead;
204 	rdc_aio_t *net_qtail;
205 	kmutex_t net_qlock;
206 	int hwmhit;			/* queue full hit? reset after hwm */
207 	int qfill_sleeping;		/* waiting for work? */
208 	int qfflags;			/* diskq/memq flusher flags */
209 	kcondvar_t qfcv;		/* for timed waits */
210 	volatile nsc_size_t blocks;	/* number of FBAs in q */
211 	volatile uint64_t nitems;	/* number of items in q */
212 	volatile int  inflbls;		/* number of inflight blocks */
213 	volatile int  inflitems;	/* number of inflight items */
214 	uint64_t  nitems_hwm;		/* highest items on queue */
215 	nsc_size_t  blocks_hwm;		/* highest blocks on queue */
216 	long throttle_delay;		/* Number of times we delayed x 2 */
217 } net_queue;
218 
219 
220 /*
221  * Bitmap header structures.
222  * These must be fixed size in all data models.
223  * If we ever support little-endian machines (eg. Intel) we will need
224  * to add byte-swapping logic.
225  */
226 
227 typedef struct {
228 	int32_t magic;
229 	int32_t serial_mode;
230 	int32_t use_mirror;
231 	int32_t mirror_down;
232 	int32_t sync_needed;
233 	char bitmapname[NSC_MAXPATH];
234 	char filename[NSC_MAXPATH];
235 	int32_t volume_failed;
236 } rdc_headerv2_t;
237 #define	RDC_HDR_V2	0x52444302	/* RDC2 */
238 
239 #define	RDC_SYNC	0x1
240 #define	RDC_REV_SYNC	0x2
241 #define	RDC_FULL_SYNC	0x3
242 
243 #define	RDC_FAILED	0x1
244 #define	RDC_COMPLETED	0x2
245 
246 typedef struct {
247 	char	file[NSC_MAXPATH];
248 	char	bitmap[NSC_MAXPATH];
249 } rdc_hdr_addr_t;
250 
251 typedef struct {
252 	int32_t		magic;
253 	rdc_hdr_addr_t	primary;
254 	rdc_hdr_addr_t	secondary;
255 	int32_t		flags;
256 	int32_t		autosync;
257 	int32_t		maxqfbas;
258 	int32_t		maxqitems;
259 	int32_t		syshostid;	/* for cluster bitmaps */
260 } rdc_headerv3_t;
261 #define	RDC_HDR_V3	0x52444303	/* RDC3 */
262 
263 typedef struct {
264 	int32_t		magic;
265 	rdc_hdr_addr_t	primary;
266 	rdc_hdr_addr_t	secondary;
267 	int32_t		flags;
268 	int32_t		autosync;
269 	int32_t		maxqfbas;
270 	int32_t		maxqitems;
271 	int32_t		syshostid;	/* for cluster bitmaps */
272 	int32_t		asyncthr;
273 } rdc_headerv4_t;
274 #define	RDC_HDR_V4	0x52444304	/* RDC4 */
275 
276 typedef struct {
277 	int32_t		magic;
278 	rdc_hdr_addr_t	primary;
279 	rdc_hdr_addr_t	secondary;
280 	int32_t		flags;
281 	int32_t		autosync;
282 	int64_t		maxqfbas;
283 	int64_t		maxqitems;
284 	int32_t		syshostid;	/* for cluster bitmaps */
285 	int32_t		asyncthr;
286 	int32_t		refcntsize;	/* size in bytes of each refcount */
287 } rdc_headerv5_t;
288 #define	RDC_HDR_V5	0x52444305	/* RDC5 */
289 
290 typedef rdc_headerv5_t	rdc_header_t;	/* Current header type */
291 #define	RDC_HDR_MAGIC	RDC_HDR_V5	/* Current header magic number */
292 
293 #endif	/* _KERNEL */
294 
#define	RDC_BITMAP_FBA	1		/* Offset at which the bitmap starts */
/*
 * RDC_BITMAP_FBA plus FBA_LEN() of the bitmap size of the set pointed to
 * by `krdc', i.e. the first FBA past the bitmap (presumably where the
 * bitmap reference counts start -- confirm against the bitmap code).
 * `krdc' may be any pointer-valued expression.
 */
#define	RDC_BITREF_FBA(krdc) (RDC_BITMAP_FBA + FBA_LEN((krdc)->bitmap_size))
297 
298 #ifdef _KERNEL
299 
300 #define	RDC_FUTILE_ATTEMPTS	50
301 typedef struct aio_buf_s {
302 	struct aio_buf_s	*next;		/* next aio_buf */
303 	nsc_buf_t		*rdc_abufp;	/* actual anon buf */
304 	int			kindex;		/* index we are attached to */
305 } aio_buf_t;
306 
307 typedef struct rdc_thrsync {
308 	kmutex_t	lock;
309 	int		threads;
310 	int		complete;
311 	kcondvar_t	cv;
312 } rdc_thrsync_t;
313 
314 typedef struct sync_status_s {
315 	int	offset;
316 	struct sync_status_s *next;
317 } sync_status_t;
318 
319 typedef struct rdc_syncthr {
320 	nsc_off_t		offset;
321 	nsc_size_t		len;
322 	struct rdc_k_info	*krdc;
323 	sync_status_t		*status;
324 } rdc_syncthr_t;
325 
/*
 * RDC buffer header
 */

typedef struct rdc_buf_s {
	nsc_buf_t	rdc_bufh;	/* exported buffer header */
	nsc_buf_t	*rdc_bufp;	/* underlying buffer */
	aio_buf_t	*rdc_anon;	/* ANON async buffer */
	struct rdc_fd_s	*rdc_fd;	/* back link */
	size_t		rdc_vsize;	/* size of allocated nsc_vec_t */
	int		rdc_flags;	/* flags, e.g. RDC_ASYNC_BUF */
	kmutex_t	aio_lock;	/* lock for rdc_anon */
	rdc_thrsync_t	rdc_sync;	/* for thread synchronization */
} rdc_buf_t;
340 
/*
 * Bits tested in rdc_buf_t.rdc_flags by the predicates below.
 */
#define	RDC_VEC_ALLOC	0x01		/* local kmem vector for remote io */
#define	RDC_ALLOC	0x02		/* rdc_bufp is nsc_buf_alloc'd */
#define	RDC_ASYNC_VEC	0x04		/* Keep tmp handle for async flusher */
#define	RDC_REMOTE_BUF	0x08		/* buffer alloc'd for remote io only */
#define	RDC_NULL_BUF	0x10		/* tell diskq to only store io_hdr */
#define	RDC_ASYNC_BUF	0x20		/* this buf is to an async vol */
#define	RDC_NULLBUFREAD	0x0f000000	/* read because RDC_NULL_BUF detected */

/* 1 if h is non-NULL and flagged RDC_ASYNC_BUF, otherwise 0 */
#define	BUF_IS_ASYNC(h)	\
	(((h) != NULL) && (((h)->rdc_flags & RDC_ASYNC_BUF) != 0))

/*
 * 1 if h is non-NULL, flagged RDC_REMOTE_BUF and not flagged
 * RDC_ASYNC_VEC, otherwise 0
 */
#define	RDC_REMOTE(h)	\
	(((h) != NULL) && \
	(((h)->rdc_flags & RDC_REMOTE_BUF) != 0) && \
	(((h)->rdc_flags & RDC_ASYNC_VEC) == 0))
352 
/*
 * Range check of a pos/len pair (p, l) against a handle h.
 * Evaluates to 1 when the handle's sb_user has RDC_DISKQUE set, or when
 * [p, p + l) lies within [sb_pos, sb_pos + sb_len); otherwise 0.
 */
#define	RDC_HANDLE_LIMITS(h, p, l) \
	((((h)->sb_user & RDC_DISKQUE) != 0) || \
	(((p) >= (h)->sb_pos) && \
	(((p) + (l)) <= ((h)->sb_pos + (h)->sb_len))))

/*
 * Range check of a pos/len pair (p, l) against a dset d.
 * Evaluates to 1 when [p, p + l) lies within [pos, pos + fbalen).
 */
#define	RDC_DSET_LIMITS(d, p, l) \
	(((p) >= (d)->pos) && (((p) + (l)) <= ((d)->pos + (d)->fbalen)))
365 
366 /*
367  * RDC device info structures
368  */
369 
370 typedef struct _rdc_info_dev_s {
371 	nsc_fd_t	*bi_fd;		/* file descriptor */
372 	nsc_iodev_t	*bi_iodev;	/* I/O device structure */
373 	struct rdc_k_info *bi_krdc;	/* back link */
374 	int		bi_rsrv;	/* Count of reserves held */
375 	int		bi_orsrv;	/* Reserves for other io provider */
376 	int		bi_failed;	/* Count of failed (faked) reserves */
377 	int		bi_ofailed;	/* Other io provider failed reserves */
378 	int		bi_flag;	/* Reserve flags */
379 } _rdc_info_dev_t;
380 
381 
382 typedef struct rdc_info_dev_s {
383 	struct rdc_info_dev_s	*id_next;	/* forward link */
384 	_rdc_info_dev_t		id_cache_dev;	/* cached device info */
385 	_rdc_info_dev_t		id_raw_dev;	/* raw device info */
386 	kmutex_t		id_rlock;	/* reserve/release lock */
387 	kcondvar_t		id_rcv;		/* nsc_release pending cv */
388 	int			id_sets;	/* # of sets referencing */
389 	int			id_release;	/* # of pending nsc_releases */
390 	int			id_flag;	/* flags */
391 } rdc_info_dev_t;
392 
393 
394 typedef struct rdc_path_s {
395 	nsc_path_t		*rp_tok;	/* nsc_register_path token */
396 	int			rp_ref;		/* # of rdc_fd_t's */
397 } rdc_path_t;
398 
399 
400 /*
401  * Values for id_flag
402  */
403 #define	RDC_ID_CLOSING		0x1		/* device is closing */
404 
405 #include <sys/nsctl/rdc_diskq.h>
406 
407 /*
408  * value for diskio.seq.
409  */
410 #define	RDC_NOSEQ		(0)		/* ignore sequence */
411 #define	RDC_NEWSEQ		(1)		/* start of sequence */
412 
413 typedef struct rdc_sleepq {
414 	struct rdc_sleepq	*next;
415 	uint_t			seq;		/* sequence in queue */
416 	int			idx;		/* idx number of request */
417 	int			pindex;		/* primary host set index */
418 	int			sindex;		/* secondary host set index */
419 	uint64_t		qpos;		/* offset on primary's queue */
420 	int			nocache;	/* cache flag to alloc_buf */
421 } rdc_sleepq_t;
422 
423 /*
424  * RDC group structure
425  */
426 typedef struct rdc_group {
427 	int		count;
428 	int		rdc_writer;
429 	int		unregistering;
430 	kmutex_t	lock;
431 	net_queue	ra_queue;	/* io todo async queues */
432 	kcondvar_t	iowaitcv;	/* wait for flusher */
433 	kcondvar_t	unregistercv;	/* wait for unregister */
434 	int		rdc_thrnum;	/* number of threads */
435 	int		rdc_addthrnum;	/* number threads added to thr set */
436 	kmutex_t	addthrnumlk;	/* lock for above */
437 	rdc_sleepq_t	*sleepq;	/* head of waiting tasks */
438 	/*
439 	 * Dual use, the outgoing sequence number on the client.
440 	 * The next expected sequence number on the server.
441 	 * Protected by the ra_queue lock.
442 	 */
443 	uint_t		seq;
444 	/*
445 	 * Dual use, the last acknowledged sequence number.
446 	 * Used to ensure that the queue doesn't overflow on server
447 	 * and to stall transmissions on the client.
448 	 * Protected by the ra_queue lock.
449 	 */
450 	uint_t		seqack;
451 	int		asyncstall;	/* count of asleep threads */
452 	int		asyncdis;	/* discard stalled output */
453 	kcondvar_t	asyncqcv;	/* output stall here */
454 	int		flags;		/* memory or disk. status etc */
455 	disk_queue	diskq;		/* disk queue */
456 	nsc_fd_t	*diskqfd;	/* diskq handle */
457 	nsc_path_t	*q_tok;		/* q registration */
458 	int		diskqrsrv;	/* reserve count */
459 	kmutex_t	diskqmutex;	/* enables/disables/reserves */
460 	uint_t		synccount;	/* number of group members syncing */
461 } rdc_group_t;
462 
/* group state */
#define	RDC_DISKQ_KILL		0x01	/* a force kill of diskq pending */

/*
 * Queue-type tests on a group pointer.  Each evaluates to the masked
 * flag bit (non-zero when set, not necessarily 1).  `grp' may be any
 * pointer-valued expression.
 */
#define	RDC_IS_DISKQ(grp)	((grp)->flags & RDC_DISKQUE)
#define	RDC_IS_MEMQ(grp)	((grp)->flags & RDC_MEMQUE)
468 
469 /*
470  * These flags are used in the
471  * aux_state field, and are used to track:
472  * AUXSYNCIP: When the code has a sync thread running, used instead
473  * of the RC_SYNCING flag which gets cleared before the sync thread
474  * terminates.
475  * AUXWRITE: Set when rdc_sync_write_thr is running, so the rdc_unintercept
476  * code can wait until a one-to-many write has actually terminated.
477  */
478 #define	RDC_AUXSYNCIP	0x01		/* a sync is in progress */
479 #define	RDC_AUXWRITE	0x02		/* I've got a write in progress */
480 
481 
482 /*
483  * RDC kernel-private information
484  */
485 typedef struct rdc_k_info {
486 	int			index;		/* Index into array */
487 	int			remote_index;	/* -1 means unknown */
488 	int			type_flag;
489 	int			rpc_version;	/* RPC version this set supps */
490 	int			spare1;
491 	nsc_off_t		syncbitpos;
492 	kmutex_t		syncbitmutex;	/* lock for syncbitpos */
493 	volatile int		busy_count;	/* ioctls in progress */
494 	volatile int		sync_done;
495 	int			aux_state; /* syncing ,don't disable */
496 	rdc_thrsync_t		syncs;		/* _rdc_sync thread tracking */
497 	rdc_info_dev_t		*devices;
498 	nsc_iodev_t		*iodev;		/* I/O device structure */
499 	rdc_path_t		cache_path;
500 	rdc_path_t		raw_path;
501 	rdc_if_t		*intf;
502 	rdc_srv_t		*lsrv;		/* list of servinfo */
503 	nsc_size_t		maxfbas;	/* returned from nsc_maxfbas */
504 	unsigned char		*dcio_bitmap;
505 	void			*bitmap_ref;	/* Incore bitmap bit ref */
506 	struct rdc_group	*group;
507 	nsc_size_t		bitmap_size;
508 	int			bmaprsrv;	/* bitmap reserve count */
509 	int			bitmap_write;
510 	nsc_fd_t		*bitmapfd;
511 	nsc_fd_t		*remote_fd;	/* FCAL direct io */
512 	volatile int		disk_status;	/* set to halt sync */
513 	int			closing;
514 	nsc_path_t		*b_tok;		/* Bitmap registration */
515 	int			b_ref;
516 	kmutex_t		dc_sleep;
517 	kmutex_t		bmapmutex;	/* mutex for bitmap ops */
518 	kcondvar_t		busycv;		/* wait for ioctl to complete */
519 	kcondvar_t		closingcv;	/* unregister_path/close */
520 	kcondvar_t		haltcv;		/* wait for sync to halt */
521 	kcondvar_t		synccv;		/* wait for sync to halt */
522 	struct rdc_net_dataset  *net_dataset;	/* replaces hnds */
523 	int64_t			io_time;	/* moved from cd_info */
524 	struct rdc_k_info	*many_next;	/* 1-to-many circular list */
525 	struct rdc_k_info	*multi_next;	/* to multihop krdc */
526 	struct rdc_k_info	*group_next;	/* group circular list */
527 	kstat_t			*io_kstats;	/* io kstat */
528 	kstat_t			*bmp_kstats;	/* bitmap io kstat */
529 	kstat_t			*set_kstats;	/* set kstat */
530 	kmutex_t		kstat_mutex;	/* mutex for kstats */
531 	kmutex_t		bmp_kstat_mutex;	/* mutex for kstats */
532 	struct bm_ref_ops	*bm_refs;
533 } rdc_k_info_t;
534 
535 #define	c_fd		devices->id_cache_dev.bi_fd
536 #define	c_rsrv		devices->id_cache_dev.bi_rsrv
537 #define	c_failed	devices->id_cache_dev.bi_failed
538 #define	c_flag		devices->id_cache_dev.bi_flag
539 
540 #define	c_tok		cache_path.rp_tok
541 #define	c_ref		cache_path.rp_ref
542 
543 #define	r_fd		devices->id_raw_dev.bi_fd
544 #define	r_rsrv		devices->id_raw_dev.bi_rsrv
545 #define	r_failed	devices->id_raw_dev.bi_failed
546 #define	r_flag		devices->id_raw_dev.bi_flag
547 
548 #define	r_tok		raw_path.rp_tok
549 #define	r_ref		raw_path.rp_ref
550 
551 /*
552  * flags for _rdc_rsrv_devs()
553  */
554 
555 /*
556  * which device(s) to reserve - integer bitmap.
557  */
558 
559 #define	RDC_CACHE	0x1	/* data device in cache mode */
560 #define	RDC_RAW		0x2	/* data device in raw mode */
561 #define	RDC_BMP		0x4	/* bitmap device */
562 #define	RDC_QUE		0x8	/* diskq device */
563 
564 /*
565  * device usage after reserve - integer flag.
566  */
567 
568 #define	RDC_INTERNAL	0x1	/* reserve for rdc internal purposes */
569 #define	RDC_EXTERNAL	0x2	/* reserve in response to io provider Attach */
570 
571 /*
572  * Utility macro for nsc_*() io function returns.
573  */
574 
575 #define	RDC_SUCCESS(rc)	(((rc) == NSC_DONE) || ((rc) == NSC_HIT))
576 
577 /*
578  * RDC file descriptor structure
579  */
580 
581 typedef struct rdc_fd_s {
582 	rdc_k_info_t	*rdc_info;	/* devices info structure */
583 	int		rdc_type;	/* open type, diskq or bitmap */
584 	int		rdc_oflags;	/* raw or cached open type */
585 } rdc_fd_t;
586 
/*
 * fd and rsrv macros
 *
 * Argument conventions:
 *	bi	pointer to an _rdc_info_dev_t
 *	gcd	pointer to an rdc_k_info_t
 *	rdc	pointer to an rdc_fd_t
 * Every argument may be an arbitrary pointer-valued expression.
 */

/* a reserve (real or faked after failure) is held on this device */
#define	RSRV(bi)	(((bi)->bi_rsrv > 0) || ((bi)->bi_failed > 0))
/* as RSRV, but for the reserves counted for the other io provider */
#define	ORSRV(bi)	(((bi)->bi_orsrv > 0) || ((bi)->bi_ofailed > 0))
/* at least one failed (faked) reserve is recorded, by either party */
#define	RFAILED(bi)	(((bi)->bi_failed > 0) || ((bi)->bi_ofailed > 0))

#define	IS_RSRV(bi)	(RSRV(bi) || ORSRV(bi))

/* IS_RSRV applied to the set's cache device / raw device */
#define	IS_CRSRV(gcd)	(IS_RSRV(&(gcd)->devices->id_cache_dev))
#define	IS_RRSRV(gcd)	(IS_RSRV(&(gcd)->devices->id_raw_dev))

/* RFAILED on either the cache or the raw device of the set */
#define	IS_RFAILED(gcd)	\
		(RFAILED(&(gcd)->devices->id_cache_dev) || \
		RFAILED(&(gcd)->devices->id_raw_dev))

#define	RDC_IS_BMP(rdc)	((rdc)->rdc_type == RDC_BMP)
#define	RDC_IS_QUE(rdc) ((rdc)->rdc_type == RDC_QUE)
/* opened without NSC_CACHE in the open flags */
#define	RDC_IS_RAW(rdc)	(((rdc)->rdc_oflags & NSC_CACHE) == 0)
/* cache fd when the cache device is reserved, otherwise the raw fd */
#define	RDC_U_FD(gcd)	(IS_CRSRV(gcd) ? (gcd)->c_fd : (gcd)->r_fd)
/* RDC_U_FD of the set that the rdc_fd_t points back to */
#define	RDC_FD(rdc)	(RDC_U_FD((rdc)->rdc_info))
609 
610 
611 typedef struct rdc_host_u {
612 	char *nodename;
613 	int netaddr;
614 	struct netbuf *naddr;
615 } rdc_host_t;
616 
617 /*
618  * Reply from remote read
619  * - convenience defines for the client side code.
620  * - keep this in sync with the readres structure in rdc_prot.h/.x
621  */
622 #define	rdcrdresult	readres
623 #define	rr_status	status
624 #define	rr_ok		readres_u.reply
625 #define	rr_bufsize	rr_ok.data.data_len
626 #define	rr_data		rr_ok.data.data_val
627 
628 /*
629  * Flags for remote read rpc
630  *
631  * _START must be a unique rpc, _DATA and _END may be OR-d together.
632  */
633 #define	RDC_RREAD_DATA	0x1	/* Intermediate rpc with data payload */
634 #define	RDC_RREAD_START	0x2	/* Setup rpc */
635 #define	RDC_RREAD_END	0x4	/* End rpc */
636 #define	RDC_RREAD_FAIL	0x8	/* Primary is failed */
637 
638 /*
639  * Flags for remote write rpc
640  */
641 #define	RDC_RWRITE_FAIL	0x8	/* Primary is failed */
642 
/*
 * RDC_INFRONT(x, y) is used to determine if the incoming sq, with
 * sequence value x, should be placed before the sq with sequence
 * value y; it evaluates to 1 if so and to 0 otherwise.
 * This has to account for integer wrap. We account for integer
 * wrap by checking if the difference between x and y is within
 * half of the maximum integer value (RDC_MAXINT) or not.
 *
 * x and y may be arbitrary expressions but are each evaluated more
 * than once, so they must be free of side effects.
 */

#define	RDC_BITSPERBYTE 8
#define	RDC_BITS(type)	(RDC_BITSPERBYTE * (long)sizeof (type))
#define	RDC_HIBITI	((unsigned)1 << (RDC_BITS(int) - 1))
#define	RDC_MAXINT	((int)(~RDC_HIBITI))
#define	RDC_RANGE	((RDC_MAXINT / 2) -1)

#define	RDC_INFRONT(x, y) \
	((((x) < (y)) && (((y) - (x)) < RDC_RANGE)) ? 1 : \
	(((x) > (y)) && (((x) - (y)) > RDC_RANGE)) ? 1 : 0)
659 
660 
661 
662 
663 #endif /* _KERNEL */
664 
665 /*
666  * RDC user-visible information
667  */
668 typedef rdc_set_t rdc_u_info_t;
669 
670 
671 /*
672  * RDC flags for set state / set cd RPC.
673  * Must remain compatible with rdc RPC protocol version v3.
674  */
675 #define	CCIO_NONE		0x0000
676 #define	CCIO_ENABLE		0x0008
677 #define	CCIO_SLAVE		0x0010
678 #define	CCIO_DONE		0x0020
679 #define	CCIO_ENABLELOG		0x0100
680 #define	CCIO_RSYNC		0x0400
681 #define	CCIO_REMOTE		0x2000
682 
683 
/*
 * Bits kept in krdc->type_flag (kernel only).
 */
#define	RDC_CONFIGURED		0x001
#define	RDC_DISABLEPEND		0x002	/* Suspend/Disable is in progress */
#define	RDC_ASYNCMODE		0x004
#define	RDC_RESUMEPEND		0x008
#define	RDC_RESPONSIBLE		0x010
#define	RDC_BUSYWAIT		0x020
#define	RDC_UNREGISTER		0x040	/* Unregister is in progress */
#define	RDC_QDISABLEPEND	0x100	/* Q Suspend/Disable is in progress */

/* masked RDC_CONFIGURED bit of the set (non-zero when configured) */
#define	IS_CONFIGURED(krdc)	((krdc)->type_flag & RDC_CONFIGURED)

/* the set's many_next link does not point back at the set itself */
#define	IS_MANY(krdc)		((krdc)->many_next != (krdc))

/* the set has a multi_next link */
#define	IS_MULTI(krdc)		((krdc)->multi_next != NULL)

/*
 * The kernel entry indexed by urdc->index is configured and the
 * set's vflags include RDC_ENABLED.
 */
#define	IS_ENABLED(urdc)	\
	(IS_CONFIGURED(&rdc_k_info[(urdc)->index]) && \
	((rdc_get_vflags(urdc) & RDC_ENABLED) != 0))

/* index lies in [0, rdc_max_sets) and names a configured set */
#define	IS_VALID_INDEX(index)	\
	(((index) >= 0) && ((index) < rdc_max_sets) && \
	IS_CONFIGURED(&rdc_k_info[(index)]))
704 
#define	RDC_NOFLUSH	0	/* Do not do a flush when starting logging */
#define	RDC_NOREMOTE	0	/* Send no remote logging notifications */
#define	RDC_FLUSH	1	/* Do a flush when starting logging */
#define	RDC_ALLREMOTE	2	/* Notify all remote group members */
#define	RDC_OTHERREMOTE	4	/* Notify all remote group members except */
				/* the one corresponding to the current set, */
				/* to prevent recursion in the case where */
				/* the request was initiated from the remote */
				/* node. */
#define	RDC_FORCE_GROUP 8	/* set all group members logging regardless */
715 
716 #ifdef _KERNEL
717 
718 /*
719  * Functions, vars
720  */
721 
722 #define	RDC_SYNC_EVENT_TIMEOUT	(60 * HZ)
723 typedef struct {
724 	clock_t lbolt;
725 	int event;
726 	int ack;
727 	int daemon_waiting;		/* Daemon waiting in ioctl */
728 	int kernel_waiting;		/* Kernel waiting for daemon to reply */
729 	char master[NSC_MAXPATH];
730 	char group[NSC_MAXPATH];
731 	kmutex_t mutex;
732 	kcondvar_t cv;
733 	kcondvar_t done_cv;
734 } rdc_sync_event_t;
735 extern rdc_sync_event_t rdc_sync_event;
736 extern clock_t rdc_sync_event_timeout;
737 extern kmutex_t rdc_sync_mutex;
738 
739 extern rdc_u_info_t *rdc_u_info;
740 extern rdc_k_info_t *rdc_k_info;
741 
742 extern int rdc_max_sets;
743 
744 extern unsigned long rdc_async_timeout;
745 
746 extern int rdc_self_host();
747 extern uint64_t mirror_getsize(int index);
748 extern void rdc_sleepqdiscard(rdc_group_t *);
749 
750 
751 #ifdef	DEBUG
752 extern void rdc_stallzero(int);
753 #endif
754 
755 struct rdc_net_dataitem {
756 	void *dptr;
757 	int   len;	/* byte count */
758 	int   mlen;	/* actual malloced size */
759 	struct rdc_net_dataitem *next;
760 };
761 typedef struct rdc_net_dataitem rdc_net_dataitem_t;
762 
763 struct rdc_net_dataset {
764 	int id;
765 	int inuse;
766 	int delpend;
767 	int nitems;
768 	nsc_off_t pos;
769 	nsc_size_t fbalen;
770 	rdc_net_dataitem_t *head;
771 	rdc_net_dataitem_t *tail;
772 	struct rdc_net_dataset *next;
773 };
774 typedef struct rdc_net_dataset rdc_net_dataset_t;
775 
776 
777 #endif /* _KERNEL */
778 
779 
780 #define	RDC_TCP_DEV		"/dev/tcp"
781 
782 #define	RDC_VERS_MIN	RDC_VERSION5
783 #define	RDC_VERS_MAX	RDC_VERSION7
784 
785 #define	RDC_HEALTH_THRESHOLD	20
786 #define	RDC_MIN_HEALTH_THRES	5
787 #define	SNDR_MAXTHREADS		16
788 /*
789  * These next two defines are the default value of the async queue size
790  * They have been calculated to be 8MB of data with an average of
791  * 2K IO size
792  */
793 #define	RDC_MAXTHRES_QUEUE 	16384	/* max # of fbas on async q */
794 #define	RDC_MAX_QITEMS		4096	/* max # of items on async q */
795 #define	RDC_ASYNCTHR		2	/* number of async threads */
796 
797 #define	RDC_RPC_MAX		(RDC_MAXDATA + sizeof (net_data5) +\
798 					(RPC_MAXDATASIZE - 8192))
799 #define	ATM_NONE 0
800 #define	ATM_INIT 1
801 #define	ATM_EXIT 2
802 
803 #define	RDC_CLNT_TMOUT		16
804 
805 #define	BMAP_BLKSIZE 1024
806 #define	BMAP_BLKSIZEV7 RDC_MAXDATA
807 
/*
 * Right now we can only trace writes of 1MB or less to the bitmap
 * (32 bits wide).  The limit is expressed in FBAs.
 */
#define	RDC_MAX_MAXFBAS	2048
810 
811 #if defined(_KERNEL)
812 /* kstat interface */
813 
814 /*
815  * Per module kstats
816  * only one instance
817  */
818 typedef struct {
819 	kstat_named_t	m_maxsets;		/* Max # of sndr sets */
820 	kstat_named_t	m_maxfbas;		/* Max # of FBAS from nsctl */
821 	kstat_named_t	m_rpc_timeout;		/* global RPC timeout */
822 	kstat_named_t	m_health_thres;		/* Health thread timeout */
823 	kstat_named_t	m_bitmap_writes;	/* True for bitmap writes */
824 	kstat_named_t	m_clnt_cots_calls;	/* # of clnt COTS calls */
825 	kstat_named_t	m_clnt_clts_calls;	/* # of clnt CLTS calls */
826 	kstat_named_t	m_svc_cots_calls;	/* # of server COTS calls */
827 	kstat_named_t	m_svc_clts_calls;	/* # of server CLTS calls */
828 	kstat_named_t	m_bitmap_ref_delay;	/* # of bitmap ref overflows */
829 } sndr_m_stats_t;
830 
831 /*
832  * Per set kstats
833  * one instance per configured set
834  */
835 typedef struct {
836 	kstat_named_t	s_flags;	/* from rdc_set_t */
837 	kstat_named_t	s_syncflags;	/* from rdc_set_t */
838 	kstat_named_t	s_bmpflags;	/* from rdc_set_t */
839 	kstat_named_t	s_syncpos;	/* from rdc_set_t */
840 	kstat_named_t	s_volsize;	/* from rdc_set_t */
841 	kstat_named_t	s_bits_set;	/* from rdc_set_t */
842 	kstat_named_t	s_autosync;	/* from rdc_set_t */
843 	kstat_named_t	s_maxqfbas;	/* from rdc_set_t */
844 	kstat_named_t	s_maxqitems;	/* from rdc_set_t */
845 	kstat_named_t	s_primary_vol;	/* from rdc_set_t */
846 	kstat_named_t	s_secondary_vol;	/* from rdc_set_t */
847 	kstat_named_t	s_bitmap;	/* from rdc_set_t */
848 	kstat_named_t	s_primary_intf;	/* from rdc_set_t */
849 	kstat_named_t	s_secondary_intf;	/* from rdc_set_t */
850 	kstat_named_t	s_type_flag;	/* from rdc_k_info_t */
851 	kstat_named_t	s_bitmap_size;	/* from rdc_k_info_t */
852 	kstat_named_t	s_disk_status;	/* from rdc_k_info_t */
853 	kstat_named_t	s_if_if_down;	/* from rdc_if_t */
854 	kstat_named_t	s_if_rpc_version;	/* from rdc_if_t */
855 	kstat_named_t	s_aqueue_blk_hwm;	/* from rdc_k_info_t */
856 	kstat_named_t	s_aqueue_itm_hwm;	/* from rdc_k_info_t */
857 	kstat_named_t	s_aqueue_throttle;	/* from rdc_k_info_t */
858 	kstat_named_t	s_aqueue_items;
859 	kstat_named_t	s_aqueue_blocks;
860 	kstat_named_t	s_aqueue_type;
861 } rdc_info_stats_t;
862 #endif /* _KERNEL */
863 
864 #ifndef _SunOS_5_6 	/* i.e. 2.7+ */
865 typedef int xdr_t;
866 #else	/* i.e. 2.6- */
867 typedef unsigned long rpcprog_t;
868 typedef unsigned long rpcvers_t;
869 typedef unsigned long rpcproc_t;
870 typedef unsigned long rpcprot_t;
871 typedef unsigned long rpcport_t;
872 #endif /* _SunOS_5_6 */
873 
874 
875 #ifdef _KERNEL
876 
877 extern nsc_size_t MAX_RDC_FBAS;
878 extern volatile int net_exit;
879 extern nsc_size_t rdc_maxthres_queue;	/* max # of fbas on async q */
880 extern int rdc_max_qitems;		/* max # of items on async q */
881 extern int rdc_asyncthr;	/* # of async threads */
882 
883 #ifdef DEBUG
884 extern kmutex_t rdc_cntlock;
885 extern int rdc_datasetcnt;
886 #endif
887 
/*
 * Macro to keep tabs on dataset memory usage.
 *
 * On DEBUG kernels RDC_DSMEMUSE(x) adds x (which may be negative) to
 * rdc_datasetcnt while holding rdc_cntlock; on non-DEBUG kernels it
 * expands to nothing.  The DEBUG expansion is a single statement with
 * no trailing semicolon, so the whole update stays under the control of
 * an enclosing unbraced if/else; callers write RDC_DSMEMUSE(x);
 */
#ifdef DEBUG
#define	RDC_DSMEMUSE(x) \
	do { \
		mutex_enter(&rdc_cntlock); \
		rdc_datasetcnt += (x); \
		mutex_exit(&rdc_cntlock); \
	} while (0)
#else
#define	RDC_DSMEMUSE(x)
#endif
899 
900 
901 
902 
903 
904 extern kmutex_t rdc_ping_lock;
905 extern rdc_if_t *rdc_if_top;
906 
907 extern int _rdc_enqueue_write(rdc_k_info_t *, nsc_off_t, nsc_size_t, int,
908     nsc_buf_t *);
909 extern int rdc_net_state(int, int);
910 extern int rdc_net_getbmap(int, int);
911 extern int rdc_net_getsize(int, uint64_t *);
912 extern int rdc_net_write(int, int, nsc_buf_t *, nsc_off_t, nsc_size_t, uint_t,
913     int, netwriteres *);
914 extern int rdc_net_read(int, int, nsc_buf_t *, nsc_off_t, nsc_size_t);
915 extern int _rdc_remote_read(rdc_k_info_t *, nsc_buf_t *, nsc_off_t, nsc_size_t,
916     int);
917 extern int _rdc_multi_write(nsc_buf_t *, nsc_off_t, nsc_size_t, int,
918     rdc_k_info_t *);
919 extern int rdc_start_server(struct rdc_svc_args *, int);
920 extern aio_buf_t *rdc_aio_buf_get(rdc_buf_t *, int);
921 extern void rdc_aio_buf_del(rdc_buf_t *, rdc_k_info_t *);
922 extern aio_buf_t *rdc_aio_buf_add(int, rdc_buf_t *);
923 extern int rdc_net_getstate(rdc_k_info_t *, int *, int *, int *, int);
924 extern kmutex_t rdc_conf_lock;
925 extern kmutex_t rdc_many_lock;
926 extern int rdc_drain_queue(int);
927 extern int flush_group_queue(int);
928 extern void rdc_dev_close(rdc_k_info_t *);
929 extern int rdc_dev_open(rdc_set_t *, int);
930 extern void rdc_get_details(rdc_k_info_t *);
931 extern int rdc_lookup_bitmap(char *);
932 extern int rdc_lookup_enabled(char *, int);
933 extern int rdc_lookup_byaddr(rdc_set_t *);
934 extern int rdc_lookup_byname(rdc_set_t *);
935 extern int rdc_intercept(rdc_k_info_t *);
936 extern int rdc_unintercept(rdc_k_info_t *);
937 extern int _rdc_rsrv_devs(rdc_k_info_t *, int, int);
938 extern void _rdc_rlse_devs(rdc_k_info_t *, int);
939 extern void _rdc_unload(void);
940 extern int _rdc_load(void);
941 extern int _rdc_configure(void);
942 extern void _rdc_deconfigure(void);
943 extern void _rdc_async_throttle(rdc_k_info_t *, long);
944 extern int rdc_writer(int);
945 extern int rdc_dump_alloc_bufs_cd(int);
946 extern void rdc_dump_alloc_bufs(rdc_if_t *);
947 extern int rdc_check_secondary(rdc_if_t *, int);
948 extern void rdc_dump_queue(int);
949 extern int rdc_isactive_if(struct netbuf *, struct netbuf *);
950 extern rdc_if_t *rdc_add_to_if(rdc_srv_t *, struct netbuf *, struct netbuf *,
951     int);
952 extern void rdc_remove_from_if(rdc_if_t *);
953 extern void rdc_set_if_vers(rdc_u_info_t *, rpcvers_t);
954 
955 extern void rdc_print_svinfo(rdc_srv_t *, char *);
956 extern rdc_srv_t *rdc_create_svinfo(char *, struct netbuf *,
957 			struct knetconfig *);
958 extern void rdc_destroy_svinfo(rdc_srv_t *);
959 
960 extern void init_rdc_netbuf(struct netbuf *);
961 extern void free_rdc_netbuf(struct netbuf *);
962 extern void dup_rdc_netbuf(const struct netbuf *, struct netbuf *);
963 extern int rdc_netbuf_toint(struct netbuf *);
964 extern struct netbuf *rdc_int_tonetbuf(int);
965 extern void rdc_lor(const uchar_t *, uchar_t *, int);
966 extern int rdc_resume2(rdc_k_info_t *);
967 extern void rdc_set_flags(rdc_u_info_t *, int);
968 extern void rdc_clr_flags(rdc_u_info_t *, int);
969 extern int rdc_get_vflags(rdc_u_info_t *);
970 extern void rdc_set_mflags(rdc_u_info_t *, int);
971 extern void rdc_clr_mflags(rdc_u_info_t *, int);
972 extern int rdc_get_mflags(rdc_u_info_t *);
973 extern void rdc_set_flags_log(rdc_u_info_t *, int, char *);
974 extern void rdc_group_log(rdc_k_info_t *krdc, int flush, char *why);
975 extern int _rdc_config(void *, int, spcs_s_info_t, int *);
976 extern void rdc_many_enter(rdc_k_info_t *);
977 extern void rdc_many_exit(rdc_k_info_t *);
978 extern void rdc_group_enter(rdc_k_info_t *);
979 extern void rdc_group_exit(rdc_k_info_t *);
980 extern int _rdc_sync_event_wait(void *, void *, int, spcs_s_info_t, int *);
981 extern int _rdc_sync_event_notify(int, char *, char *);
982 extern int _rdc_link_down(void *, int, spcs_s_info_t, int *);
983 extern void rdc_delgroup(rdc_group_t *);
984 extern int rdc_write_bitmap_fba(rdc_k_info_t *, nsc_off_t);
985 extern int rdc_bitmapset(int, char *, char *, void *, int, nsc_off_t, int);
986 extern rdc_net_dataset_t *rdc_net_add_set(int);
987 extern rdc_net_dataset_t *rdc_net_get_set(int, int);
988 extern void rdc_net_put_set(int, rdc_net_dataset_t *);
989 extern void rdc_net_del_set(int, rdc_net_dataset_t *);
990 extern void rdc_net_free_set(rdc_k_info_t *, rdc_net_dataset_t *);
991 extern int rdc_lookup_byhostdev(char *intf, char *file);
992 extern int rdc_lookup_configured(char *path);
993 extern void rdc_dump_dsets(int);
994 extern void set_busy(rdc_k_info_t *);
995 extern void wakeup_busy(rdc_k_info_t *);
996 
997 
998 #ifdef	DEBUG
999 extern int rdc_async6(void *, int mode, int *);
1000 extern int rdc_readgen(void *, int, int *);
1001 #endif
1002 
1003 #endif /* _KERNEL */
1004 
1005 #ifdef	__cplusplus
1006 }
1007 #endif
1008 
1009 #endif /* _RDC_IO_H */
1010